FlowerPower 0.9.12.4__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. flowerpower/__init__.py +17 -2
  2. flowerpower/cfg/__init__.py +201 -149
  3. flowerpower/cfg/base.py +122 -24
  4. flowerpower/cfg/pipeline/__init__.py +254 -0
  5. flowerpower/cfg/pipeline/adapter.py +66 -0
  6. flowerpower/cfg/pipeline/run.py +40 -11
  7. flowerpower/cfg/pipeline/schedule.py +69 -79
  8. flowerpower/cfg/project/__init__.py +149 -0
  9. flowerpower/cfg/project/adapter.py +57 -0
  10. flowerpower/cfg/project/job_queue.py +165 -0
  11. flowerpower/cli/__init__.py +92 -35
  12. flowerpower/cli/job_queue.py +878 -0
  13. flowerpower/cli/mqtt.py +49 -4
  14. flowerpower/cli/pipeline.py +576 -381
  15. flowerpower/cli/utils.py +55 -0
  16. flowerpower/flowerpower.py +12 -7
  17. flowerpower/fs/__init__.py +20 -2
  18. flowerpower/fs/base.py +350 -26
  19. flowerpower/fs/ext.py +797 -216
  20. flowerpower/fs/storage_options.py +1097 -55
  21. flowerpower/io/base.py +13 -18
  22. flowerpower/io/loader/__init__.py +28 -0
  23. flowerpower/io/loader/deltatable.py +7 -10
  24. flowerpower/io/metadata.py +1 -0
  25. flowerpower/io/saver/__init__.py +28 -0
  26. flowerpower/io/saver/deltatable.py +4 -3
  27. flowerpower/job_queue/__init__.py +252 -0
  28. flowerpower/job_queue/apscheduler/__init__.py +11 -0
  29. flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
  30. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
  31. flowerpower/job_queue/apscheduler/manager.py +1063 -0
  32. flowerpower/job_queue/apscheduler/setup.py +524 -0
  33. flowerpower/job_queue/apscheduler/trigger.py +169 -0
  34. flowerpower/job_queue/apscheduler/utils.py +309 -0
  35. flowerpower/job_queue/base.py +382 -0
  36. flowerpower/job_queue/rq/__init__.py +10 -0
  37. flowerpower/job_queue/rq/_trigger.py +37 -0
  38. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
  39. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
  40. flowerpower/job_queue/rq/manager.py +1449 -0
  41. flowerpower/job_queue/rq/setup.py +150 -0
  42. flowerpower/job_queue/rq/utils.py +69 -0
  43. flowerpower/pipeline/__init__.py +5 -0
  44. flowerpower/pipeline/base.py +118 -0
  45. flowerpower/pipeline/io.py +407 -0
  46. flowerpower/pipeline/job_queue.py +505 -0
  47. flowerpower/pipeline/manager.py +1586 -0
  48. flowerpower/pipeline/registry.py +560 -0
  49. flowerpower/pipeline/runner.py +560 -0
  50. flowerpower/pipeline/visualizer.py +142 -0
  51. flowerpower/plugins/mqtt/__init__.py +12 -0
  52. flowerpower/plugins/mqtt/cfg.py +16 -0
  53. flowerpower/plugins/mqtt/manager.py +789 -0
  54. flowerpower/settings.py +110 -0
  55. flowerpower/utils/logging.py +21 -0
  56. flowerpower/utils/misc.py +57 -9
  57. flowerpower/utils/sql.py +122 -24
  58. flowerpower/utils/templates.py +18 -142
  59. flowerpower/web/app.py +0 -0
  60. flowerpower-1.0.0b1.dist-info/METADATA +324 -0
  61. flowerpower-1.0.0b1.dist-info/RECORD +94 -0
  62. {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/WHEEL +1 -1
  63. flowerpower/cfg/pipeline/tracker.py +0 -14
  64. flowerpower/cfg/project/open_telemetry.py +0 -8
  65. flowerpower/cfg/project/tracker.py +0 -11
  66. flowerpower/cfg/project/worker.py +0 -19
  67. flowerpower/cli/scheduler.py +0 -309
  68. flowerpower/event_handler.py +0 -23
  69. flowerpower/mqtt.py +0 -525
  70. flowerpower/pipeline.py +0 -2419
  71. flowerpower/scheduler.py +0 -680
  72. flowerpower/tui.py +0 -79
  73. flowerpower/utils/datastore.py +0 -186
  74. flowerpower/utils/eventbroker.py +0 -127
  75. flowerpower/utils/executor.py +0 -58
  76. flowerpower/utils/trigger.py +0 -140
  77. flowerpower-0.9.12.4.dist-info/METADATA +0 -575
  78. flowerpower-0.9.12.4.dist-info/RECORD +0 -70
  79. /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
  80. {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/entry_points.txt +0 -0
  81. {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/top_level.txt +0 -0
flowerpower/cli/pipeline.py
@@ -1,605 +1,800 @@
1
- import importlib.util
2
-
1
+ # Import necessary libraries
3
2
  import typer
4
3
  from loguru import logger
4
+ from typing_extensions import Annotated
5
+ import datetime as dt
6
+ import duration_parser
7
+ from ..pipeline.manager import HookType, PipelineManager
8
+ from ..utils.logging import setup_logging
9
+ from .utils import parse_dict_or_list_param#, parse_param_dict
5
10
 
6
- # Import your existing pipeline functions
7
- # from ..pipeline import (
8
- # add as add_pipeline_,
9
- # add_job as add_pipeline_job_,
10
- # all_pipelines as all_pipelines_,
11
- # delete as delete_pipeline_,
12
- # get_summary as get_pipeline_summary_,
13
- # new as new_pipeline_,
14
- # run as run_pipeline_,
15
- # run_job as run_pipeline_job_,
16
- # schedule as schedule_pipeline_,
17
- # save_dag as save_pipeline_dag_,
18
- # show_dag as show_pipeline_dag_,
19
- # show_summary as show_pipeline_summary_,
20
- # # start_mqtt_listener as start_mqtt_listener_,
21
- # )
22
- from ..pipeline import Pipeline, PipelineManager
23
- from .utils import parse_dict_or_list_param, parse_param_dict
24
-
25
- # Optional imports
26
- if importlib.util.find_spec("apscheduler"):
27
- from ..scheduler import get_schedule_manager
28
- from ..scheduler import start_worker as start_worker_
29
- else:
30
- get_schedule_manager = None
31
- start_worker_ = None
32
-
11
+ setup_logging()
33
12
 
34
13
  app = typer.Typer(help="Pipeline management commands")
35
14
 
36
15
 
37
16
  @app.command()
38
17
  def run(
39
- name: str,
40
- executor: str | None = None,
41
- base_dir: str | None = None,
42
- inputs: str | None = None,
43
- final_vars: str | None = None,
44
- config: str | None = None,
45
- with_tracker: bool = False,
46
- with_opentelemetry: bool = False,
47
- with_progressbar: bool = False,
48
- storage_options: str | None = None,
18
+ name: str = typer.Argument(..., help="Name of the pipeline to run"),
19
+ executor: str | None = typer.Option(None, help="Executor to use for running the pipeline"),
20
+ base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
21
+ inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
22
+ final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
23
+ config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
24
+ cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
25
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
26
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
27
+ with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
28
+ max_retries: int = typer.Option(0, help="Maximum number of retry attempts on failure"),
29
+ retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
30
+ jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
49
31
  ):
50
32
  """
51
- Run the specified pipeline.
33
+ Run a pipeline immediately.
34
+
35
+ This command executes a pipeline with the specified configuration and inputs.
36
+ The pipeline will run synchronously, and the command will wait for completion.
52
37
 
53
38
  Args:
54
39
  name: Name of the pipeline to run
55
- executor: Executor to use
56
- base_dir: Base directory for the pipeline
57
- inputs: Input parameters as JSON, dict string, or key=value pairs
58
- final_vars: Final variables as JSON or list
59
- config: Config for the hamilton pipeline executor
60
- with_tracker: Enable tracking with hamilton ui
61
- with_opentelemetry: Enable OpenTelemetry tracing
62
- with_progressbar: Enable progress bar
63
- storage_options: Storage options as JSON, dict string, or key=value pairs
40
+ executor: Type of executor to use
41
+ base_dir: Base directory containing pipelines and configurations
42
+ inputs: Input parameters for the pipeline
43
+ final_vars: Final variables to request from the pipeline
44
+ config: Configuration for the Hamilton executor
45
+ cache: Cache configuration for improved performance
46
+ storage_options: Options for storage backends
47
+ log_level: Set the logging level
48
+ with_adapter: Configuration for adapters like trackers or monitors
49
+ max_retries: Maximum number of retry attempts on failure
50
+ retry_delay: Base delay between retries in seconds
51
+ jitter_factor: Random factor applied to delay for jitter (0-1)
64
52
 
65
53
  Examples:
66
- # JSON inputs
67
- pipeline run my_pipeline --inputs '{"key": "value"}'
54
+ # Run a pipeline with default settings
55
+ $ pipeline run my_pipeline
68
56
 
69
- # Dict string inputs
70
- pipeline run my_pipeline --inputs "{'key': 'value'}"
57
+ # Run with custom inputs
58
+ $ pipeline run my_pipeline --inputs '{"data_path": "data/myfile.csv", "limit": 100}'
71
59
 
72
- # Key-value pair inputs
73
- pipeline run my_pipeline --inputs 'key1=value1,key2=value2'
60
+ # Specify which final variables to calculate
61
+ $ pipeline run my_pipeline --final-vars '["output_table", "summary_metrics"]'
74
62
 
75
- # List final vars
76
- pipeline run my_pipeline --final-vars '["var1", "var2"]'
63
+ # Configure caching
64
+ $ pipeline run my_pipeline --cache '{"type": "memory", "ttl": 3600}'
77
65
 
78
- # Storage options
79
- pipeline run my_pipeline --storage-options 'endpoint=http://localhost,use_ssl=true'
66
+ # Use a different executor
67
+ $ pipeline run my_pipeline --executor distributed
68
+
69
+ # Enable adapters for monitoring/tracking
70
+ $ pipeline run my_pipeline --with-adapter '{"tracker": true, "opentelemetry": true}'
71
+
72
+ # Set a specific logging level
73
+ $ pipeline run my_pipeline --log-level debug
74
+
75
+ # Configure automatic retries on failure
76
+ $ pipeline run my_pipeline --max-retries 3 --retry-delay 2.0 --jitter-factor 0.2
80
77
  """
81
78
  parsed_inputs = parse_dict_or_list_param(inputs, "dict")
82
79
  parsed_config = parse_dict_or_list_param(config, "dict")
80
+ parsed_cache = parse_dict_or_list_param(cache, "dict")
83
81
  parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
84
82
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
83
+ parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
85
84
 
86
- with Pipeline(
87
- name=name,
85
+ with PipelineManager(
88
86
  base_dir=base_dir,
89
- storage_options=parsed_storage_options or {},
90
- ) as pipeline:
91
- pipeline.run(
92
- executor=executor,
87
+ storage_options=parsed_storage_options or {},
88
+ log_level=log_level,
89
+ ) as manager:
90
+ _ = manager.run(
91
+ name=name,
93
92
  inputs=parsed_inputs,
94
93
  final_vars=parsed_final_vars,
95
94
  config=parsed_config,
96
- with_tracker=with_tracker,
97
- with_opentelemetry=with_opentelemetry,
98
- with_progressbar=with_progressbar,
95
+ cache=parsed_cache,
96
+ executor_cfg=executor,
97
+ with_adapter_cfg=parsed_with_adapter,
98
+ max_retries=max_retries,
99
+ retry_delay=retry_delay,
100
+ jitter_factor=jitter_factor,
99
101
  )
102
+ logger.info(f"Pipeline '{name}' finished running.")
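The rewritten command is now a thin wrapper over PipelineManager.run. A minimal programmatic sketch of the same call, assuming only the import path and keyword names visible in this diff; the pipeline name and option values are placeholders:

    from flowerpower.pipeline.manager import PipelineManager

    # Mirrors the CLI body above; values are illustrative placeholders.
    with PipelineManager(base_dir=".", storage_options={}, log_level="info") as manager:
        result = manager.run(
            name="my_pipeline",
            inputs={"data_path": "data/myfile.csv"},   # what --inputs parses into
            final_vars=["output_table"],               # what --final-vars parses into
            config=None,
            cache=None,
            executor_cfg=None,
            with_adapter_cfg={"tracker": False},       # what --with-adapter parses into
            max_retries=3,
            retry_delay=2.0,
            jitter_factor=0.2,
        )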
100
103
 
101
104
 
102
105
  @app.command()
103
106
  def run_job(
104
- name: str,
105
- executor: str | None = None,
106
- base_dir: str | None = None,
107
- inputs: str | None = None,
108
- final_vars: str | None = None,
109
- config: str | None = None,
110
- with_tracker: bool = False,
111
- with_opentelemetry: bool = False,
112
- with_progressbar: bool = False,
113
- storage_options: str | None = None,
107
+ name: str = typer.Argument(..., help="Name or ID of the pipeline job to run"),
108
+ executor: str | None = typer.Option(None, help="Executor to use for running the job"),
109
+ base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
110
+ inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
111
+ final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
112
+ config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
113
+ cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
114
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
115
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
116
+ with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
117
+ max_retries: int = typer.Option(0, help="Maximum number of retry attempts on failure"),
118
+ retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
119
+ jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
114
120
  ):
115
121
  """
116
- Run the specified pipeline job.
122
+ Run a specific pipeline job.
123
+
124
+ This command runs an existing job by its ID. The job should have been previously
125
+ added to the system via the add-job command or through scheduling.
117
126
 
118
127
  Args:
119
- name: Name of the pipeline job to run
120
- executor: Executor to use
121
- base_dir: Base directory for the pipeline
122
- inputs: Input parameters as JSON, dict string, or key=value pairs
123
- final_vars: Final variables as JSON or list
124
- config: Config for the hamilton pipeline executor
125
- with_tracker: Enable tracking with hamilton ui
126
- with_opentelemetry: Enable OpenTelemetry tracing
127
- with_progressbar: Enable progress bar
128
- storage_options: Storage options as JSON, dict string, or key=value pairs
128
+ name: Job ID to run
129
+ executor: Type of executor to use (maps to executor_cfg in manager)
130
+ base_dir: Base directory containing pipelines and configurations
131
+ inputs: Input parameters for the pipeline
132
+ final_vars: Final variables to request from the pipeline
133
+ config: Configuration for the Hamilton executor
134
+ cache: Cache configuration
135
+ storage_options: Options for storage backends
136
+ log_level: Set the logging level
137
+ with_adapter: Configuration for adapters like trackers or monitors
138
+ max_retries: Maximum number of retry attempts on failure
139
+ retry_delay: Base delay between retries in seconds
140
+ jitter_factor: Random factor applied to delay for jitter (0-1)
129
141
 
130
142
  Examples:
131
- # JSON inputs
132
- pipeline run-job 123 --inputs '{"key": "value"}'
143
+ # Run a job with a specific ID
144
+ $ pipeline run-job job-123456
133
145
 
134
- # Dict string inputs
135
- pipeline run-job 123 --inputs "{'key': 'value'}"
146
+ # Run a job with custom inputs
147
+ $ pipeline run-job job-123456 --inputs '{"data_path": "data/myfile.csv"}'
136
148
 
137
- # Key-value pair inputs
138
- pipeline run-job 123 --inputs 'key1=value1,key2=value2'
149
+ # Specify a different executor
150
+ $ pipeline run-job job-123456 --executor local
139
151
 
140
- # List final vars
141
- pipeline run-job 123 --final-vars '["var1", "var2"]'
152
+ # Use caching for better performance
153
+ $ pipeline run-job job-123456 --cache '{"type": "memory"}'
142
154
 
143
- # Storage options
144
- pipeline run-job 123 --storage-options 'endpoint=http://localhost,use_ssl=true'
155
+ # Configure adapters for monitoring
156
+ $ pipeline run-job job-123456 --with-adapter '{"tracker": true, "opentelemetry": false}'
157
+
158
+ # Set up automatic retries for resilience
159
+ $ pipeline run-job job-123456 --max-retries 3 --retry-delay 2.0
145
160
  """
146
161
  parsed_inputs = parse_dict_or_list_param(inputs, "dict")
147
162
  parsed_config = parse_dict_or_list_param(config, "dict")
163
+ parsed_cache = parse_dict_or_list_param(cache, "dict")
148
164
  parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
149
165
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
166
+ parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
150
167
 
151
- with Pipeline(
152
- name=name,
168
+ with PipelineManager(
153
169
  base_dir=base_dir,
154
- storage_options=parsed_storage_options or {},
155
- ) as pipeline:
156
- pipeline.run_job(
157
- executor=executor,
170
+ storage_options=parsed_storage_options or {},
171
+ log_level=log_level,
172
+ ) as manager:
173
+ _ = manager.run_job(
174
+ name=name,
158
175
  inputs=parsed_inputs,
159
176
  final_vars=parsed_final_vars,
160
177
  config=parsed_config,
161
- with_tracker=with_tracker,
162
- with_opentelemetry=with_opentelemetry,
163
- with_progressbar=with_progressbar,
178
+ cache=parsed_cache,
179
+ executor_cfg=executor,
180
+ with_adapter_cfg=parsed_with_adapter,
181
+ max_retries=max_retries,
182
+ retry_delay=retry_delay,
183
+ jitter_factor=jitter_factor,
164
184
  )
185
+ logger.info(f"Job '{name}' finished running.")
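The retry knobs (max_retries, retry_delay, jitter_factor) are shared by run and run-job. The manager's actual backoff logic is not part of this diff; the sketch below only illustrates one conventional reading of a base delay combined with a 0-1 jitter factor:

    import random

    def illustrative_delay(attempt: int, retry_delay: float = 1.0, jitter_factor: float = 0.1) -> float:
        # Not FlowerPower's implementation; shows how a base delay and a jitter factor typically combine.
        base = retry_delay * attempt                     # wait grows with each retry attempt
        jitter = base * jitter_factor * random.random()  # random fraction of the base delay
        return base + jitter

    # With the defaults, the second retry waits roughly 2.0-2.2 seconds.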
165
186
 
166
187
 
167
188
  @app.command()
168
189
  def add_job(
169
- name: str,
170
- executor: str | None = None,
171
- base_dir: str | None = None,
172
- inputs: str | None = None,
173
- final_vars: str | None = None,
174
- config: str | None = None,
175
- with_tracker: bool = False,
176
- with_opentelemetry: bool = False,
177
- with_progressbar: bool = False,
178
- storage_options: str | None = None,
190
+ name: str = typer.Argument(..., help="Name of the pipeline to add as a job"),
191
+ executor: str | None = typer.Option(None, help="Executor to use for running the job"),
192
+ base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
193
+ inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
194
+ final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
195
+ config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
196
+ cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
197
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
198
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
199
+ with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
200
+ run_at: str | None = typer.Option(None, help="Run at a specific time (ISO format)"),
201
+ run_in: str | None = typer.Option(None, help="Run in a specific interval (e.g., '5m', '1h', '12m34s')"),
202
+ max_retries: int = typer.Option(3, help="Maximum number of retry attempts on failure"),
203
+ retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
204
+ jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
179
205
  ):
180
206
  """
181
- Add a job to the specified pipeline.
207
+ Add a pipeline job to the queue.
208
+
209
+ This command adds a job to the queue for later execution. The job is based on
210
+ an existing pipeline with customized inputs and configuration.
182
211
 
183
212
  Args:
184
- name: Name of the pipeline to add as job
185
- executor: Executor to use
186
- base_dir: Base directory for the pipeline
187
- inputs: Input parameters as JSON, dict string, or key=value pairs
188
- final_vars: Final variables as JSON or list
189
- config: Config for the hamilton pipeline executor
190
- with_tracker: Enable tracking with hamilton ui
191
- with_opentelemetry: Enable OpenTelemetry tracing
192
- with_progressbar: Enable progress bar
193
- storage_options: Storage options as JSON, dict string, or key=value pairs
213
+ name: Pipeline name to add as a job
214
+ executor: Type of executor to use
215
+ base_dir: Base directory containing pipelines and configurations
216
+ inputs: Input parameters for the pipeline
217
+ final_vars: Final variables to request from the pipeline
218
+ config: Configuration for the Hamilton executor
219
+ cache: Cache configuration
220
+ storage_options: Options for storage backends
221
+ log_level: Set the logging level
222
+ with_adapter: Configuration for adapters like trackers or monitors
223
+ run_at: Run the job at a specific time (ISO format)
224
+ run_in: Run the job in a specific interval (e.g., '5m', '1h')
225
+ max_retries: Maximum number of retry attempts on failure
226
+ retry_delay: Base delay between retries in seconds
227
+ jitter_factor: Random factor applied to delay for jitter (0-1)
194
228
 
195
229
  Examples:
196
- # JSON inputs
197
- pipeline add-job my_pipeline --inputs '{"key": "value"}'
230
+ # Add a basic job
231
+ $ pipeline add-job my_pipeline
198
232
 
199
- # Dict string inputs
200
- pipeline add-job my_pipeline --inputs "{'key': 'value'}"
233
+ # Add a job with custom inputs
234
+ $ pipeline add-job my_pipeline --inputs '{"data_path": "data/myfile.csv"}'
201
235
 
202
- # Key-value pair inputs
203
- pipeline add-job my_pipeline --inputs 'key1=value1,key2=value2'
236
+ # Specify final variables to calculate
237
+ $ pipeline add-job my_pipeline --final-vars '["output_table", "metrics"]'
204
238
 
205
- # List final vars
206
- pipeline add-job my_pipeline --final-vars '["var1", "var2"]'
239
+ # Configure caching
240
+ $ pipeline add-job my_pipeline --cache '{"type": "memory", "ttl": 3600}'
207
241
 
208
- # Storage options
209
- pipeline add-job my_pipeline --storage-options 'endpoint=http://localhost,use_ssl=true'
242
+ # Use a specific log level
243
+ $ pipeline add-job my_pipeline --log-level debug
244
+
245
+ # Configure automatic retries for resilience
246
+ $ pipeline add-job my_pipeline --max-retries 5 --retry-delay 2.0 --jitter-factor 0.2
210
247
  """
211
248
  parsed_inputs = parse_dict_or_list_param(inputs, "dict")
212
249
  parsed_config = parse_dict_or_list_param(config, "dict")
250
+ parsed_cache = parse_dict_or_list_param(cache, "dict")
213
251
  parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
214
252
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
253
+ parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
254
+ run_at = dt.datetime.fromisoformat(run_at) if run_at else None
255
+ run_in = duration_parser.parse(run_in) if run_in else None
215
256
 
216
- storage_options = (parsed_storage_options or {},)
217
-
218
- with Pipeline(
219
- name=name,
257
+ with PipelineManager(
220
258
  base_dir=base_dir,
221
259
  storage_options=parsed_storage_options or {},
222
- ) as pipeline:
223
- pipeline.add_job(
224
- executor=executor,
260
+ log_level=log_level,
261
+ ) as manager:
262
+ job_id = manager.add_job(
263
+ name=name,
225
264
  inputs=parsed_inputs,
226
265
  final_vars=parsed_final_vars,
227
266
  config=parsed_config,
228
- with_tracker=with_tracker,
229
- with_opentelemetry=with_opentelemetry,
230
- with_progressbar=with_progressbar,
267
+ cache=parsed_cache,
268
+ executor_cfg=executor,
269
+ with_adapter_cfg=parsed_with_adapter,
270
+ run_at=run_at,
271
+ run_in=run_in,
272
+ max_retries=max_retries,
273
+ retry_delay=retry_delay,
274
+ jitter_factor=jitter_factor,
231
275
  )
276
+ logger.info(f"Job {job_id} added for pipeline '{name}'.")
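The two new scheduling flags are converted before the manager call: --run-at through datetime.fromisoformat and --run-in through duration_parser.parse, exactly as in the body above. A short sketch; the pipeline name is a placeholder and the remaining add_job keywords are assumed to keep their defaults:

    import datetime as dt

    import duration_parser
    from flowerpower.pipeline.manager import PipelineManager

    run_at = dt.datetime.fromisoformat("2025-12-31T23:59:59")  # --run-at "2025-12-31T23:59:59"
    run_in = duration_parser.parse("12m34s")                   # --run-in "12m34s"

    with PipelineManager(base_dir=".", storage_options={}) as manager:
        job_id = manager.add_job(name="my_pipeline", run_at=run_at, run_in=run_in)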
232
277
 
233
278
 
234
279
  @app.command()
235
280
  def schedule(
236
- name: str,
237
- executor: str | None = None,
238
- base_dir: str | None = None,
239
- trigger_type: str = "cron",
240
- inputs: str | None = None,
241
- final_vars: str | None = None,
242
- config: str | None = None,
243
- with_tracker: bool = False,
244
- with_opentelemetry: bool = False,
245
- with_progressbar: bool = False,
246
- paused: bool = False,
247
- coalesce: str = "latest",
248
- misfire_grace_time: float | None = None,
249
- max_jitter: float | None = None,
250
- max_running_jobs: int | None = None,
251
- conflict_policy: str = "do_nothing",
252
- crontab: str | None = None,
253
- cron_params: str | None = None,
254
- interval_params: str | None = None,
255
- calendarinterval_params: str | None = None,
256
- date_params: str | None = None,
257
- storage_options: str | None = None,
258
- overwrite: bool = False,
281
+ name: str = typer.Argument(..., help="Name of the pipeline to schedule"),
282
+ executor: str | None = typer.Option(None, help="Executor to use for running the job"),
283
+ base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
284
+ inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
285
+ final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
286
+ config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
287
+ cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
288
+ cron: str | None = typer.Option(None, help="Cron expression for scheduling"),
289
+ interval: str | None = typer.Option(None, help="Interval for scheduling (e.g., '5m', '1h')"),
290
+ date: str | None = typer.Option(None, help="Specific date and time for scheduling (ISO format)"),
291
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
292
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
293
+ with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
294
+ overwrite: bool = typer.Option(False, help="Overwrite existing schedule if it exists"),
295
+ schedule_id: str | None = typer.Option(None, help="Custom ID for the schedule (autogenerated if not provided)"),
296
+ max_retries: int = typer.Option(3, help="Maximum number of retry attempts on failure"),
297
+ retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
298
+ jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
259
299
  ):
260
300
  """
261
- Schedule a pipeline with various configuration options.
301
+ Schedule a pipeline to run at specified times.
302
+
303
+ This command schedules a pipeline to run automatically based on various
304
+ scheduling triggers like cron expressions, time intervals, or specific dates.
262
305
 
263
306
  Args:
264
- name: Name of the pipeline to schedule
265
- executor: Executor to use
266
- base_dir: Base directory for the pipeline
267
- trigger_type: Type of schedule
268
- inputs: Input parameters as JSON, dict string, or key=value pairs
269
- final_vars: Final variables as JSON or list
270
- config: Config for the hamilton pipeline executor
271
- with_tracker: Enable tracking with hamilton ui
272
- with_opentelemetry: Enable OpenTelemetry tracing
273
- with_progressbar: Enable progress bar
274
- paused: Start the job in paused state
275
- coalesce: Coalesce policy
276
- misfire_grace_time: Misfire grace time
277
- max_jitter: Maximum jitter
278
- max_running_jobs: Maximum running jobs
279
- conflict_policy: Conflict policy
280
- crontab: Crontab expression
281
- cron_params: Cron parameters as JSON or key=value pairs
282
- interval_params: Interval parameters as JSON or key=value pairs
283
- calendarinterval_params: Calendar interval parameters as JSON or key=value pairs
284
- date_params: Date parameters as JSON or key=value pairs
285
- storage_options: Storage options as JSON, dict string, or key=value pairs
286
- overwrite: Overwrite existing schedule
307
+ name: Pipeline name to schedule
308
+ executor: Type of executor to use
309
+ base_dir: Base directory containing pipelines and configurations
310
+ inputs: Input parameters for the pipeline
311
+ final_vars: Final variables to request from the pipeline
312
+ config: Configuration for the Hamilton executor
313
+ cache: Cache configuration
314
+ cron: Cron expression for scheduling (e.g., "0 * * * *")
315
+ interval: Interval for scheduling (e.g., "5m", "1h")
316
+ date: Specific date and time for scheduling (ISO format)
317
+ storage_options: Options for storage backends
318
+ log_level: Set the logging level
319
+ with_adapter: Configuration for adapters like trackers or monitors
320
+ overwrite: Overwrite existing schedule with same ID
321
+ schedule_id: Custom identifier for the schedule
322
+ max_retries: Maximum number of retry attempts on failure
323
+ retry_delay: Base delay between retries in seconds
324
+ jitter_factor: Random factor applied to delay for jitter (0-1)
287
325
 
288
326
  Examples:
289
- # JSON inputs
290
- pipeline schedule my_pipeline --inputs '{"key": "value"}'
291
-
292
- # Dict string inputs
293
- pipeline schedule my_pipeline --inputs "{'key': 'value'}"
294
-
295
- # Key-value pair inputs
296
- pipeline schedule my_pipeline --inputs 'key1=value1,key2=value2'
297
-
298
- # List final vars
299
- pipeline schedule my_pipeline --final-vars '["var1", "var2"]'
300
-
301
- # Storage options
302
- pipeline schedule my_pipeline --storage-options 'endpoint=http://localhost,use_ssl=true'
327
+ # Schedule with cron expression (every hour)
328
+ $ pipeline schedule my_pipeline --cron "0 * * * *"
303
329
 
304
- # Cron schedule
305
- pipeline schedule my_pipeline --trigger-type cron --crontab '0 0 * * *'
330
+ # Schedule to run every 15 minutes
331
+ $ pipeline schedule my_pipeline --interval 15m
306
332
 
307
- # Interval schedule
308
- pipeline schedule my_pipeline --trigger-type interval --interval_params minutes=1
333
+ # Schedule to run at a specific date and time
334
+ $ pipeline schedule my_pipeline --trigger-type date --date_params run_date="2025-12-31 23:59:59"
309
335
 
310
- # Calendar interval schedule
311
- pipeline schedule my_pipeline --trigger-type calendarinterval --calendarinterval_params month=5
336
+ # Schedule with custom inputs and cache settings
337
+ $ pipeline schedule my_pipeline --inputs '{"source": "database"}' --cache '{"type": "redis"}'
312
338
 
313
- # Date schedule
314
- pipeline schedule my_pipeline --trigger-type date --date_params run_date='2021-01-01 12:00:01'
339
+ # Create a schedule in paused state
340
+ $ pipeline schedule my_pipeline --crontab "0 9 * * 1-5" --paused
315
341
 
342
+ # Set a custom schedule ID
343
+ $ pipeline schedule my_pipeline --crontab "0 12 * * *" --schedule_id "daily-noon-run"
344
+
345
+ # Configure automatic retries for resilience
346
+ $ pipeline schedule my_pipeline --max-retries 5 --retry-delay 2.0 --jitter-factor 0.2
316
347
  """
317
- if get_schedule_manager is None:
318
- raise ValueError("APScheduler not installed. Please install it first.")
319
-
320
- # Parse inputs
321
348
  parsed_inputs = parse_dict_or_list_param(inputs, "dict")
322
349
  parsed_config = parse_dict_or_list_param(config, "dict")
350
+ parsed_cache = parse_dict_or_list_param(cache, "dict")
323
351
  parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
324
352
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
353
+ parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
354
+ interval = duration_parser.parse(interval) if interval else None
355
+ cron = cron if cron else None
356
+ date = dt.datetime.fromisoformat(date) if date else None
325
357
 
326
- # Parse various parameter dictionaries
327
- cron_params_dict = parse_param_dict(cron_params)
328
- interval_params_dict = parse_param_dict(interval_params)
329
- calendarinterval_params_dict = parse_param_dict(calendarinterval_params)
330
- date_params_dict = parse_param_dict(date_params)
331
-
332
- # Combine all parameter dictionaries
333
- kwargs = {
334
- **cron_params_dict,
335
- **interval_params_dict,
336
- **calendarinterval_params_dict,
337
- **date_params_dict,
338
- }
339
-
340
- # Add crontab if provided
341
- if crontab is not None:
342
- kwargs["crontab"] = crontab
343
-
344
- # Convert numeric parameters
345
- for key in ["weeks", "days", "hours", "minutes", "seconds"]:
346
- if key in kwargs:
347
- try:
348
- kwargs[key] = float(kwargs[key])
349
- except ValueError:
350
- logger.warning(f"Could not convert {key} to float: {kwargs[key]}")
351
-
352
- with Pipeline(
353
- name=name,
358
+ with PipelineManager(
354
359
  base_dir=base_dir,
355
360
  storage_options=parsed_storage_options or {},
356
- ) as pipeline:
357
- id_ = pipeline.schedule(
358
- executor=executor,
359
- trigger_type=trigger_type,
361
+ log_level=log_level,
362
+ ) as manager:
363
+ # Combine common schedule kwargs
364
+
365
+ id_ = manager.schedule(
366
+ name=name,
360
367
  inputs=parsed_inputs,
361
368
  final_vars=parsed_final_vars,
362
369
  config=parsed_config,
363
- with_tracker=with_tracker,
364
- with_opentelemetry=with_opentelemetry,
365
- with_progressbar=with_progressbar,
366
- paused=paused,
367
- coalesce=coalesce,
368
- misfire_grace_time=misfire_grace_time,
369
- max_jitter=max_jitter,
370
- max_running_jobs=max_running_jobs,
371
- conflict_policy=conflict_policy,
370
+ cache=parsed_cache,
371
+ executor_cfg=executor,
372
+ with_adapter_cfg=parsed_with_adapter,
373
+ cron=cron,
374
+ interval=interval,
375
+ date=date,
372
376
  overwrite=overwrite,
373
- **kwargs,
377
+ schedule_id=schedule_id,
378
+ max_retries=max_retries,
379
+ retry_delay=retry_delay,
380
+ jitter_factor=jitter_factor,
374
381
  )
375
382
 
376
- logger.info(f"Job {id_} scheduled.")
383
+ logger.info(f"Pipeline '{name}' scheduled with ID {id_}.")
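The old trigger_type/crontab/*_params flags collapse into three plain options: --cron, --interval, and --date, which map onto the keyword arguments of manager.schedule shown above. A sketch assuming the unspecified keywords may be left at their defaults:

    import datetime as dt

    import duration_parser
    from flowerpower.pipeline.manager import PipelineManager

    with PipelineManager(base_dir=".", storage_options={}) as manager:
        manager.schedule(name="my_pipeline", cron="0 * * * *")                       # hourly
        manager.schedule(name="my_pipeline", interval=duration_parser.parse("15m"))  # every 15 minutes
        manager.schedule(name="my_pipeline", date=dt.datetime.fromisoformat("2025-12-31T23:59:59"))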
377
384
 
378
385
 
379
386
  @app.command()
380
387
  def schedule_all(
381
- executor: str | None = None,
382
- base_dir: str | None = None,
383
- inputs: str | None = None,
384
- final_vars: str | None = None,
385
- config: str | None = None,
386
- with_tracker: bool = False,
387
- with_opentelemetry: bool = False,
388
- with_progressbar: bool = False,
389
- paused: bool = False,
390
- coalesce: str = "latest",
391
- misfire_grace_time: float | None = None,
392
- max_jitter: float | None = None,
393
- max_running_jobs: int | None = None,
394
- conflict_policy: str = "do_nothing",
395
- storage_options: str | None = None,
396
- overwrite: bool = False,
388
+ executor: str | None = typer.Option(None, help="Override executor specified in pipeline configs"),
389
+ base_dir: str | None = typer.Option(None, help="Base directory containing pipelines and configurations"),
390
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
391
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
392
+ overwrite: bool = typer.Option(False, help="Overwrite existing schedules if they exist"),
397
393
  ):
398
394
  """
399
- Schedule all pipelines using the pipeline specific configurations (`conf/pipelines/<name>.yml`).
395
+ Schedule all pipelines based on their individual configurations.
396
+
397
+ This command reads the configuration files for all pipelines in the project
398
+ and schedules them based on their individual scheduling settings. This is useful
399
+ for setting up all scheduled pipelines at once after deployment or system restart.
400
400
 
401
401
  Args:
402
- executor: Executor to use
403
- base_dir: Base directory for the pipeline
404
- inputs: Input parameters as JSON, dict string, or key=value pairs
405
- final_vars: Final variables as JSON or list
406
- config: Config for the hamilton pipeline executor
407
- with_tracker: Enable tracking with hamilton ui
408
- with_opentelemetry: Enable OpenTelemetry tracing
409
- with_progressbar: Enable progress bar
410
- paused: Start the job in paused state
411
- coalesce: Coalesce policy
412
- misfire_grace_time: Misfire grace time
413
- max_jitter: Maximum jitter
414
- max_running_jobs: Maximum running jobs
415
- conflict_policy: Conflict policy
416
- storage_options: Storage options as JSON, dict string, or key=value pairs
417
- overwrite: Overwrite existing schedule
402
+ executor: Override executor specified in pipeline configs
403
+ base_dir: Base directory containing pipelines and configurations
404
+ storage_options: Options for storage backends
405
+ log_level: Set the logging level
406
+ overwrite: Whether to overwrite existing schedules
418
407
 
419
408
  Examples:
420
- pipeline schedule-all
421
- """
422
- if get_schedule_manager is None:
423
- raise ValueError("APScheduler not installed. Please install it first.")
409
+ # Schedule all pipelines using their configurations
410
+ $ pipeline schedule-all
424
411
 
412
+ # Force overwrite of existing schedules
413
+ $ pipeline schedule-all --overwrite
414
+
415
+ # Override executor for all pipelines
416
+ $ pipeline schedule-all --executor distributed
417
+
418
+ # Set custom base directory
419
+ $ pipeline schedule-all --base-dir /path/to/project
420
+ """
425
421
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
426
422
 
427
423
  with PipelineManager(
428
424
  base_dir=base_dir,
429
425
  storage_options=parsed_storage_options or {},
426
+ log_level=log_level,
430
427
  ) as manager:
431
428
  manager.schedule_all(
432
- executor=executor,
433
- inputs=inputs,
434
- final_vars=final_vars,
435
- config=config,
436
- with_tracker=with_tracker,
437
- with_opentelemetry=with_opentelemetry,
438
- with_progressbar=with_progressbar,
439
- paused=paused,
440
- coalesce=coalesce,
441
- misfire_grace_time=misfire_grace_time,
442
- max_jitter=max_jitter,
443
- max_running_jobs=max_running_jobs,
444
- conflict_policy=conflict_policy,
445
429
  overwrite=overwrite,
430
+ executor_cfg=executor
446
431
  )
432
+ logger.info("Scheduled all pipelines based on their configurations.")
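The equivalent library call, mirroring the body above; everything except overwrite and executor_cfg comes from each pipeline's own configuration (conf/pipelines/<name>.yml):

    from flowerpower.pipeline.manager import PipelineManager

    with PipelineManager(base_dir=".", storage_options={}) as manager:
        manager.schedule_all(overwrite=True, executor_cfg=None)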
447
433
 
448
434
 
449
435
  @app.command()
450
436
  def new(
451
- name: str,
452
- base_dir: str | None = None,
453
- storage_options: str | None = None,
454
- overwrite: bool = False,
437
+ name: str = typer.Argument(..., help="Name of the pipeline to create"),
438
+ base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
439
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
440
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
441
+ overwrite: bool = typer.Option(False, help="Overwrite existing pipeline if it exists"),
455
442
  ):
456
443
  """
457
- Create a new pipeline.
444
+ Create a new pipeline structure.
445
+
446
+ This command creates a new pipeline with the necessary directory structure,
447
+ configuration file, and skeleton module file. It prepares all the required
448
+ components for you to start implementing your pipeline logic.
458
449
 
459
450
  Args:
460
- name: Name of the new pipeline
461
- base_dir: Base directory for the new pipeline
462
- overwrite: Overwrite existing pipeline
463
- storage_options: Storage options as JSON, dict string, or key=value pairs
451
+ name: Name for the new pipeline
452
+ base_dir: Base directory to create the pipeline in
453
+ storage_options: Options for storage backends
454
+ log_level: Set the logging level
455
+ overwrite: Whether to overwrite existing pipeline with the same name
464
456
 
465
457
  Examples:
466
- pipeline new my_pipeline
458
+ # Create a new pipeline with default settings
459
+ $ pipeline new my_new_pipeline
460
+
461
+ # Create a pipeline, overwriting if it exists
462
+ $ pipeline new my_new_pipeline --overwrite
463
+
464
+ # Create a pipeline in a specific directory
465
+ $ pipeline new my_new_pipeline --base-dir /path/to/project
467
466
  """
468
467
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
469
-
470
468
  with PipelineManager(
471
469
  base_dir=base_dir,
472
470
  storage_options=parsed_storage_options or {},
471
+ log_level=log_level,
473
472
  ) as manager:
474
473
  manager.new(name=name, overwrite=overwrite)
474
+ logger.info(f"New pipeline structure created for '{name}'.")
475
475
 
476
476
 
477
477
  @app.command()
478
478
  def delete(
479
- name: str,
480
- base_dir: str | None = None,
481
- cfg: bool = False,
482
- module: bool = False,
483
- storage_options: str | None = None,
479
+ name: str = typer.Argument(..., help="Name of the pipeline to delete"),
480
+ base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
481
+ cfg: bool = typer.Option(False, "--cfg", "-c", help="Delete only the configuration file"),
482
+ module: bool = typer.Option(False, "--module", "-m", help="Delete only the pipeline module"),
483
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
484
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
484
485
  ):
485
486
  """
486
- Delete the specified pipeline.
487
+ Delete a pipeline's configuration and/or module files.
488
+
489
+ This command removes a pipeline's configuration file and/or module file from the project.
490
+ If neither --cfg nor --module is specified, both will be deleted.
487
491
 
488
492
  Args:
489
493
  name: Name of the pipeline to delete
490
- base_dir: Base directory for the pipeline
491
- cfg: Remove associated configuration
492
- module: Remove associated module
493
- storage_options: Storage options as JSON, dict string, or key=value pairs
494
+ base_dir: Base directory containing the pipeline
495
+ cfg: Delete only the configuration file
496
+ module: Delete only the pipeline module
497
+ storage_options: Options for storage backends
498
+ log_level: Set the logging level
494
499
 
495
500
  Examples:
496
- pipeline delete my_pipeline
501
+ # Delete a pipeline (both config and module)
502
+ $ pipeline delete my_pipeline
503
+
504
+ # Delete only the configuration file
505
+ $ pipeline delete my_pipeline --cfg
506
+
507
+ # Delete only the module file
508
+ $ pipeline delete my_pipeline --module
497
509
  """
498
510
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
499
511
 
500
- with Pipeline(
501
- name=name,
512
+ # If neither flag is set, default to deleting both
513
+ delete_cfg = cfg or not (cfg or module)
514
+ delete_module = module or not (cfg or module)
515
+
516
+ with PipelineManager(
502
517
  base_dir=base_dir,
503
518
  storage_options=parsed_storage_options or {},
504
- ) as pipeline:
505
- pipeline.delete(cfg=cfg, module=module)
519
+ log_level=log_level,
520
+ ) as manager:
521
+ manager.delete(name=name, cfg=delete_cfg, module=delete_module)
522
+
523
+ deleted_parts = []
524
+ if delete_cfg:
525
+ deleted_parts.append("config")
526
+ if delete_module:
527
+ deleted_parts.append("module")
528
+ logger.info(f"Pipeline '{name}' deleted ({', '.join(deleted_parts)})." if deleted_parts else f"Pipeline '{name}' - nothing specified to delete.")
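The two flags default to deleting both artifacts when neither is given; the expressions above reduce to this small truth table:

    # cfg flag, module flag -> delete_cfg, delete_module
    for cfg, module in [(False, False), (True, False), (False, True), (True, True)]:
        delete_cfg = cfg or not (cfg or module)
        delete_module = module or not (cfg or module)
        print((cfg, module), "->", (delete_cfg, delete_module))
    # (False, False) -> (True, True); every other combination deletes only what was requested.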
506
529
 
507
530
 
508
531
  @app.command()
509
532
  def show_dag(
510
- name: str, base_dir: str | None = None, storage_options: str | None = None
533
+ name: str = typer.Argument(..., help="Name of the pipeline to visualize"),
534
+ base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
535
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
536
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
537
+ format: str = typer.Option("png", help="Output format (e.g., png, svg, pdf). If 'raw', returns object."),
511
538
  ):
512
539
  """
513
- Show the DAG of the specified pipeline.
540
+ Show the DAG (Directed Acyclic Graph) of a pipeline.
541
+
542
+ This command generates and displays a visual representation of the pipeline's
543
+ execution graph, showing how nodes are connected and dependencies between them.
514
544
 
515
545
  Args:
516
- name: Name of the pipeline to show
517
- base_dir: Base directory for the pipeline
518
- storage_options: Storage options as JSON, dict string, or key=value pairs
546
+ name: Name of the pipeline to visualize
547
+ base_dir: Base directory containing the pipeline
548
+ storage_options: Options for storage backends
549
+ log_level: Set the logging level
550
+ format: Output format for the visualization
519
551
 
520
552
  Examples:
521
- pipeline show-dag my_pipeline
553
+ # Show pipeline DAG in PNG format (default)
554
+ $ pipeline show-dag my_pipeline
555
+
556
+ # Generate SVG format visualization
557
+ $ pipeline show-dag my_pipeline --format svg
558
+
559
+ # Get raw graphviz object
560
+ $ pipeline show-dag my_pipeline --format raw
522
561
  """
523
562
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
563
+ is_raw = format.lower() == "raw"
524
564
 
525
- with Pipeline(
526
- name=name,
565
+ with PipelineManager(
527
566
  base_dir=base_dir,
528
567
  storage_options=parsed_storage_options or {},
529
- ) as pipeline:
530
- pipeline.show_dag()
568
+ log_level=log_level,
569
+ ) as manager:
570
+ # Manager's show_dag likely handles rendering or returning raw object
571
+ try:
572
+ graph_or_none = manager.show_dag(name=name, format=format if not is_raw else "png", raw=is_raw)
573
+ if is_raw and graph_or_none:
574
+ print("Graphviz object returned (not rendered):")
575
+ # print(graph_or_none) # Or handle as needed
576
+ elif not is_raw:
577
+ logger.info(f"DAG for pipeline '{name}' displayed/saved (format: {format}).")
578
+ except ImportError:
579
+ logger.error("Graphviz is not installed. Cannot show/save DAG. Install with: pip install graphviz")
580
+ except Exception as e:
581
+ logger.error(f"Failed to generate DAG for pipeline '{name}': {e}")
531
582
 
532
583
 
533
584
  @app.command()
534
585
  def save_dag(
535
- name: str, base_dir: str | None = None, storage_options: str | None = None
586
+ name: str = typer.Argument(..., help="Name of the pipeline to visualize"),
587
+ base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
588
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
589
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
590
+ format: str = typer.Option("png", help="Output format (e.g., png, svg, pdf)"),
591
+ output_path: str | None = typer.Option(None, help="Custom path to save the file (default: <name>.<format>)"),
536
592
  ):
537
593
  """
538
- Save the DAG of the specified pipeline.
594
+ Save the DAG (Directed Acyclic Graph) of a pipeline to a file.
595
+
596
+ This command generates a visual representation of the pipeline's execution graph
597
+ and saves it to a file in the specified format.
539
598
 
540
599
  Args:
541
- name: Name of the pipeline to save
542
- base_dir: Base directory for the pipeline
543
- storage_options: Storage options as JSON, dict string, or key=value pairs
600
+ name: Name of the pipeline to visualize
601
+ base_dir: Base directory containing the pipeline
602
+ storage_options: Options for storage backends
603
+ log_level: Set the logging level
604
+ format: Output format for the visualization
605
+ output_path: Custom file path to save the output (defaults to pipeline name)
544
606
 
545
607
  Examples:
546
- pipeline save-dag my_pipeline
608
+ # Save pipeline DAG in PNG format (default)
609
+ $ pipeline save-dag my_pipeline
610
+
611
+ # Save in SVG format
612
+ $ pipeline save-dag my_pipeline --format svg
613
+
614
+ # Save to a custom location
615
+ $ pipeline save-dag my_pipeline --output-path ./visualizations/my_graph.png
547
616
  """
548
617
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
549
-
550
- with Pipeline(
551
- name=name,
618
+ with PipelineManager(
552
619
  base_dir=base_dir,
553
620
  storage_options=parsed_storage_options or {},
554
- ) as pipeline:
555
- pipeline.save_dag()
621
+ log_level=log_level,
622
+ ) as manager:
623
+ try:
624
+ file_path = manager.save_dag(name=name, format=format, output_path=output_path)
625
+ logger.info(f"DAG for pipeline '{name}' saved to {file_path}.")
626
+ except ImportError:
627
+ logger.error("Graphviz is not installed. Cannot save DAG. Install with: pip install graphviz")
628
+ except Exception as e:
629
+ logger.error(f"Failed to save DAG for pipeline '{name}': {e}")
556
630
 
557
631
 
558
632
  @app.command()
559
- def show_pipelines(base_dir: str | None = None, storage_options: str | None = None):
633
+ def show_pipelines(
634
+ base_dir: str | None = typer.Option(None, help="Base directory containing pipelines"),
635
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
636
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
637
+ format: str = typer.Option("table", help="Output format (table, json, yaml)"),
638
+ ):
560
639
  """
561
- List all available pipelines.
640
+ List all available pipelines in the project.
641
+
642
+ This command displays a list of all pipelines defined in the project,
643
+ providing an overview of what pipelines are available to run or schedule.
562
644
 
563
645
  Args:
564
- base_dir: Base directory for the pipelines
565
- storage_options: Storage options as JSON, dict string, or key=value pairs
646
+ base_dir: Base directory containing pipelines
647
+ storage_options: Options for storage backends
648
+ log_level: Set the logging level
649
+ format: Output format for the list (table, json, yaml)
566
650
 
567
651
  Examples:
568
- pipeline list-pipelines
652
+ # List all pipelines in table format (default)
653
+ $ pipeline show-pipelines
654
+
655
+ # Output in JSON format
656
+ $ pipeline show-pipelines --format json
657
+
658
+ # List pipelines from a specific directory
659
+ $ pipeline show-pipelines --base-dir /path/to/project
569
660
  """
570
661
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
571
662
  with PipelineManager(
572
663
  base_dir=base_dir,
573
664
  storage_options=parsed_storage_options or {},
665
+ log_level=log_level,
574
666
  ) as manager:
575
- manager.show_pipelines()
667
+ manager.show_pipelines(format=format)
576
668
 
577
669
 
578
670
  @app.command()
579
671
  def show_summary(
580
- name: str | None = None,
581
- cfg: bool = True,
582
- module: bool = True,
583
- base_dir: str | None = None,
584
- storage_options: str | None = None,
672
+ name: str | None = typer.Option(None, help="Name of specific pipeline to show (all pipelines if not specified)"),
673
+ cfg: bool = typer.Option(True, help="Include configuration details"),
674
+ code: bool = typer.Option(True, help="Include code/module details"),
675
+ project: bool = typer.Option(True, help="Include project context"),
676
+ base_dir: str | None = typer.Option(None, help="Base directory containing pipelines"),
677
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
678
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
679
+ to_html: bool = typer.Option(False, help="Output summary as HTML"),
680
+ to_svg: bool = typer.Option(False, help="Output summary as SVG (if applicable)"),
681
+ output_file: str | None = typer.Option(None, help="Save output to specified file instead of printing"),
585
682
  ):
586
683
  """
587
- Show the summary of the specified pipeline.
684
+ Show summary information for one or all pipelines.
685
+
686
+ This command displays detailed information about pipelines including their
687
+ configuration, code structure, and project context. You can view information
688
+ for a specific pipeline or get an overview of all pipelines.
588
689
 
589
690
  Args:
590
- name: Name of the pipeline to show
591
- base_dir: Base directory for the pipeline
592
- cfg: Show configuration
593
- module: Show module information
594
- storage_options: Storage options as JSON, dict string, or key=value pairs
691
+ name: Name of specific pipeline to summarize (all if not specified)
692
+ cfg: Include configuration details
693
+ code: Include code/module details
694
+ project: Include project context information
695
+ base_dir: Base directory containing pipelines
696
+ storage_options: Options for storage backends
697
+ log_level: Set the logging level
698
+ to_html: Generate HTML output instead of text
699
+ to_svg: Generate SVG output (where applicable)
700
+ output_file: File path to save the output instead of printing to console
595
701
 
596
702
  Examples:
597
- pipeline show-summary my_pipeline
703
+ # Show summary for all pipelines
704
+ $ pipeline show-summary
705
+
706
+ # Show summary for a specific pipeline
707
+ $ pipeline show-summary --name my_pipeline
708
+
709
+ # Show only configuration information
710
+ $ pipeline show-summary --name my_pipeline --cfg --no-code --no-project
711
+
712
+ # Generate HTML report
713
+ $ pipeline show-summary --to-html --output-file pipeline_report.html
598
714
  """
599
715
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
716
+ with PipelineManager(
717
+ base_dir=base_dir,
718
+ storage_options=parsed_storage_options or {},
719
+ log_level=log_level,
720
+ ) as manager:
721
+ # Assumes manager.show_summary handles printing/returning formatted output
722
+ summary_output = manager.show_summary(
723
+ name=name,
724
+ cfg=cfg,
725
+ code=code,
726
+ project=project,
727
+ to_html=to_html,
728
+ to_svg=to_svg,
729
+ )
730
+
731
+ if summary_output:
732
+ if output_file:
733
+ with open(output_file, 'w') as f:
734
+ f.write(summary_output)
735
+ logger.info(f"Summary saved to {output_file}")
736
+ else:
737
+ print(summary_output)
738
+ # Otherwise, assume manager printed the summary
739
+
740
+
741
+ @app.command()
742
+ def add_hook(
743
+ name: str = typer.Argument(..., help="Name of the pipeline to add the hook to"),
744
+ function_name: str = typer.Option(..., "--function", "-f", help="Name of the hook function defined in the pipeline module"),
745
+ type: Annotated[HookType, typer.Option(help="Type of hook to add")] = HookType.MQTT_BUILD_CONFIG,
746
+ to: str | None = typer.Option(None, help="Target node name or tag (required for node hooks)"),
747
+ base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
748
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
749
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
750
+ ):
751
+ """
752
+ Add a hook to a pipeline configuration.
753
+
754
+ This command adds a hook function to a pipeline's configuration. Hooks are functions
755
+ that are called at specific points during pipeline execution to perform additional
756
+ tasks like logging, monitoring, or data validation.
757
+
758
+ Args:
759
+ name: Name of the pipeline to add the hook to
760
+ function_name: Name of the hook function (must be defined in the pipeline module)
761
+ type: Type of hook (determines when the hook is called during execution)
762
+ to: Target node or tag (required for node-specific hooks)
763
+ base_dir: Base directory containing the pipeline
764
+ storage_options: Options for storage backends
765
+ log_level: Set the logging level
766
+
767
+ Examples:
768
+ # Add a post-run hook
769
+ $ pipeline add-hook my_pipeline --function log_results
770
+
771
+ # Add a pre-run hook
772
+ $ pipeline add-hook my_pipeline --function validate_inputs --type PRE_RUN
773
+
774
+ # Add a node-specific hook (executed before a specific node runs)
775
+ $ pipeline add-hook my_pipeline --function validate_data --type NODE_PRE_EXECUTE --to data_processor
776
+
777
+ # Add a hook for all nodes with a specific tag
778
+ $ pipeline add-hook my_pipeline --function log_metrics --type NODE_POST_EXECUTE --to @metrics
779
+ """
780
+ parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
781
+
782
+ # Validate 'to' argument for node hooks
783
+ if type in (HookType.NODE_PRE_EXECUTE, HookType.NODE_POST_EXECUTE) and not to:
784
+ raise typer.BadParameter("The '--to' option (target node/tag) is required for node hooks.")
600
785
 
601
786
  with PipelineManager(
602
787
  base_dir=base_dir,
603
788
  storage_options=parsed_storage_options or {},
789
+ log_level=log_level,
604
790
  ) as manager:
605
- manager.show_summary(name=name, cfg=cfg, module=module)
791
+ try:
792
+ manager.add_hook(
793
+ name=name,
794
+ type=type,
795
+ to=to,
796
+ function_name=function_name,
797
+ )
798
+ logger.info(f"Hook '{function_name}' added to pipeline '{name}' (type: {type.value}).")
799
+ except Exception as e:
800
+ logger.error(f"Failed to add hook to pipeline '{name}': {e}")
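A programmatic sketch of the same hook registration, using only the call shape visible in this diff; the function name is a hypothetical member of the pipeline module, and HookType.MQTT_BUILD_CONFIG is simply the default shown in the signature above:

    from flowerpower.pipeline.manager import HookType, PipelineManager

    with PipelineManager(base_dir=".", storage_options={}) as manager:
        manager.add_hook(
            name="my_pipeline",
            type=HookType.MQTT_BUILD_CONFIG,    # default hook type from the CLI signature
            to=None,                            # only required for node-level hook types
            function_name="build_mqtt_config",  # hypothetical function defined in the pipeline module
        )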