FlowerPower 0.9.13.1__py3-none-any.whl → 1.0.0b1__py3-none-any.whl

This diff compares two publicly released versions of the package as published to their public registry. It is provided for informational purposes only.
Files changed (85)
  1. flowerpower/__init__.py +17 -2
  2. flowerpower/cfg/__init__.py +201 -149
  3. flowerpower/cfg/base.py +122 -24
  4. flowerpower/cfg/pipeline/__init__.py +254 -0
  5. flowerpower/cfg/pipeline/adapter.py +66 -0
  6. flowerpower/cfg/pipeline/run.py +40 -11
  7. flowerpower/cfg/pipeline/schedule.py +69 -79
  8. flowerpower/cfg/project/__init__.py +149 -0
  9. flowerpower/cfg/project/adapter.py +57 -0
  10. flowerpower/cfg/project/job_queue.py +165 -0
  11. flowerpower/cli/__init__.py +92 -37
  12. flowerpower/cli/job_queue.py +878 -0
  13. flowerpower/cli/mqtt.py +32 -1
  14. flowerpower/cli/pipeline.py +559 -406
  15. flowerpower/cli/utils.py +29 -18
  16. flowerpower/flowerpower.py +12 -8
  17. flowerpower/fs/__init__.py +20 -2
  18. flowerpower/fs/base.py +350 -26
  19. flowerpower/fs/ext.py +797 -216
  20. flowerpower/fs/storage_options.py +1097 -55
  21. flowerpower/io/base.py +13 -18
  22. flowerpower/io/loader/__init__.py +28 -0
  23. flowerpower/io/loader/deltatable.py +7 -10
  24. flowerpower/io/metadata.py +1 -0
  25. flowerpower/io/saver/__init__.py +28 -0
  26. flowerpower/io/saver/deltatable.py +4 -3
  27. flowerpower/job_queue/__init__.py +252 -0
  28. flowerpower/job_queue/apscheduler/__init__.py +11 -0
  29. flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
  30. flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
  31. flowerpower/job_queue/apscheduler/manager.py +1063 -0
  32. flowerpower/job_queue/apscheduler/setup.py +524 -0
  33. flowerpower/job_queue/apscheduler/trigger.py +169 -0
  34. flowerpower/job_queue/apscheduler/utils.py +309 -0
  35. flowerpower/job_queue/base.py +382 -0
  36. flowerpower/job_queue/rq/__init__.py +10 -0
  37. flowerpower/job_queue/rq/_trigger.py +37 -0
  38. flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
  39. flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
  40. flowerpower/job_queue/rq/manager.py +1449 -0
  41. flowerpower/job_queue/rq/setup.py +150 -0
  42. flowerpower/job_queue/rq/utils.py +69 -0
  43. flowerpower/pipeline/__init__.py +5 -0
  44. flowerpower/pipeline/base.py +118 -0
  45. flowerpower/pipeline/io.py +407 -0
  46. flowerpower/pipeline/job_queue.py +505 -0
  47. flowerpower/pipeline/manager.py +1586 -0
  48. flowerpower/pipeline/registry.py +560 -0
  49. flowerpower/pipeline/runner.py +560 -0
  50. flowerpower/pipeline/visualizer.py +142 -0
  51. flowerpower/plugins/mqtt/__init__.py +12 -0
  52. flowerpower/plugins/mqtt/cfg.py +16 -0
  53. flowerpower/plugins/mqtt/manager.py +789 -0
  54. flowerpower/settings.py +110 -0
  55. flowerpower/utils/logging.py +21 -0
  56. flowerpower/utils/misc.py +57 -9
  57. flowerpower/utils/sql.py +122 -24
  58. flowerpower/utils/templates.py +2 -142
  59. flowerpower-1.0.0b1.dist-info/METADATA +324 -0
  60. flowerpower-1.0.0b1.dist-info/RECORD +94 -0
  61. flowerpower/_web/__init__.py +0 -61
  62. flowerpower/_web/routes/config.py +0 -103
  63. flowerpower/_web/routes/pipelines.py +0 -173
  64. flowerpower/_web/routes/scheduler.py +0 -136
  65. flowerpower/cfg/pipeline/tracker.py +0 -14
  66. flowerpower/cfg/project/open_telemetry.py +0 -8
  67. flowerpower/cfg/project/tracker.py +0 -11
  68. flowerpower/cfg/project/worker.py +0 -19
  69. flowerpower/cli/scheduler.py +0 -309
  70. flowerpower/cli/web.py +0 -44
  71. flowerpower/event_handler.py +0 -23
  72. flowerpower/mqtt.py +0 -609
  73. flowerpower/pipeline.py +0 -2499
  74. flowerpower/scheduler.py +0 -680
  75. flowerpower/tui.py +0 -79
  76. flowerpower/utils/datastore.py +0 -186
  77. flowerpower/utils/eventbroker.py +0 -127
  78. flowerpower/utils/executor.py +0 -58
  79. flowerpower/utils/trigger.py +0 -140
  80. flowerpower-0.9.13.1.dist-info/METADATA +0 -586
  81. flowerpower-0.9.13.1.dist-info/RECORD +0 -76
  82. /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
  83. {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/WHEEL +0 -0
  84. {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/entry_points.txt +0 -0
  85. {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/top_level.txt +0 -0
flowerpower/cli/pipeline.py
@@ -1,647 +1,800 @@
- import importlib.util
-
+ # Import necessary libraries
  import typer
- from typing_extensions import Annotated
  from loguru import logger
+ from typing_extensions import Annotated
+ import datetime as dt
+ import duration_parser
+ from ..pipeline.manager import HookType, PipelineManager
+ from ..utils.logging import setup_logging
+ from .utils import parse_dict_or_list_param#, parse_param_dict

-
- # Import your existing pipeline functions
- # from ..pipeline import (
- # add as add_pipeline_,
- # add_job as add_pipeline_job_,
- # all_pipelines as all_pipelines_,
- # delete as delete_pipeline_,
- # get_summary as get_pipeline_summary_,
- # new as new_pipeline_,
- # run as run_pipeline_,
- # run_job as run_pipeline_job_,
- # schedule as schedule_pipeline_,
- # save_dag as save_pipeline_dag_,
- # show_dag as show_pipeline_dag_,
- # show_summary as show_pipeline_summary_,
- # # start_mqtt_listener as start_mqtt_listener_,
- # )
- from ..pipeline import Pipeline, PipelineManager, HookType
- from .utils import parse_dict_or_list_param, parse_param_dict
-
- # Optional imports
- if importlib.util.find_spec("apscheduler"):
- from ..scheduler import get_schedule_manager
- from ..scheduler import start_worker as start_worker_
- else:
- get_schedule_manager = None
- start_worker_ = None
-
+ setup_logging()

  app = typer.Typer(help="Pipeline management commands")


  @app.command()
  def run(
- name: str,
- executor: str | None = None,
- base_dir: str | None = None,
- inputs: str | None = None,
- final_vars: str | None = None,
- config: str | None = None,
- with_tracker: bool = False,
- with_opentelemetry: bool = False,
- with_progressbar: bool = False,
- storage_options: str | None = None,
+ name: str = typer.Argument(..., help="Name of the pipeline to run"),
+ executor: str | None = typer.Option(None, help="Executor to use for running the pipeline"),
+ base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+ inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
+ final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
+ config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
+ cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+ with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
+ max_retries: int = typer.Option(0, help="Maximum number of retry attempts on failure"),
+ retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
+ jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
  ):
  """
- Run the specified pipeline.
+ Run a pipeline immediately.
+
+ This command executes a pipeline with the specified configuration and inputs.
+ The pipeline will run synchronously, and the command will wait for completion.

  Args:
  name: Name of the pipeline to run
- executor: Executor to use
- base_dir: Base directory for the pipeline
- inputs: Input parameters as JSON, dict string, or key=value pairs
- final_vars: Final variables as JSON or list
- config: Config for the hamilton pipeline executor
- with_tracker: Enable tracking with hamilton ui
- with_opentelemetry: Enable OpenTelemetry tracing
- with_progressbar: Enable progress bar
- storage_options: Storage options as JSON, dict string, or key=value pairs
+ executor: Type of executor to use
+ base_dir: Base directory containing pipelines and configurations
+ inputs: Input parameters for the pipeline
+ final_vars: Final variables to request from the pipeline
+ config: Configuration for the Hamilton executor
+ cache: Cache configuration for improved performance
+ storage_options: Options for storage backends
+ log_level: Set the logging level
+ with_adapter: Configuration for adapters like trackers or monitors
+ max_retries: Maximum number of retry attempts on failure
+ retry_delay: Base delay between retries in seconds
+ jitter_factor: Random factor applied to delay for jitter (0-1)

  Examples:
- # JSON inputs
- pipeline run my_pipeline --inputs '{"key": "value"}'
+ # Run a pipeline with default settings
+ $ pipeline run my_pipeline
+
+ # Run with custom inputs
+ $ pipeline run my_pipeline --inputs '{"data_path": "data/myfile.csv", "limit": 100}'

- # Dict string inputs
- pipeline run my_pipeline --inputs "{'key': 'value'}"
+ # Specify which final variables to calculate
+ $ pipeline run my_pipeline --final-vars '["output_table", "summary_metrics"]'

- # Key-value pair inputs
- pipeline run my_pipeline --inputs 'key1=value1,key2=value2'
+ # Configure caching
+ $ pipeline run my_pipeline --cache '{"type": "memory", "ttl": 3600}'

- # List final vars
- pipeline run my_pipeline --final-vars '["var1", "var2"]'
+ # Use a different executor
+ $ pipeline run my_pipeline --executor distributed

- # Storage options
- pipeline run my_pipeline --storage-options 'endpoint=http://localhost,use_ssl=true'
+ # Enable adapters for monitoring/tracking
+ $ pipeline run my_pipeline --with-adapter '{"tracker": true, "opentelemetry": true}'
+
+ # Set a specific logging level
+ $ pipeline run my_pipeline --log-level debug
+
+ # Configure automatic retries on failure
+ $ pipeline run my_pipeline --max-retries 3 --retry-delay 2.0 --jitter-factor 0.2
  """
  parsed_inputs = parse_dict_or_list_param(inputs, "dict")
  parsed_config = parse_dict_or_list_param(config, "dict")
+ parsed_cache = parse_dict_or_list_param(cache, "dict")
  parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+ parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")

- with Pipeline(
- name=name,
+ with PipelineManager(
  base_dir=base_dir,
- storage_options=parsed_storage_options or {},
- ) as pipeline:
- pipeline.run(
- executor=executor,
+ storage_options=parsed_storage_options or {},
+ log_level=log_level,
+ ) as manager:
+ _ = manager.run(
+ name=name,
  inputs=parsed_inputs,
  final_vars=parsed_final_vars,
  config=parsed_config,
- with_tracker=with_tracker,
- with_opentelemetry=with_opentelemetry,
- with_progressbar=with_progressbar,
+ cache=parsed_cache,
+ executor_cfg=executor,
+ with_adapter_cfg=parsed_with_adapter,
+ max_retries=max_retries,
+ retry_delay=retry_delay,
+ jitter_factor=jitter_factor,
  )
+ logger.info(f"Pipeline '{name}' finished running.")


  @app.command()
  def run_job(
- name: str,
- executor: str | None = None,
- base_dir: str | None = None,
- inputs: str | None = None,
- final_vars: str | None = None,
- config: str | None = None,
- with_tracker: bool = False,
- with_opentelemetry: bool = False,
- with_progressbar: bool = False,
- storage_options: str | None = None,
+ name: str = typer.Argument(..., help="Name or ID of the pipeline job to run"),
+ executor: str | None = typer.Option(None, help="Executor to use for running the job"),
+ base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+ inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
+ final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
+ config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
+ cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+ with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
+ max_retries: int = typer.Option(0, help="Maximum number of retry attempts on failure"),
+ retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
+ jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
  ):
  """
- Run the specified pipeline job.
+ Run a specific pipeline job.
+
+ This command runs an existing job by its ID. The job should have been previously
+ added to the system via the add-job command or through scheduling.

  Args:
- name: Name of the pipeline job to run
- executor: Executor to use
- base_dir: Base directory for the pipeline
- inputs: Input parameters as JSON, dict string, or key=value pairs
- final_vars: Final variables as JSON or list
- config: Config for the hamilton pipeline executor
- with_tracker: Enable tracking with hamilton ui
- with_opentelemetry: Enable OpenTelemetry tracing
- with_progressbar: Enable progress bar
- storage_options: Storage options as JSON, dict string, or key=value pairs
+ name: Job ID to run
+ executor: Type of executor to use (maps to executor_cfg in manager)
+ base_dir: Base directory containing pipelines and configurations
+ inputs: Input parameters for the pipeline
+ final_vars: Final variables to request from the pipeline
+ config: Configuration for the Hamilton executor
+ cache: Cache configuration
+ storage_options: Options for storage backends
+ log_level: Set the logging level
+ with_adapter: Configuration for adapters like trackers or monitors
+ max_retries: Maximum number of retry attempts on failure
+ retry_delay: Base delay between retries in seconds
+ jitter_factor: Random factor applied to delay for jitter (0-1)

  Examples:
- # JSON inputs
- pipeline run-job 123 --inputs '{"key": "value"}'
+ # Run a job with a specific ID
+ $ pipeline run-job job-123456

- # Dict string inputs
- pipeline run-job 123 --inputs "{'key': 'value'}"
+ # Run a job with custom inputs
+ $ pipeline run-job job-123456 --inputs '{"data_path": "data/myfile.csv"}'

- # Key-value pair inputs
- pipeline run-job 123 --inputs 'key1=value1,key2=value2'
+ # Specify a different executor
+ $ pipeline run-job job-123456 --executor local

- # List final vars
- pipeline run-job 123 --final-vars '["var1", "var2"]'
+ # Use caching for better performance
+ $ pipeline run-job job-123456 --cache '{"type": "memory"}'

- # Storage options
- pipeline run-job 123 --storage-options 'endpoint=http://localhost,use_ssl=true'
+ # Configure adapters for monitoring
+ $ pipeline run-job job-123456 --with-adapter '{"tracker": true, "opentelemetry": false}'
+
+ # Set up automatic retries for resilience
+ $ pipeline run-job job-123456 --max-retries 3 --retry-delay 2.0
  """
  parsed_inputs = parse_dict_or_list_param(inputs, "dict")
  parsed_config = parse_dict_or_list_param(config, "dict")
+ parsed_cache = parse_dict_or_list_param(cache, "dict")
  parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+ parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")

- with Pipeline(
- name=name,
+ with PipelineManager(
  base_dir=base_dir,
- storage_options=parsed_storage_options or {},
- ) as pipeline:
- pipeline.run_job(
- executor=executor,
+ storage_options=parsed_storage_options or {},
+ log_level=log_level,
+ ) as manager:
+ _ = manager.run_job(
+ name=name,
  inputs=parsed_inputs,
  final_vars=parsed_final_vars,
  config=parsed_config,
- with_tracker=with_tracker,
- with_opentelemetry=with_opentelemetry,
- with_progressbar=with_progressbar,
+ cache=parsed_cache,
+ executor_cfg=executor,
+ with_adapter_cfg=parsed_with_adapter,
+ max_retries=max_retries,
+ retry_delay=retry_delay,
+ jitter_factor=jitter_factor,
  )
+ logger.info(f"Job '{name}' finished running.")


  @app.command()
  def add_job(
- name: str,
- executor: str | None = None,
- base_dir: str | None = None,
- inputs: str | None = None,
- final_vars: str | None = None,
- config: str | None = None,
- with_tracker: bool = False,
- with_opentelemetry: bool = False,
- with_progressbar: bool = False,
- storage_options: str | None = None,
+ name: str = typer.Argument(..., help="Name of the pipeline to add as a job"),
+ executor: str | None = typer.Option(None, help="Executor to use for running the job"),
+ base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+ inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
+ final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
+ config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
+ cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+ with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
+ run_at: str | None = typer.Option(None, help="Run at a specific time (ISO format)"),
+ run_in: str | None = typer.Option(None, help="Run in a specific interval (e.g., '5m', '1h', '12m34s')"),
+ max_retries: int = typer.Option(3, help="Maximum number of retry attempts on failure"),
+ retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
+ jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
  ):
  """
- Add a job to the specified pipeline.
+ Add a pipeline job to the queue.
+
+ This command adds a job to the queue for later execution. The job is based on
+ an existing pipeline with customized inputs and configuration.

  Args:
- name: Name of the pipeline to add as job
- executor: Executor to use
- base_dir: Base directory for the pipeline
- inputs: Input parameters as JSON, dict string, or key=value pairs
- final_vars: Final variables as JSON or list
- config: Config for the hamilton pipeline executor
- with_tracker: Enable tracking with hamilton ui
- with_opentelemetry: Enable OpenTelemetry tracing
- with_progressbar: Enable progress bar
- storage_options: Storage options as JSON, dict string, or key=value pairs
+ name: Pipeline name to add as a job
+ executor: Type of executor to use
+ base_dir: Base directory containing pipelines and configurations
+ inputs: Input parameters for the pipeline
+ final_vars: Final variables to request from the pipeline
+ config: Configuration for the Hamilton executor
+ cache: Cache configuration
+ storage_options: Options for storage backends
+ log_level: Set the logging level
+ with_adapter: Configuration for adapters like trackers or monitors
+ run_at: Run the job at a specific time (ISO format)
+ run_in: Run the job in a specific interval (e.g., '5m', '1h')
+ max_retries: Maximum number of retry attempts on failure
+ retry_delay: Base delay between retries in seconds
+ jitter_factor: Random factor applied to delay for jitter (0-1)

  Examples:
- # JSON inputs
- pipeline add-job my_pipeline --inputs '{"key": "value"}'
+ # Add a basic job
+ $ pipeline add-job my_pipeline

- # Dict string inputs
- pipeline add-job my_pipeline --inputs "{'key': 'value'}"
+ # Add a job with custom inputs
+ $ pipeline add-job my_pipeline --inputs '{"data_path": "data/myfile.csv"}'

- # Key-value pair inputs
- pipeline add-job my_pipeline --inputs 'key1=value1,key2=value2'
+ # Specify final variables to calculate
+ $ pipeline add-job my_pipeline --final-vars '["output_table", "metrics"]'

- # List final vars
- pipeline add-job my_pipeline --final-vars '["var1", "var2"]'
+ # Configure caching
+ $ pipeline add-job my_pipeline --cache '{"type": "memory", "ttl": 3600}'

- # Storage options
- pipeline add-job my_pipeline --storage-options 'endpoint=http://localhost,use_ssl=true'
+ # Use a specific log level
+ $ pipeline add-job my_pipeline --log-level debug
+
+ # Configure automatic retries for resilience
+ $ pipeline add-job my_pipeline --max-retries 5 --retry-delay 2.0 --jitter-factor 0.2
  """
  parsed_inputs = parse_dict_or_list_param(inputs, "dict")
  parsed_config = parse_dict_or_list_param(config, "dict")
+ parsed_cache = parse_dict_or_list_param(cache, "dict")
  parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+ parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
+ run_at = dt.datetime.fromisoformat(run_at) if run_at else None
+ run_in = duration_parser.parse(run_in) if run_in else None

- storage_options = (parsed_storage_options or {},)
-
- with Pipeline(
- name=name,
+ with PipelineManager(
  base_dir=base_dir,
  storage_options=parsed_storage_options or {},
- ) as pipeline:
- pipeline.add_job(
- executor=executor,
+ log_level=log_level,
+ ) as manager:
+ job_id = manager.add_job(
+ name=name,
  inputs=parsed_inputs,
  final_vars=parsed_final_vars,
  config=parsed_config,
- with_tracker=with_tracker,
- with_opentelemetry=with_opentelemetry,
- with_progressbar=with_progressbar,
+ cache=parsed_cache,
+ executor_cfg=executor,
+ with_adapter_cfg=parsed_with_adapter,
+ run_at=run_at,
+ run_in=run_in,
+ max_retries=max_retries,
+ retry_delay=retry_delay,
+ jitter_factor=jitter_factor,
  )
+ logger.info(f"Job {job_id} added for pipeline '{name}'.")


  @app.command()
  def schedule(
- name: str,
- executor: str | None = None,
- base_dir: str | None = None,
- trigger_type: str = "cron",
- inputs: str | None = None,
- final_vars: str | None = None,
- config: str | None = None,
- with_tracker: bool = False,
- with_opentelemetry: bool = False,
- with_progressbar: bool = False,
- paused: bool = False,
- coalesce: str = "latest",
- misfire_grace_time: float | None = None,
- max_jitter: float | None = None,
- max_running_jobs: int | None = None,
- conflict_policy: str = "do_nothing",
- crontab: str | None = None,
- cron_params: str | None = None,
- interval_params: str | None = None,
- calendarinterval_params: str | None = None,
- date_params: str | None = None,
- storage_options: str | None = None,
- overwrite: bool = False,
+ name: str = typer.Argument(..., help="Name of the pipeline to schedule"),
+ executor: str | None = typer.Option(None, help="Executor to use for running the job"),
+ base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+ inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
+ final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
+ config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
+ cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
+ cron: str | None = typer.Option(None, help="Cron expression for scheduling"),
+ interval: str | None = typer.Option(None, help="Interval for scheduling (e.g., '5m', '1h')"),
+ date: str | None = typer.Option(None, help="Specific date and time for scheduling (ISO format)"),
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+ with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
+ overwrite: bool = typer.Option(False, help="Overwrite existing schedule if it exists"),
+ schedule_id: str | None = typer.Option(None, help="Custom ID for the schedule (autogenerated if not provided)"),
+ max_retries: int = typer.Option(3, help="Maximum number of retry attempts on failure"),
+ retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
+ jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
  ):
  """
- Schedule a pipeline with various configuration options.
+ Schedule a pipeline to run at specified times.
+
+ This command schedules a pipeline to run automatically based on various
+ scheduling triggers like cron expressions, time intervals, or specific dates.

  Args:
- name: Name of the pipeline to schedule
- executor: Executor to use
- base_dir: Base directory for the pipeline
- trigger_type: Type of schedule
- inputs: Input parameters as JSON, dict string, or key=value pairs
- final_vars: Final variables as JSON or list
- config: Config for the hamilton pipeline executor
- with_tracker: Enable tracking with hamilton ui
- with_opentelemetry: Enable OpenTelemetry tracing
- with_progressbar: Enable progress bar
- paused: Start the job in paused state
- coalesce: Coalesce policy
- misfire_grace_time: Misfire grace time
- max_jitter: Maximum jitter
- max_running_jobs: Maximum running jobs
- conflict_policy: Conflict policy
- crontab: Crontab expression
- cron_params: Cron parameters as JSON or key=value pairs
- interval_params: Interval parameters as JSON or key=value pairs
- calendarinterval_params: Calendar interval parameters as JSON or key=value pairs
- date_params: Date parameters as JSON or key=value pairs
- storage_options: Storage options as JSON, dict string, or key=value pairs
- overwrite: Overwrite existing schedule
+ name: Pipeline name to schedule
+ executor: Type of executor to use
+ base_dir: Base directory containing pipelines and configurations
+ inputs: Input parameters for the pipeline
+ final_vars: Final variables to request from the pipeline
+ config: Configuration for the Hamilton executor
+ cache: Cache configuration
+ cron: Cron expression for scheduling (e.g., "0 * * * *")
+ interval: Interval for scheduling (e.g., "5m", "1h")
+ date: Specific date and time for scheduling (ISO format)
+ storage_options: Options for storage backends
+ log_level: Set the logging level
+ with_adapter: Configuration for adapters like trackers or monitors
+ overwrite: Overwrite existing schedule with same ID
+ schedule_id: Custom identifier for the schedule
+ max_retries: Maximum number of retry attempts on failure
+ retry_delay: Base delay between retries in seconds
+ jitter_factor: Random factor applied to delay for jitter (0-1)

  Examples:
- # JSON inputs
- pipeline schedule my_pipeline --inputs '{"key": "value"}'
-
- # Dict string inputs
- pipeline schedule my_pipeline --inputs "{'key': 'value'}"
-
- # Key-value pair inputs
- pipeline schedule my_pipeline --inputs 'key1=value1,key2=value2'
-
- # List final vars
- pipeline schedule my_pipeline --final-vars '["var1", "var2"]'
+ # Schedule with cron expression (every hour)
+ $ pipeline schedule my_pipeline --trigger-type cron --crontab "0 * * * *"

- # Storage options
- pipeline schedule my_pipeline --storage-options 'endpoint=http://localhost,use_ssl=true'
+ # Schedule to run every 15 minutes
+ $ pipeline schedule my_pipeline --trigger-type interval --interval_params minutes=15

- # Cron schedule
- pipeline schedule my_pipeline --trigger-type cron --crontab '0 0 * * *'
+ # Schedule to run at a specific date and time
+ $ pipeline schedule my_pipeline --trigger-type date --date_params run_date="2025-12-31 23:59:59"

- # Interval schedule
- pipeline schedule my_pipeline --trigger-type interval --interval_params minutes=1
+ # Schedule with custom inputs and cache settings
+ $ pipeline schedule my_pipeline --inputs '{"source": "database"}' --cache '{"type": "redis"}'

- # Calendar interval schedule
- pipeline schedule my_pipeline --trigger-type calendarinterval --calendarinterval_params month=5
-
- # Date schedule
- pipeline schedule my_pipeline --trigger-type date --date_params run_date='2021-01-01 12:00:01'
+ # Create a schedule in paused state
+ $ pipeline schedule my_pipeline --crontab "0 9 * * 1-5" --paused

+ # Set a custom schedule ID
+ $ pipeline schedule my_pipeline --crontab "0 12 * * *" --schedule_id "daily-noon-run"
+
+ # Configure automatic retries for resilience
+ $ pipeline schedule my_pipeline --max-retries 5 --retry-delay 2.0 --jitter-factor 0.2
  """
- if get_schedule_manager is None:
- raise ValueError("APScheduler not installed. Please install it first.")
-
- # Parse inputs
  parsed_inputs = parse_dict_or_list_param(inputs, "dict")
  parsed_config = parse_dict_or_list_param(config, "dict")
+ parsed_cache = parse_dict_or_list_param(cache, "dict")
  parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+ parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
+ interval = duration_parser.parse(interval) if interval else None
+ cron = cron if cron else None
+ date = dt.datetime.fromisoformat(date) if date else None

- # Parse various parameter dictionaries
- cron_params_dict = parse_param_dict(cron_params)
- interval_params_dict = parse_param_dict(interval_params)
- calendarinterval_params_dict = parse_param_dict(calendarinterval_params)
- date_params_dict = parse_param_dict(date_params)
-
- # Combine all parameter dictionaries
- kwargs = {
- **cron_params_dict,
- **interval_params_dict,
- **calendarinterval_params_dict,
- **date_params_dict,
- }
-
- # Add crontab if provided
- if crontab is not None:
- kwargs["crontab"] = crontab
-
- # Convert numeric parameters
- for key in ["weeks", "days", "hours", "minutes", "seconds"]:
- if key in kwargs:
- try:
- kwargs[key] = float(kwargs[key])
- except ValueError:
- logger.warning(f"Could not convert {key} to float: {kwargs[key]}")
-
- with Pipeline(
- name=name,
+ with PipelineManager(
  base_dir=base_dir,
  storage_options=parsed_storage_options or {},
- ) as pipeline:
- id_ = pipeline.schedule(
- executor=executor,
- trigger_type=trigger_type,
+ log_level=log_level,
+ ) as manager:
+ # Combine common schedule kwargs
+
+ id_ = manager.schedule(
+ name=name,
  inputs=parsed_inputs,
  final_vars=parsed_final_vars,
  config=parsed_config,
- with_tracker=with_tracker,
- with_opentelemetry=with_opentelemetry,
- with_progressbar=with_progressbar,
- paused=paused,
- coalesce=coalesce,
- misfire_grace_time=misfire_grace_time,
- max_jitter=max_jitter,
- max_running_jobs=max_running_jobs,
- conflict_policy=conflict_policy,
+ cache=parsed_cache,
+ executor_cfg=executor,
+ with_adapter_cfg=parsed_with_adapter,
+ cron=cron,
+ interval=interval,
+ date=date,
  overwrite=overwrite,
- **kwargs,
+ schedule_id=schedule_id,
+ max_retries=max_retries,
+ retry_delay=retry_delay,
+ jitter_factor=jitter_factor,
  )

- logger.info(f"Job {id_} scheduled.")
+ logger.info(f"Pipeline '{name}' scheduled with ID {id_}.")


  @app.command()
  def schedule_all(
- executor: str | None = None,
- base_dir: str | None = None,
- inputs: str | None = None,
- final_vars: str | None = None,
- config: str | None = None,
- with_tracker: bool = False,
- with_opentelemetry: bool = False,
- with_progressbar: bool = False,
- paused: bool = False,
- coalesce: str = "latest",
- misfire_grace_time: float | None = None,
- max_jitter: float | None = None,
- max_running_jobs: int | None = None,
- conflict_policy: str = "do_nothing",
- storage_options: str | None = None,
- overwrite: bool = False,
+ executor: str | None = typer.Option(None, help="Override executor specified in pipeline configs"),
+ base_dir: str | None = typer.Option(None, help="Base directory containing pipelines and configurations"),
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+ overwrite: bool = typer.Option(False, help="Overwrite existing schedules if they exist"),
  ):
  """
- Schedule all pipelines using the pipeline specific configurations (`conf/pipelines/<name>.yml`).
+ Schedule all pipelines based on their individual configurations.
+
+ This command reads the configuration files for all pipelines in the project
+ and schedules them based on their individual scheduling settings. This is useful
+ for setting up all scheduled pipelines at once after deployment or system restart.

  Args:
- executor: Executor to use
- base_dir: Base directory for the pipeline
- inputs: Input parameters as JSON, dict string, or key=value pairs
- final_vars: Final variables as JSON or list
- config: Config for the hamilton pipeline executor
- with_tracker: Enable tracking with hamilton ui
- with_opentelemetry: Enable OpenTelemetry tracing
- with_progressbar: Enable progress bar
- paused: Start the job in paused state
- coalesce: Coalesce policy
- misfire_grace_time: Misfire grace time
- max_jitter: Maximum jitter
- max_running_jobs: Maximum running jobs
- conflict_policy: Conflict policy
- storage_options: Storage options as JSON, dict string, or key=value pairs
- overwrite: Overwrite existing schedule
+ executor: Override executor specified in pipeline configs
+ base_dir: Base directory containing pipelines and configurations
+ storage_options: Options for storage backends
+ log_level: Set the logging level
+ overwrite: Whether to overwrite existing schedules

  Examples:
- pipeline schedule-all
- """
- if get_schedule_manager is None:
- raise ValueError("APScheduler not installed. Please install it first.")
+ # Schedule all pipelines using their configurations
+ $ pipeline schedule-all
+
+ # Force overwrite of existing schedules
+ $ pipeline schedule-all --overwrite
+
+ # Override executor for all pipelines
+ $ pipeline schedule-all --executor distributed

+ # Set custom base directory
+ $ pipeline schedule-all --base-dir /path/to/project
+ """
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")

  with PipelineManager(
  base_dir=base_dir,
  storage_options=parsed_storage_options or {},
+ log_level=log_level,
  ) as manager:
  manager.schedule_all(
- executor=executor,
- inputs=inputs,
- final_vars=final_vars,
- config=config,
- with_tracker=with_tracker,
- with_opentelemetry=with_opentelemetry,
- with_progressbar=with_progressbar,
- paused=paused,
- coalesce=coalesce,
- misfire_grace_time=misfire_grace_time,
- max_jitter=max_jitter,
- max_running_jobs=max_running_jobs,
- conflict_policy=conflict_policy,
  overwrite=overwrite,
+ executor_cfg=executor
  )
+ logger.info("Scheduled all pipelines based on their configurations.")


  @app.command()
  def new(
- name: str,
- base_dir: str | None = None,
- storage_options: str | None = None,
- overwrite: bool = False,
+ name: str = typer.Argument(..., help="Name of the pipeline to create"),
+ base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+ overwrite: bool = typer.Option(False, help="Overwrite existing pipeline if it exists"),
  ):
  """
- Create a new pipeline.
+ Create a new pipeline structure.
+
+ This command creates a new pipeline with the necessary directory structure,
+ configuration file, and skeleton module file. It prepares all the required
+ components for you to start implementing your pipeline logic.

  Args:
- name: Name of the new pipeline
- base_dir: Base directory for the new pipeline
- overwrite: Overwrite existing pipeline
- storage_options: Storage options as JSON, dict string, or key=value pairs
+ name: Name for the new pipeline
+ base_dir: Base directory to create the pipeline in
+ storage_options: Options for storage backends
+ log_level: Set the logging level
+ overwrite: Whether to overwrite existing pipeline with the same name

  Examples:
- pipeline new my_pipeline
+ # Create a new pipeline with default settings
+ $ pipeline new my_new_pipeline
+
+ # Create a pipeline, overwriting if it exists
+ $ pipeline new my_new_pipeline --overwrite
+
+ # Create a pipeline in a specific directory
+ $ pipeline new my_new_pipeline --base-dir /path/to/project
  """
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
-
  with PipelineManager(
  base_dir=base_dir,
  storage_options=parsed_storage_options or {},
+ log_level=log_level,
  ) as manager:
  manager.new(name=name, overwrite=overwrite)
+ logger.info(f"New pipeline structure created for '{name}'.")


  @app.command()
  def delete(
- name: str,
- base_dir: str | None = None,
- cfg: bool = False,
- module: bool = False,
- hooks: bool = False,
- storage_options: str | None = None,
+ name: str = typer.Argument(..., help="Name of the pipeline to delete"),
+ base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
+ cfg: bool = typer.Option(False, "--cfg", "-c", help="Delete only the configuration file"),
+ module: bool = typer.Option(False, "--module", "-m", help="Delete only the pipeline module"),
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
  ):
  """
- Delete the specified pipeline.
+ Delete a pipeline's configuration and/or module files.
+
+ This command removes a pipeline's configuration file and/or module file from the project.
+ If neither --cfg nor --module is specified, both will be deleted.

  Args:
  name: Name of the pipeline to delete
- base_dir: Base directory for the pipeline
- cfg: Remove associated configuration
- module: Remove associated module
- hooks: Remove associated hooks
- storage_options: Storage options as JSON, dict string, or key=value pairs
+ base_dir: Base directory containing the pipeline
+ cfg: Delete only the configuration file
+ module: Delete only the pipeline module
+ storage_options: Options for storage backends
+ log_level: Set the logging level

  Examples:
- pipeline delete my_pipeline
+ # Delete a pipeline (both config and module)
+ $ pipeline delete my_pipeline
+
+ # Delete only the configuration file
+ $ pipeline delete my_pipeline --cfg
+
+ # Delete only the module file
+ $ pipeline delete my_pipeline --module
  """
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")

- with Pipeline(
- name=name,
+ # If neither flag is set, default to deleting both
+ delete_cfg = cfg or not (cfg or module)
+ delete_module = module or not (cfg or module)
+
+ with PipelineManager(
  base_dir=base_dir,
  storage_options=parsed_storage_options or {},
- ) as pipeline:
- pipeline.delete(cfg=cfg, module=module, hooks=hooks)
+ log_level=log_level,
+ ) as manager:
+ manager.delete(name=name, cfg=delete_cfg, module=delete_module)
+
+ deleted_parts = []
+ if delete_cfg:
+ deleted_parts.append("config")
+ if delete_module:
+ deleted_parts.append("module")
+ logger.info(f"Pipeline '{name}' deleted ({', '.join(deleted_parts)})." if deleted_parts else f"Pipeline '{name}' - nothing specified to delete.")


  @app.command()
  def show_dag(
- name: str, base_dir: str | None = None, storage_options: str | None = None, config: str | None = None
+ name: str = typer.Argument(..., help="Name of the pipeline to visualize"),
+ base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+ format: str = typer.Option("png", help="Output format (e.g., png, svg, pdf). If 'raw', returns object."),
  ):
  """
- Show the DAG of the specified pipeline.
+ Show the DAG (Directed Acyclic Graph) of a pipeline.
+
+ This command generates and displays a visual representation of the pipeline's
+ execution graph, showing how nodes are connected and dependencies between them.

  Args:
- name: Name of the pipeline to show
- base_dir: Base directory for the pipeline
- storage_options: Storage options as JSON, dict string, or key=value pairs
- config: Config for the hamilton pipeline executor
+ name: Name of the pipeline to visualize
+ base_dir: Base directory containing the pipeline
+ storage_options: Options for storage backends
+ log_level: Set the logging level
+ format: Output format for the visualization

  Examples:
- pipeline show-dag my_pipeline
+ # Show pipeline DAG in PNG format (default)
+ $ pipeline show-dag my_pipeline
+
+ # Generate SVG format visualization
+ $ pipeline show-dag my_pipeline --format svg
+
+ # Get raw graphviz object
+ $ pipeline show-dag my_pipeline --format raw
  """
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
- parsed_config = parse_dict_or_list_param(config, "dict")
+ is_raw = format.lower() == "raw"

- with Pipeline(
- name=name,
+ with PipelineManager(
  base_dir=base_dir,
  storage_options=parsed_storage_options or {},
- ) as pipeline:
- pipeline.show_dag(config=parsed_config)
+ log_level=log_level,
+ ) as manager:
+ # Manager's show_dag likely handles rendering or returning raw object
+ try:
+ graph_or_none = manager.show_dag(name=name, format=format if not is_raw else "png", raw=is_raw)
+ if is_raw and graph_or_none:
+ print("Graphviz object returned (not rendered):")
+ # print(graph_or_none) # Or handle as needed
+ elif not is_raw:
+ logger.info(f"DAG for pipeline '{name}' displayed/saved (format: {format}).")
+ except ImportError:
+ logger.error("Graphviz is not installed. Cannot show/save DAG. Install with: pip install graphviz")
+ except Exception as e:
+ logger.error(f"Failed to generate DAG for pipeline '{name}': {e}")


  @app.command()
  def save_dag(
- name: str, base_dir: str | None = None, storage_options: str | None = None, config: str | None = None
+ name: str = typer.Argument(..., help="Name of the pipeline to visualize"),
+ base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+ format: str = typer.Option("png", help="Output format (e.g., png, svg, pdf)"),
+ output_path: str | None = typer.Option(None, help="Custom path to save the file (default: <name>.<format>)"),
  ):
  """
- Save the DAG of the specified pipeline.
+ Save the DAG (Directed Acyclic Graph) of a pipeline to a file.
+
+ This command generates a visual representation of the pipeline's execution graph
+ and saves it to a file in the specified format.

  Args:
- name: Name of the pipeline to save
- base_dir: Base directory for the pipeline
- storage_options: Storage options as JSON, dict string, or key=value pairs
- config: Config for the hamilton pipeline executor
+ name: Name of the pipeline to visualize
+ base_dir: Base directory containing the pipeline
+ storage_options: Options for storage backends
+ log_level: Set the logging level
+ format: Output format for the visualization
+ output_path: Custom file path to save the output (defaults to pipeline name)

  Examples:
- pipeline save-dag my_pipeline
+ # Save pipeline DAG in PNG format (default)
+ $ pipeline save-dag my_pipeline
+
+ # Save in SVG format
+ $ pipeline save-dag my_pipeline --format svg
+
+ # Save to a custom location
+ $ pipeline save-dag my_pipeline --output-path ./visualizations/my_graph.png
  """
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
- parsed_config = parse_dict_or_list_param(config, "dict")
-
- with Pipeline(
- name=name,
+ with PipelineManager(
  base_dir=base_dir,
  storage_options=parsed_storage_options or {},
- ) as pipeline:
- pipeline.save_dag(config=parsed_config)
+ log_level=log_level,
+ ) as manager:
+ try:
+ file_path = manager.save_dag(name=name, format=format, output_path=output_path)
+ logger.info(f"DAG for pipeline '{name}' saved to {file_path}.")
+ except ImportError:
+ logger.error("Graphviz is not installed. Cannot save DAG. Install with: pip install graphviz")
+ except Exception as e:
+ logger.error(f"Failed to save DAG for pipeline '{name}': {e}")


  @app.command()
- def show_pipelines(base_dir: str | None = None, storage_options: str | None = None):
+ def show_pipelines(
+ base_dir: str | None = typer.Option(None, help="Base directory containing pipelines"),
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+ format: str = typer.Option("table", help="Output format (table, json, yaml)"),
+ ):
  """
- List all available pipelines.
+ List all available pipelines in the project.
+
+ This command displays a list of all pipelines defined in the project,
+ providing an overview of what pipelines are available to run or schedule.

  Args:
- base_dir: Base directory for the pipelines
- storage_options: Storage options as JSON, dict string, or key=value pairs
+ base_dir: Base directory containing pipelines
+ storage_options: Options for storage backends
+ log_level: Set the logging level
+ format: Output format for the list (table, json, yaml)

  Examples:
- pipeline list-pipelines
+ # List all pipelines in table format (default)
+ $ pipeline show-pipelines
+
+ # Output in JSON format
+ $ pipeline show-pipelines --format json
+
+ # List pipelines from a specific directory
+ $ pipeline show-pipelines --base-dir /path/to/project
  """
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
  with PipelineManager(
  base_dir=base_dir,
  storage_options=parsed_storage_options or {},
+ log_level=log_level,
  ) as manager:
- manager.show_pipelines()
+ manager.show_pipelines(format=format)


  @app.command()
  def show_summary(
- name: str | None = None,
- cfg: bool = True,
- module: bool = True,
- base_dir: str | None = None,
- storage_options: str | None = None,
+ name: str | None = typer.Option(None, help="Name of specific pipeline to show (all pipelines if not specified)"),
+ cfg: bool = typer.Option(True, help="Include configuration details"),
+ code: bool = typer.Option(True, help="Include code/module details"),
+ project: bool = typer.Option(True, help="Include project context"),
+ base_dir: str | None = typer.Option(None, help="Base directory containing pipelines"),
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+ to_html: bool = typer.Option(False, help="Output summary as HTML"),
+ to_svg: bool = typer.Option(False, help="Output summary as SVG (if applicable)"),
+ output_file: str | None = typer.Option(None, help="Save output to specified file instead of printing"),
  ):
  """
- Show the summary of the specified pipeline.
+ Show summary information for one or all pipelines.
+
+ This command displays detailed information about pipelines including their
+ configuration, code structure, and project context. You can view information
+ for a specific pipeline or get an overview of all pipelines.

  Args:
- name: Name of the pipeline to show
- base_dir: Base directory for the pipeline
- cfg: Show configuration
- module: Show module information
- storage_options: Storage options as JSON, dict string, or key=value pairs
+ name: Name of specific pipeline to summarize (all if not specified)
+ cfg: Include configuration details
+ code: Include code/module details
+ project: Include project context information
+ base_dir: Base directory containing pipelines
+ storage_options: Options for storage backends
+ log_level: Set the logging level
+ to_html: Generate HTML output instead of text
+ to_svg: Generate SVG output (where applicable)
+ output_file: File path to save the output instead of printing to console

  Examples:
- pipeline show-summary my_pipeline
+ # Show summary for all pipelines
+ $ pipeline show-summary
+
+ # Show summary for a specific pipeline
+ $ pipeline show-summary --name my_pipeline
+
+ # Show only configuration information
+ $ pipeline show-summary --name my_pipeline --cfg --no-code --no-project
+
+ # Generate HTML report
+ $ pipeline show-summary --to-html --output-file pipeline_report.html
  """
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
-
  with PipelineManager(
  base_dir=base_dir,
  storage_options=parsed_storage_options or {},
+ log_level=log_level,
  ) as manager:
- manager.show_summary(name=name, cfg=cfg, module=module)
+ # Assumes manager.show_summary handles printing/returning formatted output
+ summary_output = manager.show_summary(
+ name=name,
+ cfg=cfg,
+ code=code,
+ project=project,
+ to_html=to_html,
+ to_svg=to_svg,
+ )
+
+ if summary_output:
+ if output_file:
+ with open(output_file, 'w') as f:
+ f.write(summary_output)
+ logger.info(f"Summary saved to {output_file}")
+ else:
+ print(summary_output)
+ # Otherwise, assume manager printed the summary
+

  @app.command()
  def add_hook(
- name: str,
- type: Annotated[HookType, typer.Option(help="Type of the hook to add")],
- to: str | None = None,
- function_name: str | None = None,
- base_dir: str | None = None,
- storage_options: str | None = None,
+ name: str = typer.Argument(..., help="Name of the pipeline to add the hook to"),
+ function_name: str = typer.Option(..., "--function", "-f", help="Name of the hook function defined in the pipeline module"),
+ type: Annotated[HookType, typer.Option(help="Type of hook to add")] = HookType.MQTT_BUILD_CONFIG,
+ to: str | None = typer.Option(None, help="Target node name or tag (required for node hooks)"),
+ base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
+ storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+ log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
  ):
  """
- Add a hook to the specified pipeline.
+ Add a hook to a pipeline configuration.
+
+ This command adds a hook function to a pipeline's configuration. Hooks are functions
+ that are called at specific points during pipeline execution to perform additional
+ tasks like logging, monitoring, or data validation.

  Args:
  name: Name of the pipeline to add the hook to
- type: Type of the hook to add
- to: File in which to add the hook. If not provided, the hook will be added to the hook.py file in the pipelines hook folder.
- function_name: the name of the hook function. If not provided uses the default name of the hook type.
- base_dir: Base directory for the pipeline
- storage_options: Storage options as JSON, dict string, or key=value pairs
+ function_name: Name of the hook function (must be defined in the pipeline module)
+ type: Type of hook (determines when the hook is called during execution)
+ to: Target node or tag (required for node-specific hooks)
+ base_dir: Base directory containing the pipeline
+ storage_options: Options for storage backends
+ log_level: Set the logging level

  Examples:
- pipeline add-hook my_pipeline mqtt-build-config
+ # Add a post-run hook
+ $ pipeline add-hook my_pipeline --function log_results
+
+ # Add a pre-run hook
+ $ pipeline add-hook my_pipeline --function validate_inputs --type PRE_RUN
+
+ # Add a node-specific hook (executed before a specific node runs)
+ $ pipeline add-hook my_pipeline --function validate_data --type NODE_PRE_EXECUTE --to data_processor
+
+ # Add a hook for all nodes with a specific tag
+ $ pipeline add-hook my_pipeline --function log_metrics --type NODE_POST_EXECUTE --to @metrics
  """
  parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")

- if to is not None and not to.endswith(".py"):
- to = to + ".py"
+ # Validate 'to' argument for node hooks
+ if type in (HookType.NODE_PRE_EXECUTE, HookType.NODE_POST_EXECUTE) and not to:
+ raise typer.BadParameter("The '--to' option (target node/tag) is required for node hooks.")

  with PipelineManager(
  base_dir=base_dir,
  storage_options=parsed_storage_options or {},
+ log_level=log_level,
  ) as manager:
- manager.add_hook(name, type=type, to=to, function_name=function_name)
+ try:
+ manager.add_hook(
+ name=name,
+ type=type,
+ to=to,
+ function_name=function_name,
+ )
+ logger.info(f"Hook '{function_name}' added to pipeline '{name}' (type: {type.value}).")
+ except Exception as e:
+ logger.error(f"Failed to add hook to pipeline '{name}': {e}")