FlowerPower 0.9.12.4__py3-none-any.whl → 1.0.0b1__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- flowerpower/__init__.py +17 -2
- flowerpower/cfg/__init__.py +201 -149
- flowerpower/cfg/base.py +122 -24
- flowerpower/cfg/pipeline/__init__.py +254 -0
- flowerpower/cfg/pipeline/adapter.py +66 -0
- flowerpower/cfg/pipeline/run.py +40 -11
- flowerpower/cfg/pipeline/schedule.py +69 -79
- flowerpower/cfg/project/__init__.py +149 -0
- flowerpower/cfg/project/adapter.py +57 -0
- flowerpower/cfg/project/job_queue.py +165 -0
- flowerpower/cli/__init__.py +92 -35
- flowerpower/cli/job_queue.py +878 -0
- flowerpower/cli/mqtt.py +49 -4
- flowerpower/cli/pipeline.py +576 -381
- flowerpower/cli/utils.py +55 -0
- flowerpower/flowerpower.py +12 -7
- flowerpower/fs/__init__.py +20 -2
- flowerpower/fs/base.py +350 -26
- flowerpower/fs/ext.py +797 -216
- flowerpower/fs/storage_options.py +1097 -55
- flowerpower/io/base.py +13 -18
- flowerpower/io/loader/__init__.py +28 -0
- flowerpower/io/loader/deltatable.py +7 -10
- flowerpower/io/metadata.py +1 -0
- flowerpower/io/saver/__init__.py +28 -0
- flowerpower/io/saver/deltatable.py +4 -3
- flowerpower/job_queue/__init__.py +252 -0
- flowerpower/job_queue/apscheduler/__init__.py +11 -0
- flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
- flowerpower/job_queue/apscheduler/manager.py +1063 -0
- flowerpower/job_queue/apscheduler/setup.py +524 -0
- flowerpower/job_queue/apscheduler/trigger.py +169 -0
- flowerpower/job_queue/apscheduler/utils.py +309 -0
- flowerpower/job_queue/base.py +382 -0
- flowerpower/job_queue/rq/__init__.py +10 -0
- flowerpower/job_queue/rq/_trigger.py +37 -0
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
- flowerpower/job_queue/rq/manager.py +1449 -0
- flowerpower/job_queue/rq/setup.py +150 -0
- flowerpower/job_queue/rq/utils.py +69 -0
- flowerpower/pipeline/__init__.py +5 -0
- flowerpower/pipeline/base.py +118 -0
- flowerpower/pipeline/io.py +407 -0
- flowerpower/pipeline/job_queue.py +505 -0
- flowerpower/pipeline/manager.py +1586 -0
- flowerpower/pipeline/registry.py +560 -0
- flowerpower/pipeline/runner.py +560 -0
- flowerpower/pipeline/visualizer.py +142 -0
- flowerpower/plugins/mqtt/__init__.py +12 -0
- flowerpower/plugins/mqtt/cfg.py +16 -0
- flowerpower/plugins/mqtt/manager.py +789 -0
- flowerpower/settings.py +110 -0
- flowerpower/utils/logging.py +21 -0
- flowerpower/utils/misc.py +57 -9
- flowerpower/utils/sql.py +122 -24
- flowerpower/utils/templates.py +18 -142
- flowerpower/web/app.py +0 -0
- flowerpower-1.0.0b1.dist-info/METADATA +324 -0
- flowerpower-1.0.0b1.dist-info/RECORD +94 -0
- {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/WHEEL +1 -1
- flowerpower/cfg/pipeline/tracker.py +0 -14
- flowerpower/cfg/project/open_telemetry.py +0 -8
- flowerpower/cfg/project/tracker.py +0 -11
- flowerpower/cfg/project/worker.py +0 -19
- flowerpower/cli/scheduler.py +0 -309
- flowerpower/event_handler.py +0 -23
- flowerpower/mqtt.py +0 -525
- flowerpower/pipeline.py +0 -2419
- flowerpower/scheduler.py +0 -680
- flowerpower/tui.py +0 -79
- flowerpower/utils/datastore.py +0 -186
- flowerpower/utils/eventbroker.py +0 -127
- flowerpower/utils/executor.py +0 -58
- flowerpower/utils/trigger.py +0 -140
- flowerpower-0.9.12.4.dist-info/METADATA +0 -575
- flowerpower-0.9.12.4.dist-info/RECORD +0 -70
- /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
- {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.9.12.4.dist-info → flowerpower-1.0.0b1.dist-info}/top_level.txt +0 -0
flowerpower/cli/pipeline.py
CHANGED
@@ -1,605 +1,800 @@
-
-
+# Import necessary libraries
 import typer
 from loguru import logger
+from typing_extensions import Annotated
+import datetime as dt
+import duration_parser
+from ..pipeline.manager import HookType, PipelineManager
+from ..utils.logging import setup_logging
+from .utils import parse_dict_or_list_param#, parse_param_dict

-
-# from ..pipeline import (
-#     add as add_pipeline_,
-#     add_job as add_pipeline_job_,
-#     all_pipelines as all_pipelines_,
-#     delete as delete_pipeline_,
-#     get_summary as get_pipeline_summary_,
-#     new as new_pipeline_,
-#     run as run_pipeline_,
-#     run_job as run_pipeline_job_,
-#     schedule as schedule_pipeline_,
-#     save_dag as save_pipeline_dag_,
-#     show_dag as show_pipeline_dag_,
-#     show_summary as show_pipeline_summary_,
-#     # start_mqtt_listener as start_mqtt_listener_,
-# )
-from ..pipeline import Pipeline, PipelineManager
-from .utils import parse_dict_or_list_param, parse_param_dict
-
-# Optional imports
-if importlib.util.find_spec("apscheduler"):
-    from ..scheduler import get_schedule_manager
-    from ..scheduler import start_worker as start_worker_
-else:
-    get_schedule_manager = None
-    start_worker_ = None
-
+setup_logging()

 app = typer.Typer(help="Pipeline management commands")


 @app.command()
 def run(
-    name: str,
-    executor: str | None = None,
-    base_dir: str | None = None,
-    inputs: str | None = None,
-    final_vars: str | None = None,
-    config: str | None = None,
-
-
-
-
+    name: str = typer.Argument(..., help="Name of the pipeline to run"),
+    executor: str | None = typer.Option(None, help="Executor to use for running the pipeline"),
+    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+    inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
+    final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
+    config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
+    cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
+    max_retries: int = typer.Option(0, help="Maximum number of retry attempts on failure"),
+    retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
+    jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
 ):
     """
-    Run
+    Run a pipeline immediately.
+
+    This command executes a pipeline with the specified configuration and inputs.
+    The pipeline will run synchronously, and the command will wait for completion.

     Args:
         name: Name of the pipeline to run
-        executor:
-        base_dir: Base directory
-        inputs: Input parameters
-        final_vars: Final variables
-        config:
-
-
-
-
+        executor: Type of executor to use
+        base_dir: Base directory containing pipelines and configurations
+        inputs: Input parameters for the pipeline
+        final_vars: Final variables to request from the pipeline
+        config: Configuration for the Hamilton executor
+        cache: Cache configuration for improved performance
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        with_adapter: Configuration for adapters like trackers or monitors
+        max_retries: Maximum number of retry attempts on failure
+        retry_delay: Base delay between retries in seconds
+        jitter_factor: Random factor applied to delay for jitter (0-1)

     Examples:
-
-
+        # Run a pipeline with default settings
+        $ pipeline run my_pipeline

-
-
+        # Run with custom inputs
+        $ pipeline run my_pipeline --inputs '{"data_path": "data/myfile.csv", "limit": 100}'

-
-
+        # Specify which final variables to calculate
+        $ pipeline run my_pipeline --final-vars '["output_table", "summary_metrics"]'

-
-
+        # Configure caching
+        $ pipeline run my_pipeline --cache '{"type": "memory", "ttl": 3600}'

-
-
+        # Use a different executor
+        $ pipeline run my_pipeline --executor distributed
+
+        # Enable adapters for monitoring/tracking
+        $ pipeline run my_pipeline --with-adapter '{"tracker": true, "opentelemetry": true}'
+
+        # Set a specific logging level
+        $ pipeline run my_pipeline --log-level debug
+
+        # Configure automatic retries on failure
+        $ pipeline run my_pipeline --max-retries 3 --retry-delay 2.0 --jitter-factor 0.2
     """
     parsed_inputs = parse_dict_or_list_param(inputs, "dict")
     parsed_config = parse_dict_or_list_param(config, "dict")
+    parsed_cache = parse_dict_or_list_param(cache, "dict")
     parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
     parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+    parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")

-    with
-        name=name,
+    with PipelineManager(
         base_dir=base_dir,
-        storage_options=parsed_storage_options or {},
-
-
-
+        storage_options=parsed_storage_options or {},
+        log_level=log_level,
+    ) as manager:
+        _ = manager.run(
+            name=name,
             inputs=parsed_inputs,
             final_vars=parsed_final_vars,
             config=parsed_config,
-
-
-
+            cache=parsed_cache,
+            executor_cfg=executor,
+            with_adapter_cfg=parsed_with_adapter,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+            jitter_factor=jitter_factor,
         )
+        logger.info(f"Pipeline '{name}' finished running.")


 @app.command()
 def run_job(
-    name: str,
-    executor: str | None = None,
-    base_dir: str | None = None,
-    inputs: str | None = None,
-    final_vars: str | None = None,
-    config: str | None = None,
-
-
-
-
+    name: str = typer.Argument(..., help="Name or ID of the pipeline job to run"),
+    executor: str | None = typer.Option(None, help="Executor to use for running the job"),
+    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+    inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
+    final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
+    config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
+    cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
+    max_retries: int = typer.Option(0, help="Maximum number of retry attempts on failure"),
+    retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
+    jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
 ):
     """
-    Run
+    Run a specific pipeline job.
+
+    This command runs an existing job by its ID. The job should have been previously
+    added to the system via the add-job command or through scheduling.

     Args:
-        name:
-        executor:
-        base_dir: Base directory
-        inputs: Input parameters
-        final_vars: Final variables
-        config:
-
-
-
-
+        name: Job ID to run
+        executor: Type of executor to use (maps to executor_cfg in manager)
+        base_dir: Base directory containing pipelines and configurations
+        inputs: Input parameters for the pipeline
+        final_vars: Final variables to request from the pipeline
+        config: Configuration for the Hamilton executor
+        cache: Cache configuration
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        with_adapter: Configuration for adapters like trackers or monitors
+        max_retries: Maximum number of retry attempts on failure
+        retry_delay: Base delay between retries in seconds
+        jitter_factor: Random factor applied to delay for jitter (0-1)

     Examples:
-
-
+        # Run a job with a specific ID
+        $ pipeline run-job job-123456

-
-
+        # Run a job with custom inputs
+        $ pipeline run-job job-123456 --inputs '{"data_path": "data/myfile.csv"}'

-
-
+        # Specify a different executor
+        $ pipeline run-job job-123456 --executor local

-
-
+        # Use caching for better performance
+        $ pipeline run-job job-123456 --cache '{"type": "memory"}'

-
-
+        # Configure adapters for monitoring
+        $ pipeline run-job job-123456 --with-adapter '{"tracker": true, "opentelemetry": false}'
+
+        # Set up automatic retries for resilience
+        $ pipeline run-job job-123456 --max-retries 3 --retry-delay 2.0
     """
     parsed_inputs = parse_dict_or_list_param(inputs, "dict")
     parsed_config = parse_dict_or_list_param(config, "dict")
+    parsed_cache = parse_dict_or_list_param(cache, "dict")
     parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
     parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+    parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")

-    with
-        name=name,
+    with PipelineManager(
         base_dir=base_dir,
-        storage_options=parsed_storage_options or {},
-
-
-
+        storage_options=parsed_storage_options or {},
+        log_level=log_level,
+    ) as manager:
+        _ = manager.run_job(
+            name=name,
             inputs=parsed_inputs,
             final_vars=parsed_final_vars,
             config=parsed_config,
-
-
-
+            cache=parsed_cache,
+            executor_cfg=executor,
+            with_adapter_cfg=parsed_with_adapter,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+            jitter_factor=jitter_factor,
         )
+        logger.info(f"Job '{name}' finished running.")


 @app.command()
 def add_job(
-    name: str,
-    executor: str | None = None,
-    base_dir: str | None = None,
-    inputs: str | None = None,
-    final_vars: str | None = None,
-    config: str | None = None,
-
-
-
-
+    name: str = typer.Argument(..., help="Name of the pipeline to add as a job"),
+    executor: str | None = typer.Option(None, help="Executor to use for running the job"),
+    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+    inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
+    final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
+    config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
+    cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
+    run_at: str | None = typer.Option(None, help="Run at a specific time (ISO format)"),
+    run_in: str | None = typer.Option(None, help="Run in a specific interval (e.g., '5m', '1h', '12m34s')"),
+    max_retries: int = typer.Option(3, help="Maximum number of retry attempts on failure"),
+    retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
+    jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
 ):
     """
-    Add a job to the
+    Add a pipeline job to the queue.
+
+    This command adds a job to the queue for later execution. The job is based on
+    an existing pipeline with customized inputs and configuration.

     Args:
-        name:
-        executor:
-        base_dir: Base directory
-        inputs: Input parameters
-        final_vars: Final variables
-        config:
-
-
-
-
+        name: Pipeline name to add as a job
+        executor: Type of executor to use
+        base_dir: Base directory containing pipelines and configurations
+        inputs: Input parameters for the pipeline
+        final_vars: Final variables to request from the pipeline
+        config: Configuration for the Hamilton executor
+        cache: Cache configuration
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        with_adapter: Configuration for adapters like trackers or monitors
+        run_at: Run the job at a specific time (ISO format)
+        run_in: Run the job in a specific interval (e.g., '5m', '1h')
+        max_retries: Maximum number of retry attempts on failure
+        retry_delay: Base delay between retries in seconds
+        jitter_factor: Random factor applied to delay for jitter (0-1)

     Examples:
-
-
+        # Add a basic job
+        $ pipeline add-job my_pipeline

-
-
+        # Add a job with custom inputs
+        $ pipeline add-job my_pipeline --inputs '{"data_path": "data/myfile.csv"}'

-
-
+        # Specify final variables to calculate
+        $ pipeline add-job my_pipeline --final-vars '["output_table", "metrics"]'

-
-
+        # Configure caching
+        $ pipeline add-job my_pipeline --cache '{"type": "memory", "ttl": 3600}'

-
-
+        # Use a specific log level
+        $ pipeline add-job my_pipeline --log-level debug
+
+        # Configure automatic retries for resilience
+        $ pipeline add-job my_pipeline --max-retries 5 --retry-delay 2.0 --jitter-factor 0.2
     """
     parsed_inputs = parse_dict_or_list_param(inputs, "dict")
     parsed_config = parse_dict_or_list_param(config, "dict")
+    parsed_cache = parse_dict_or_list_param(cache, "dict")
     parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
     parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+    parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
+    run_at = dt.datetime.fromisoformat(run_at) if run_at else None
+    run_in = duration_parser.parse(run_in) if run_in else None

-
-
-    with Pipeline(
-        name=name,
+    with PipelineManager(
         base_dir=base_dir,
         storage_options=parsed_storage_options or {},
-
-
-
+        log_level=log_level,
+    ) as manager:
+        job_id = manager.add_job(
+            name=name,
             inputs=parsed_inputs,
             final_vars=parsed_final_vars,
             config=parsed_config,
-
-
-
+            cache=parsed_cache,
+            executor_cfg=executor,
+            with_adapter_cfg=parsed_with_adapter,
+            run_at=run_at,
+            run_in=run_in,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+            jitter_factor=jitter_factor,
         )
+        logger.info(f"Job {job_id} added for pipeline '{name}'.")


 @app.command()
 def schedule(
-    name: str,
-    executor: str | None = None,
-    base_dir: str | None = None,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    interval_params: str | None = None,
-    calendarinterval_params: str | None = None,
-    date_params: str | None = None,
-    storage_options: str | None = None,
-    overwrite: bool = False,
+    name: str = typer.Argument(..., help="Name of the pipeline to schedule"),
+    executor: str | None = typer.Option(None, help="Executor to use for running the job"),
+    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+    inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
+    final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
+    config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
+    cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
+    cron: str | None = typer.Option(None, help="Cron expression for scheduling"),
+    interval: str | None = typer.Option(None, help="Interval for scheduling (e.g., '5m', '1h')"),
+    date: str | None = typer.Option(None, help="Specific date and time for scheduling (ISO format)"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
+    overwrite: bool = typer.Option(False, help="Overwrite existing schedule if it exists"),
+    schedule_id: str | None = typer.Option(None, help="Custom ID for the schedule (autogenerated if not provided)"),
+    max_retries: int = typer.Option(3, help="Maximum number of retry attempts on failure"),
+    retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
+    jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
 ):
     """
-    Schedule a pipeline
+    Schedule a pipeline to run at specified times.
+
+    This command schedules a pipeline to run automatically based on various
+    scheduling triggers like cron expressions, time intervals, or specific dates.

     Args:
-        name:
-        executor:
-        base_dir: Base directory
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        interval_params: Interval parameters as JSON or key=value pairs
-        calendarinterval_params: Calendar interval parameters as JSON or key=value pairs
-        date_params: Date parameters as JSON or key=value pairs
-        storage_options: Storage options as JSON, dict string, or key=value pairs
-        overwrite: Overwrite existing schedule
+        name: Pipeline name to schedule
+        executor: Type of executor to use
+        base_dir: Base directory containing pipelines and configurations
+        inputs: Input parameters for the pipeline
+        final_vars: Final variables to request from the pipeline
+        config: Configuration for the Hamilton executor
+        cache: Cache configuration
+        cron: Cron expression for scheduling (e.g., "0 * * * *")
+        interval: Interval for scheduling (e.g., "5m", "1h")
+        date: Specific date and time for scheduling (ISO format)
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        with_adapter: Configuration for adapters like trackers or monitors
+        overwrite: Overwrite existing schedule with same ID
+        schedule_id: Custom identifier for the schedule
+        max_retries: Maximum number of retry attempts on failure
+        retry_delay: Base delay between retries in seconds
+        jitter_factor: Random factor applied to delay for jitter (0-1)

     Examples:
-
-
-
-        # Dict string inputs
-        pipeline schedule my_pipeline --inputs "{'key': 'value'}"
-
-        # Key-value pair inputs
-        pipeline schedule my_pipeline --inputs 'key1=value1,key2=value2'
-
-        # List final vars
-        pipeline schedule my_pipeline --final-vars '["var1", "var2"]'
-
-        # Storage options
-        pipeline schedule my_pipeline --storage-options 'endpoint=http://localhost,use_ssl=true'
+        # Schedule with cron expression (every hour)
+        $ pipeline schedule my_pipeline --trigger-type cron --crontab "0 * * * *"

-
-
+        # Schedule to run every 15 minutes
+        $ pipeline schedule my_pipeline --trigger-type interval --interval_params minutes=15

-
-
+        # Schedule to run at a specific date and time
+        $ pipeline schedule my_pipeline --trigger-type date --date_params run_date="2025-12-31 23:59:59"

-
-
+        # Schedule with custom inputs and cache settings
+        $ pipeline schedule my_pipeline --inputs '{"source": "database"}' --cache '{"type": "redis"}'

-
-
+        # Create a schedule in paused state
+        $ pipeline schedule my_pipeline --crontab "0 9 * * 1-5" --paused

+        # Set a custom schedule ID
+        $ pipeline schedule my_pipeline --crontab "0 12 * * *" --schedule_id "daily-noon-run"
+
+        # Configure automatic retries for resilience
+        $ pipeline schedule my_pipeline --max-retries 5 --retry-delay 2.0 --jitter-factor 0.2
     """
-    if get_schedule_manager is None:
-        raise ValueError("APScheduler not installed. Please install it first.")
-
-    # Parse inputs
     parsed_inputs = parse_dict_or_list_param(inputs, "dict")
     parsed_config = parse_dict_or_list_param(config, "dict")
+    parsed_cache = parse_dict_or_list_param(cache, "dict")
     parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
     parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+    parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
+    interval = duration_parser.parse(interval) if interval else None
+    cron = cron if cron else None
+    date = dt.datetime.fromisoformat(date) if date else None

-
-    cron_params_dict = parse_param_dict(cron_params)
-    interval_params_dict = parse_param_dict(interval_params)
-    calendarinterval_params_dict = parse_param_dict(calendarinterval_params)
-    date_params_dict = parse_param_dict(date_params)
-
-    # Combine all parameter dictionaries
-    kwargs = {
-        **cron_params_dict,
-        **interval_params_dict,
-        **calendarinterval_params_dict,
-        **date_params_dict,
-    }
-
-    # Add crontab if provided
-    if crontab is not None:
-        kwargs["crontab"] = crontab
-
-    # Convert numeric parameters
-    for key in ["weeks", "days", "hours", "minutes", "seconds"]:
-        if key in kwargs:
-            try:
-                kwargs[key] = float(kwargs[key])
-            except ValueError:
-                logger.warning(f"Could not convert {key} to float: {kwargs[key]}")
-
-    with Pipeline(
-        name=name,
+    with PipelineManager(
         base_dir=base_dir,
         storage_options=parsed_storage_options or {},
-
-
-
-
+        log_level=log_level,
+    ) as manager:
+        # Combine common schedule kwargs
+
+        id_ = manager.schedule(
+            name=name,
             inputs=parsed_inputs,
             final_vars=parsed_final_vars,
             config=parsed_config,
-
-
-
-
-
-
-            max_jitter=max_jitter,
-            max_running_jobs=max_running_jobs,
-            conflict_policy=conflict_policy,
+            cache=parsed_cache,
+            executor_cfg=executor,
+            with_adapter_cfg=parsed_with_adapter,
+            cron=cron,
+            interval=interval,
+            date=date,
             overwrite=overwrite,
-
+            schedule_id=schedule_id,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+            jitter_factor=jitter_factor,
         )

-    logger.info(f"
+    logger.info(f"Pipeline '{name}' scheduled with ID {id_}.")


 @app.command()
 def schedule_all(
-    executor: str | None = None,
-    base_dir: str | None = None,
-
-
-
-    with_tracker: bool = False,
-    with_opentelemetry: bool = False,
-    with_progressbar: bool = False,
-    paused: bool = False,
-    coalesce: str = "latest",
-    misfire_grace_time: float | None = None,
-    max_jitter: float | None = None,
-    max_running_jobs: int | None = None,
-    conflict_policy: str = "do_nothing",
-    storage_options: str | None = None,
-    overwrite: bool = False,
+    executor: str | None = typer.Option(None, help="Override executor specified in pipeline configs"),
+    base_dir: str | None = typer.Option(None, help="Base directory containing pipelines and configurations"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    overwrite: bool = typer.Option(False, help="Overwrite existing schedules if they exist"),
 ):
     """
-    Schedule all pipelines
+    Schedule all pipelines based on their individual configurations.
+
+    This command reads the configuration files for all pipelines in the project
+    and schedules them based on their individual scheduling settings. This is useful
+    for setting up all scheduled pipelines at once after deployment or system restart.

     Args:
-        executor:
-        base_dir: Base directory
-
-
-
-        with_tracker: Enable tracking with hamilton ui
-        with_opentelemetry: Enable OpenTelemetry tracing
-        with_progressbar: Enable progress bar
-        paused: Start the job in paused state
-        coalesce: Coalesce policy
-        misfire_grace_time: Misfire grace time
-        max_jitter: Maximum jitter
-        max_running_jobs: Maximum running jobs
-        conflict_policy: Conflict policy
-        storage_options: Storage options as JSON, dict string, or key=value pairs
-        overwrite: Overwrite existing schedule
+        executor: Override executor specified in pipeline configs
+        base_dir: Base directory containing pipelines and configurations
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        overwrite: Whether to overwrite existing schedules

     Examples:
-
-
-    if get_schedule_manager is None:
-        raise ValueError("APScheduler not installed. Please install it first.")
+        # Schedule all pipelines using their configurations
+        $ pipeline schedule-all

+        # Force overwrite of existing schedules
+        $ pipeline schedule-all --overwrite
+
+        # Override executor for all pipelines
+        $ pipeline schedule-all --executor distributed
+
+        # Set custom base directory
+        $ pipeline schedule-all --base-dir /path/to/project
+    """
     parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")

     with PipelineManager(
         base_dir=base_dir,
         storage_options=parsed_storage_options or {},
+        log_level=log_level,
     ) as manager:
         manager.schedule_all(
-            executor=executor,
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            with_tracker=with_tracker,
-            with_opentelemetry=with_opentelemetry,
-            with_progressbar=with_progressbar,
-            paused=paused,
-            coalesce=coalesce,
-            misfire_grace_time=misfire_grace_time,
-            max_jitter=max_jitter,
-            max_running_jobs=max_running_jobs,
-            conflict_policy=conflict_policy,
             overwrite=overwrite,
+            executor_cfg=executor
         )
+        logger.info("Scheduled all pipelines based on their configurations.")


 @app.command()
 def new(
-    name: str,
-    base_dir: str | None = None,
-    storage_options: str | None = None,
-
+    name: str = typer.Argument(..., help="Name of the pipeline to create"),
+    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    overwrite: bool = typer.Option(False, help="Overwrite existing pipeline if it exists"),
 ):
     """
-    Create a new pipeline.
+    Create a new pipeline structure.
+
+    This command creates a new pipeline with the necessary directory structure,
+    configuration file, and skeleton module file. It prepares all the required
+    components for you to start implementing your pipeline logic.

     Args:
-        name: Name
-        base_dir: Base directory
-
-
+        name: Name for the new pipeline
+        base_dir: Base directory to create the pipeline in
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        overwrite: Whether to overwrite existing pipeline with the same name

     Examples:
-
+        # Create a new pipeline with default settings
+        $ pipeline new my_new_pipeline
+
+        # Create a pipeline, overwriting if it exists
+        $ pipeline new my_new_pipeline --overwrite
+
+        # Create a pipeline in a specific directory
+        $ pipeline new my_new_pipeline --base-dir /path/to/project
     """
     parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
-
     with PipelineManager(
         base_dir=base_dir,
         storage_options=parsed_storage_options or {},
+        log_level=log_level,
     ) as manager:
         manager.new(name=name, overwrite=overwrite)
+        logger.info(f"New pipeline structure created for '{name}'.")


 @app.command()
 def delete(
-    name: str,
-    base_dir: str | None = None,
-    cfg: bool = False,
-    module: bool = False,
-    storage_options: str | None = None,
+    name: str = typer.Argument(..., help="Name of the pipeline to delete"),
+    base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
+    cfg: bool = typer.Option(False, "--cfg", "-c", help="Delete only the configuration file"),
+    module: bool = typer.Option(False, "--module", "-m", help="Delete only the pipeline module"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
 ):
     """
-    Delete
+    Delete a pipeline's configuration and/or module files.
+
+    This command removes a pipeline's configuration file and/or module file from the project.
+    If neither --cfg nor --module is specified, both will be deleted.

     Args:
         name: Name of the pipeline to delete
-        base_dir: Base directory
-        cfg:
-        module:
-        storage_options:
+        base_dir: Base directory containing the pipeline
+        cfg: Delete only the configuration file
+        module: Delete only the pipeline module
+        storage_options: Options for storage backends
+        log_level: Set the logging level

     Examples:
-
+        # Delete a pipeline (both config and module)
+        $ pipeline delete my_pipeline
+
+        # Delete only the configuration file
+        $ pipeline delete my_pipeline --cfg
+
+        # Delete only the module file
+        $ pipeline delete my_pipeline --module
     """
     parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")

-
-
+    # If neither flag is set, default to deleting both
+    delete_cfg = cfg or not (cfg or module)
+    delete_module = module or not (cfg or module)
+
+    with PipelineManager(
         base_dir=base_dir,
         storage_options=parsed_storage_options or {},
-
-
+        log_level=log_level,
+    ) as manager:
+        manager.delete(name=name, cfg=delete_cfg, module=delete_module)
+
+        deleted_parts = []
+        if delete_cfg:
+            deleted_parts.append("config")
+        if delete_module:
+            deleted_parts.append("module")
+        logger.info(f"Pipeline '{name}' deleted ({', '.join(deleted_parts)})." if deleted_parts else f"Pipeline '{name}' - nothing specified to delete.")


 @app.command()
 def show_dag(
-    name: str
+    name: str = typer.Argument(..., help="Name of the pipeline to visualize"),
+    base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    format: str = typer.Option("png", help="Output format (e.g., png, svg, pdf). If 'raw', returns object."),
 ):
     """
-    Show the DAG of
+    Show the DAG (Directed Acyclic Graph) of a pipeline.
+
+    This command generates and displays a visual representation of the pipeline's
+    execution graph, showing how nodes are connected and dependencies between them.

     Args:
-        name: Name of the pipeline to
-        base_dir: Base directory
-        storage_options:
+        name: Name of the pipeline to visualize
+        base_dir: Base directory containing the pipeline
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        format: Output format for the visualization

     Examples:
-
+        # Show pipeline DAG in PNG format (default)
+        $ pipeline show-dag my_pipeline
+
+        # Generate SVG format visualization
+        $ pipeline show-dag my_pipeline --format svg
+
+        # Get raw graphviz object
+        $ pipeline show-dag my_pipeline --format raw
     """
     parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+    is_raw = format.lower() == "raw"

-    with
-        name=name,
+    with PipelineManager(
         base_dir=base_dir,
         storage_options=parsed_storage_options or {},
-
-
+        log_level=log_level,
+    ) as manager:
+        # Manager's show_dag likely handles rendering or returning raw object
+        try:
+            graph_or_none = manager.show_dag(name=name, format=format if not is_raw else "png", raw=is_raw)
+            if is_raw and graph_or_none:
+                print("Graphviz object returned (not rendered):")
+                # print(graph_or_none) # Or handle as needed
+            elif not is_raw:
+                logger.info(f"DAG for pipeline '{name}' displayed/saved (format: {format}).")
+        except ImportError:
+            logger.error("Graphviz is not installed. Cannot show/save DAG. Install with: pip install graphviz")
+        except Exception as e:
+            logger.error(f"Failed to generate DAG for pipeline '{name}': {e}")


 @app.command()
 def save_dag(
-    name: str
+    name: str = typer.Argument(..., help="Name of the pipeline to visualize"),
+    base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    format: str = typer.Option("png", help="Output format (e.g., png, svg, pdf)"),
+    output_path: str | None = typer.Option(None, help="Custom path to save the file (default: <name>.<format>)"),
 ):
     """
-    Save the DAG of
+    Save the DAG (Directed Acyclic Graph) of a pipeline to a file.
+
+    This command generates a visual representation of the pipeline's execution graph
+    and saves it to a file in the specified format.

     Args:
-        name: Name of the pipeline to
-        base_dir: Base directory
-        storage_options:
+        name: Name of the pipeline to visualize
+        base_dir: Base directory containing the pipeline
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        format: Output format for the visualization
+        output_path: Custom file path to save the output (defaults to pipeline name)

     Examples:
-
+        # Save pipeline DAG in PNG format (default)
+        $ pipeline save-dag my_pipeline
+
+        # Save in SVG format
+        $ pipeline save-dag my_pipeline --format svg
+
+        # Save to a custom location
+        $ pipeline save-dag my_pipeline --output-path ./visualizations/my_graph.png
     """
     parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
-
-    with Pipeline(
-        name=name,
+    with PipelineManager(
         base_dir=base_dir,
         storage_options=parsed_storage_options or {},
-
-
+        log_level=log_level,
+    ) as manager:
+        try:
+            file_path = manager.save_dag(name=name, format=format, output_path=output_path)
+            logger.info(f"DAG for pipeline '{name}' saved to {file_path}.")
+        except ImportError:
+            logger.error("Graphviz is not installed. Cannot save DAG. Install with: pip install graphviz")
+        except Exception as e:
+            logger.error(f"Failed to save DAG for pipeline '{name}': {e}")


 @app.command()
-def show_pipelines(
+def show_pipelines(
+    base_dir: str | None = typer.Option(None, help="Base directory containing pipelines"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    format: str = typer.Option("table", help="Output format (table, json, yaml)"),
+):
     """
-    List all available pipelines.
+    List all available pipelines in the project.
+
+    This command displays a list of all pipelines defined in the project,
+    providing an overview of what pipelines are available to run or schedule.

     Args:
-        base_dir: Base directory
-        storage_options:
+        base_dir: Base directory containing pipelines
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        format: Output format for the list (table, json, yaml)

     Examples:
-
+        # List all pipelines in table format (default)
+        $ pipeline show-pipelines
+
+        # Output in JSON format
+        $ pipeline show-pipelines --format json
+
+        # List pipelines from a specific directory
+        $ pipeline show-pipelines --base-dir /path/to/project
     """
     parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
     with PipelineManager(
         base_dir=base_dir,
         storage_options=parsed_storage_options or {},
+        log_level=log_level,
     ) as manager:
-        manager.show_pipelines()
+        manager.show_pipelines(format=format)


 @app.command()
 def show_summary(
-    name: str | None = None,
-    cfg: bool = True,
-
-
-
+    name: str | None = typer.Option(None, help="Name of specific pipeline to show (all pipelines if not specified)"),
+    cfg: bool = typer.Option(True, help="Include configuration details"),
+    code: bool = typer.Option(True, help="Include code/module details"),
+    project: bool = typer.Option(True, help="Include project context"),
+    base_dir: str | None = typer.Option(None, help="Base directory containing pipelines"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    to_html: bool = typer.Option(False, help="Output summary as HTML"),
+    to_svg: bool = typer.Option(False, help="Output summary as SVG (if applicable)"),
+    output_file: str | None = typer.Option(None, help="Save output to specified file instead of printing"),
 ):
     """
-    Show
+    Show summary information for one or all pipelines.
+
+    This command displays detailed information about pipelines including their
+    configuration, code structure, and project context. You can view information
+    for a specific pipeline or get an overview of all pipelines.

     Args:
-        name: Name of
-
-
-
-
+        name: Name of specific pipeline to summarize (all if not specified)
+        cfg: Include configuration details
+        code: Include code/module details
+        project: Include project context information
+        base_dir: Base directory containing pipelines
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        to_html: Generate HTML output instead of text
+        to_svg: Generate SVG output (where applicable)
+        output_file: File path to save the output instead of printing to console

     Examples:
-
+        # Show summary for all pipelines
+        $ pipeline show-summary
+
+        # Show summary for a specific pipeline
+        $ pipeline show-summary --name my_pipeline
+
+        # Show only configuration information
+        $ pipeline show-summary --name my_pipeline --cfg --no-code --no-project
+
+        # Generate HTML report
+        $ pipeline show-summary --to-html --output-file pipeline_report.html
     """
     parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+    with PipelineManager(
+        base_dir=base_dir,
+        storage_options=parsed_storage_options or {},
+        log_level=log_level,
+    ) as manager:
+        # Assumes manager.show_summary handles printing/returning formatted output
+        summary_output = manager.show_summary(
+            name=name,
+            cfg=cfg,
+            code=code,
+            project=project,
+            to_html=to_html,
+            to_svg=to_svg,
+        )
+
+        if summary_output:
+            if output_file:
+                with open(output_file, 'w') as f:
+                    f.write(summary_output)
+                logger.info(f"Summary saved to {output_file}")
+            else:
+                print(summary_output)
+        # Otherwise, assume manager printed the summary
+
+
+@app.command()
+def add_hook(
+    name: str = typer.Argument(..., help="Name of the pipeline to add the hook to"),
+    function_name: str = typer.Option(..., "--function", "-f", help="Name of the hook function defined in the pipeline module"),
+    type: Annotated[HookType, typer.Option(help="Type of hook to add")] = HookType.MQTT_BUILD_CONFIG,
+    to: str | None = typer.Option(None, help="Target node name or tag (required for node hooks)"),
+    base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+):
+    """
+    Add a hook to a pipeline configuration.
+
+    This command adds a hook function to a pipeline's configuration. Hooks are functions
+    that are called at specific points during pipeline execution to perform additional
+    tasks like logging, monitoring, or data validation.
+
+    Args:
+        name: Name of the pipeline to add the hook to
+        function_name: Name of the hook function (must be defined in the pipeline module)
+        type: Type of hook (determines when the hook is called during execution)
+        to: Target node or tag (required for node-specific hooks)
+        base_dir: Base directory containing the pipeline
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+
+    Examples:
+        # Add a post-run hook
+        $ pipeline add-hook my_pipeline --function log_results
+
+        # Add a pre-run hook
+        $ pipeline add-hook my_pipeline --function validate_inputs --type PRE_RUN
+
+        # Add a node-specific hook (executed before a specific node runs)
+        $ pipeline add-hook my_pipeline --function validate_data --type NODE_PRE_EXECUTE --to data_processor
+
+        # Add a hook for all nodes with a specific tag
+        $ pipeline add-hook my_pipeline --function log_metrics --type NODE_POST_EXECUTE --to @metrics
+    """
+    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+
+    # Validate 'to' argument for node hooks
+    if type in (HookType.NODE_PRE_EXECUTE, HookType.NODE_POST_EXECUTE) and not to:
+        raise typer.BadParameter("The '--to' option (target node/tag) is required for node hooks.")

     with PipelineManager(
         base_dir=base_dir,
         storage_options=parsed_storage_options or {},
+        log_level=log_level,
     ) as manager:
-
+        try:
+            manager.add_hook(
+                name=name,
+                type=type,
+                to=to,
+                function_name=function_name,
+            )
+            logger.info(f"Hook '{function_name}' added to pipeline '{name}' (type: {type.value}).")
+        except Exception as e:
+            logger.error(f"Failed to add hook to pipeline '{name}': {e}")