FlowerPower 0.9.13.1__py3-none-any.whl → 1.0.0b1__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
- flowerpower/__init__.py +17 -2
- flowerpower/cfg/__init__.py +201 -149
- flowerpower/cfg/base.py +122 -24
- flowerpower/cfg/pipeline/__init__.py +254 -0
- flowerpower/cfg/pipeline/adapter.py +66 -0
- flowerpower/cfg/pipeline/run.py +40 -11
- flowerpower/cfg/pipeline/schedule.py +69 -79
- flowerpower/cfg/project/__init__.py +149 -0
- flowerpower/cfg/project/adapter.py +57 -0
- flowerpower/cfg/project/job_queue.py +165 -0
- flowerpower/cli/__init__.py +92 -37
- flowerpower/cli/job_queue.py +878 -0
- flowerpower/cli/mqtt.py +32 -1
- flowerpower/cli/pipeline.py +559 -406
- flowerpower/cli/utils.py +29 -18
- flowerpower/flowerpower.py +12 -8
- flowerpower/fs/__init__.py +20 -2
- flowerpower/fs/base.py +350 -26
- flowerpower/fs/ext.py +797 -216
- flowerpower/fs/storage_options.py +1097 -55
- flowerpower/io/base.py +13 -18
- flowerpower/io/loader/__init__.py +28 -0
- flowerpower/io/loader/deltatable.py +7 -10
- flowerpower/io/metadata.py +1 -0
- flowerpower/io/saver/__init__.py +28 -0
- flowerpower/io/saver/deltatable.py +4 -3
- flowerpower/job_queue/__init__.py +252 -0
- flowerpower/job_queue/apscheduler/__init__.py +11 -0
- flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
- flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
- flowerpower/job_queue/apscheduler/manager.py +1063 -0
- flowerpower/job_queue/apscheduler/setup.py +524 -0
- flowerpower/job_queue/apscheduler/trigger.py +169 -0
- flowerpower/job_queue/apscheduler/utils.py +309 -0
- flowerpower/job_queue/base.py +382 -0
- flowerpower/job_queue/rq/__init__.py +10 -0
- flowerpower/job_queue/rq/_trigger.py +37 -0
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
- flowerpower/job_queue/rq/manager.py +1449 -0
- flowerpower/job_queue/rq/setup.py +150 -0
- flowerpower/job_queue/rq/utils.py +69 -0
- flowerpower/pipeline/__init__.py +5 -0
- flowerpower/pipeline/base.py +118 -0
- flowerpower/pipeline/io.py +407 -0
- flowerpower/pipeline/job_queue.py +505 -0
- flowerpower/pipeline/manager.py +1586 -0
- flowerpower/pipeline/registry.py +560 -0
- flowerpower/pipeline/runner.py +560 -0
- flowerpower/pipeline/visualizer.py +142 -0
- flowerpower/plugins/mqtt/__init__.py +12 -0
- flowerpower/plugins/mqtt/cfg.py +16 -0
- flowerpower/plugins/mqtt/manager.py +789 -0
- flowerpower/settings.py +110 -0
- flowerpower/utils/logging.py +21 -0
- flowerpower/utils/misc.py +57 -9
- flowerpower/utils/sql.py +122 -24
- flowerpower/utils/templates.py +2 -142
- flowerpower-1.0.0b1.dist-info/METADATA +324 -0
- flowerpower-1.0.0b1.dist-info/RECORD +94 -0
- flowerpower/_web/__init__.py +0 -61
- flowerpower/_web/routes/config.py +0 -103
- flowerpower/_web/routes/pipelines.py +0 -173
- flowerpower/_web/routes/scheduler.py +0 -136
- flowerpower/cfg/pipeline/tracker.py +0 -14
- flowerpower/cfg/project/open_telemetry.py +0 -8
- flowerpower/cfg/project/tracker.py +0 -11
- flowerpower/cfg/project/worker.py +0 -19
- flowerpower/cli/scheduler.py +0 -309
- flowerpower/cli/web.py +0 -44
- flowerpower/event_handler.py +0 -23
- flowerpower/mqtt.py +0 -609
- flowerpower/pipeline.py +0 -2499
- flowerpower/scheduler.py +0 -680
- flowerpower/tui.py +0 -79
- flowerpower/utils/datastore.py +0 -186
- flowerpower/utils/eventbroker.py +0 -127
- flowerpower/utils/executor.py +0 -58
- flowerpower/utils/trigger.py +0 -140
- flowerpower-0.9.13.1.dist-info/METADATA +0 -586
- flowerpower-0.9.13.1.dist-info/RECORD +0 -76
- /flowerpower/{cfg/pipeline/params.py → cli/worker.py} +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/WHEEL +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.9.13.1.dist-info → flowerpower-1.0.0b1.dist-info}/top_level.txt +0 -0
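For orientation before the file-level diff below: the 1.0.0b1 commands in cli/pipeline.py all follow the same shape, parsing JSON/dict-style options and then calling the matching method on a PipelineManager used as a context manager. A minimal sketch of that pattern in plain Python, using only the constructor and run() arguments visible in the diff — the import path is inferred from the file list above, and the pipeline name and values are illustrative, not documented API:

# Sketch based on the 1.0.0b1 cli/pipeline.py shown below; illustrative only.
from flowerpower.pipeline.manager import PipelineManager  # path assumed from flowerpower/pipeline/manager.py above

# Rough equivalent of: $ pipeline run my_pipeline --inputs '{"limit": 100}' --max-retries 3
with PipelineManager(base_dir=".", storage_options={}, log_level="info") as manager:
    manager.run(
        name="my_pipeline",           # hypothetical pipeline name
        inputs={"limit": 100},        # value the CLI would parse from --inputs
        final_vars=["output_table"],  # value the CLI would parse from --final-vars
        max_retries=3,
        retry_delay=1.0,
        jitter_factor=0.1,
    )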
flowerpower/cli/pipeline.py
CHANGED
@@ -1,647 +1,800 @@
-
-
+# Import necessary libraries
import typer
-from typing_extensions import Annotated
from loguru import logger
+from typing_extensions import Annotated
+import datetime as dt
+import duration_parser
+from ..pipeline.manager import HookType, PipelineManager
+from ..utils.logging import setup_logging
+from .utils import parse_dict_or_list_param#, parse_param_dict

-
-# Import your existing pipeline functions
-# from ..pipeline import (
-#     add as add_pipeline_,
-#     add_job as add_pipeline_job_,
-#     all_pipelines as all_pipelines_,
-#     delete as delete_pipeline_,
-#     get_summary as get_pipeline_summary_,
-#     new as new_pipeline_,
-#     run as run_pipeline_,
-#     run_job as run_pipeline_job_,
-#     schedule as schedule_pipeline_,
-#     save_dag as save_pipeline_dag_,
-#     show_dag as show_pipeline_dag_,
-#     show_summary as show_pipeline_summary_,
-#     # start_mqtt_listener as start_mqtt_listener_,
-# )
-from ..pipeline import Pipeline, PipelineManager, HookType
-from .utils import parse_dict_or_list_param, parse_param_dict
-
-# Optional imports
-if importlib.util.find_spec("apscheduler"):
-    from ..scheduler import get_schedule_manager
-    from ..scheduler import start_worker as start_worker_
-else:
-    get_schedule_manager = None
-    start_worker_ = None
-
+setup_logging()

app = typer.Typer(help="Pipeline management commands")


@app.command()
def run(
-    name: str,
-    executor: str | None = None,
-    base_dir: str | None = None,
-    inputs: str | None = None,
-    final_vars: str | None = None,
-    config: str | None = None,
-
-
-
-
+    name: str = typer.Argument(..., help="Name of the pipeline to run"),
+    executor: str | None = typer.Option(None, help="Executor to use for running the pipeline"),
+    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+    inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
+    final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
+    config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
+    cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
+    max_retries: int = typer.Option(0, help="Maximum number of retry attempts on failure"),
+    retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
+    jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
):
    """
-    Run
+    Run a pipeline immediately.
+
+    This command executes a pipeline with the specified configuration and inputs.
+    The pipeline will run synchronously, and the command will wait for completion.

    Args:
        name: Name of the pipeline to run
-        executor:
-        base_dir: Base directory
-        inputs: Input parameters
-        final_vars: Final variables
-        config:
-
-
-
-
+        executor: Type of executor to use
+        base_dir: Base directory containing pipelines and configurations
+        inputs: Input parameters for the pipeline
+        final_vars: Final variables to request from the pipeline
+        config: Configuration for the Hamilton executor
+        cache: Cache configuration for improved performance
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        with_adapter: Configuration for adapters like trackers or monitors
+        max_retries: Maximum number of retry attempts on failure
+        retry_delay: Base delay between retries in seconds
+        jitter_factor: Random factor applied to delay for jitter (0-1)

    Examples:
-
-
+        # Run a pipeline with default settings
+        $ pipeline run my_pipeline
+
+        # Run with custom inputs
+        $ pipeline run my_pipeline --inputs '{"data_path": "data/myfile.csv", "limit": 100}'

-
-
+        # Specify which final variables to calculate
+        $ pipeline run my_pipeline --final-vars '["output_table", "summary_metrics"]'

-
-
+        # Configure caching
+        $ pipeline run my_pipeline --cache '{"type": "memory", "ttl": 3600}'

-
-
+        # Use a different executor
+        $ pipeline run my_pipeline --executor distributed

-
-
+        # Enable adapters for monitoring/tracking
+        $ pipeline run my_pipeline --with-adapter '{"tracker": true, "opentelemetry": true}'
+
+        # Set a specific logging level
+        $ pipeline run my_pipeline --log-level debug
+
+        # Configure automatic retries on failure
+        $ pipeline run my_pipeline --max-retries 3 --retry-delay 2.0 --jitter-factor 0.2
    """
    parsed_inputs = parse_dict_or_list_param(inputs, "dict")
    parsed_config = parse_dict_or_list_param(config, "dict")
+    parsed_cache = parse_dict_or_list_param(cache, "dict")
    parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+    parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")

-    with Pipeline(
-        name=name,
+    with PipelineManager(
        base_dir=base_dir,
-        storage_options=parsed_storage_options or {},
-
-
-
+        storage_options=parsed_storage_options or {},
+        log_level=log_level,
+    ) as manager:
+        _ = manager.run(
+            name=name,
            inputs=parsed_inputs,
            final_vars=parsed_final_vars,
            config=parsed_config,
-
-
-
+            cache=parsed_cache,
+            executor_cfg=executor,
+            with_adapter_cfg=parsed_with_adapter,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+            jitter_factor=jitter_factor,
        )
+        logger.info(f"Pipeline '{name}' finished running.")


@app.command()
def run_job(
-    name: str,
-    executor: str | None = None,
-    base_dir: str | None = None,
-    inputs: str | None = None,
-    final_vars: str | None = None,
-    config: str | None = None,
-
-
-
-
+    name: str = typer.Argument(..., help="Name or ID of the pipeline job to run"),
+    executor: str | None = typer.Option(None, help="Executor to use for running the job"),
+    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+    inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
+    final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
+    config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
+    cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
+    max_retries: int = typer.Option(0, help="Maximum number of retry attempts on failure"),
+    retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
+    jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
):
    """
-    Run
+    Run a specific pipeline job.
+
+    This command runs an existing job by its ID. The job should have been previously
+    added to the system via the add-job command or through scheduling.

    Args:
-        name:
-        executor:
-        base_dir: Base directory
-        inputs: Input parameters
-        final_vars: Final variables
-        config:
-
-
-
-
+        name: Job ID to run
+        executor: Type of executor to use (maps to executor_cfg in manager)
+        base_dir: Base directory containing pipelines and configurations
+        inputs: Input parameters for the pipeline
+        final_vars: Final variables to request from the pipeline
+        config: Configuration for the Hamilton executor
+        cache: Cache configuration
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        with_adapter: Configuration for adapters like trackers or monitors
+        max_retries: Maximum number of retry attempts on failure
+        retry_delay: Base delay between retries in seconds
+        jitter_factor: Random factor applied to delay for jitter (0-1)

    Examples:
-
-
+        # Run a job with a specific ID
+        $ pipeline run-job job-123456

-
-
+        # Run a job with custom inputs
+        $ pipeline run-job job-123456 --inputs '{"data_path": "data/myfile.csv"}'

-
-
+        # Specify a different executor
+        $ pipeline run-job job-123456 --executor local

-
-
+        # Use caching for better performance
+        $ pipeline run-job job-123456 --cache '{"type": "memory"}'

-
-
+        # Configure adapters for monitoring
+        $ pipeline run-job job-123456 --with-adapter '{"tracker": true, "opentelemetry": false}'
+
+        # Set up automatic retries for resilience
+        $ pipeline run-job job-123456 --max-retries 3 --retry-delay 2.0
    """
    parsed_inputs = parse_dict_or_list_param(inputs, "dict")
    parsed_config = parse_dict_or_list_param(config, "dict")
+    parsed_cache = parse_dict_or_list_param(cache, "dict")
    parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+    parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")

-    with Pipeline(
-        name=name,
+    with PipelineManager(
        base_dir=base_dir,
-        storage_options=parsed_storage_options or {},
-
-
-
+        storage_options=parsed_storage_options or {},
+        log_level=log_level,
+    ) as manager:
+        _ = manager.run_job(
+            name=name,
            inputs=parsed_inputs,
            final_vars=parsed_final_vars,
            config=parsed_config,
-
-
-
+            cache=parsed_cache,
+            executor_cfg=executor,
+            with_adapter_cfg=parsed_with_adapter,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+            jitter_factor=jitter_factor,
        )
+        logger.info(f"Job '{name}' finished running.")


@app.command()
def add_job(
-    name: str,
-    executor: str | None = None,
-    base_dir: str | None = None,
-    inputs: str | None = None,
-    final_vars: str | None = None,
-    config: str | None = None,
-
-
-
-
+    name: str = typer.Argument(..., help="Name of the pipeline to add as a job"),
+    executor: str | None = typer.Option(None, help="Executor to use for running the job"),
+    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+    inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
+    final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
+    config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
+    cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
+    run_at: str | None = typer.Option(None, help="Run at a specific time (ISO format)"),
+    run_in: str | None = typer.Option(None, help="Run in a specific interval (e.g., '5m', '1h', '12m34s')"),
+    max_retries: int = typer.Option(3, help="Maximum number of retry attempts on failure"),
+    retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
+    jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
):
    """
-    Add a job to the
+    Add a pipeline job to the queue.
+
+    This command adds a job to the queue for later execution. The job is based on
+    an existing pipeline with customized inputs and configuration.

    Args:
-        name:
-        executor:
-        base_dir: Base directory
-        inputs: Input parameters
-        final_vars: Final variables
-        config:
-
-
-
-
+        name: Pipeline name to add as a job
+        executor: Type of executor to use
+        base_dir: Base directory containing pipelines and configurations
+        inputs: Input parameters for the pipeline
+        final_vars: Final variables to request from the pipeline
+        config: Configuration for the Hamilton executor
+        cache: Cache configuration
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        with_adapter: Configuration for adapters like trackers or monitors
+        run_at: Run the job at a specific time (ISO format)
+        run_in: Run the job in a specific interval (e.g., '5m', '1h')
+        max_retries: Maximum number of retry attempts on failure
+        retry_delay: Base delay between retries in seconds
+        jitter_factor: Random factor applied to delay for jitter (0-1)

    Examples:
-
-
+        # Add a basic job
+        $ pipeline add-job my_pipeline

-
-
+        # Add a job with custom inputs
+        $ pipeline add-job my_pipeline --inputs '{"data_path": "data/myfile.csv"}'

-
-
+        # Specify final variables to calculate
+        $ pipeline add-job my_pipeline --final-vars '["output_table", "metrics"]'

-
-
+        # Configure caching
+        $ pipeline add-job my_pipeline --cache '{"type": "memory", "ttl": 3600}'

-
-
+        # Use a specific log level
+        $ pipeline add-job my_pipeline --log-level debug
+
+        # Configure automatic retries for resilience
+        $ pipeline add-job my_pipeline --max-retries 5 --retry-delay 2.0 --jitter-factor 0.2
    """
    parsed_inputs = parse_dict_or_list_param(inputs, "dict")
    parsed_config = parse_dict_or_list_param(config, "dict")
+    parsed_cache = parse_dict_or_list_param(cache, "dict")
    parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+    parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
+    run_at = dt.datetime.fromisoformat(run_at) if run_at else None
+    run_in = duration_parser.parse(run_in) if run_in else None

-
-
-    with Pipeline(
-        name=name,
+    with PipelineManager(
        base_dir=base_dir,
        storage_options=parsed_storage_options or {},
-
-
-
+        log_level=log_level,
+    ) as manager:
+        job_id = manager.add_job(
+            name=name,
            inputs=parsed_inputs,
            final_vars=parsed_final_vars,
            config=parsed_config,
-
-
-
+            cache=parsed_cache,
+            executor_cfg=executor,
+            with_adapter_cfg=parsed_with_adapter,
+            run_at=run_at,
+            run_in=run_in,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+            jitter_factor=jitter_factor,
        )
+        logger.info(f"Job {job_id} added for pipeline '{name}'.")


@app.command()
def schedule(
-    name: str,
-    executor: str | None = None,
-    base_dir: str | None = None,
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    interval_params: str | None = None,
-    calendarinterval_params: str | None = None,
-    date_params: str | None = None,
-    storage_options: str | None = None,
-    overwrite: bool = False,
+    name: str = typer.Argument(..., help="Name of the pipeline to schedule"),
+    executor: str | None = typer.Option(None, help="Executor to use for running the job"),
+    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+    inputs: str | None = typer.Option(None, help="Input parameters as JSON, dict string, or key=value pairs"),
+    final_vars: str | None = typer.Option(None, help="Final variables as JSON or list"),
+    config: str | None = typer.Option(None, help="Config for the hamilton pipeline executor"),
+    cache: str | None = typer.Option(None, help="Cache configuration as JSON or dict string"),
+    cron: str | None = typer.Option(None, help="Cron expression for scheduling"),
+    interval: str | None = typer.Option(None, help="Interval for scheduling (e.g., '5m', '1h')"),
+    date: str | None = typer.Option(None, help="Specific date and time for scheduling (ISO format)"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    with_adapter: str | None = typer.Option(None, help="Adapter configuration as JSON or dict string"),
+    overwrite: bool = typer.Option(False, help="Overwrite existing schedule if it exists"),
+    schedule_id: str | None = typer.Option(None, help="Custom ID for the schedule (autogenerated if not provided)"),
+    max_retries: int = typer.Option(3, help="Maximum number of retry attempts on failure"),
+    retry_delay: float = typer.Option(1.0, help="Base delay between retries in seconds"),
+    jitter_factor: float = typer.Option(0.1, help="Random factor applied to delay for jitter (0-1)"),
):
    """
-    Schedule a pipeline
+    Schedule a pipeline to run at specified times.
+
+    This command schedules a pipeline to run automatically based on various
+    scheduling triggers like cron expressions, time intervals, or specific dates.

    Args:
-        name:
-        executor:
-        base_dir: Base directory
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        interval_params: Interval parameters as JSON or key=value pairs
-        calendarinterval_params: Calendar interval parameters as JSON or key=value pairs
-        date_params: Date parameters as JSON or key=value pairs
-        storage_options: Storage options as JSON, dict string, or key=value pairs
-        overwrite: Overwrite existing schedule
+        name: Pipeline name to schedule
+        executor: Type of executor to use
+        base_dir: Base directory containing pipelines and configurations
+        inputs: Input parameters for the pipeline
+        final_vars: Final variables to request from the pipeline
+        config: Configuration for the Hamilton executor
+        cache: Cache configuration
+        cron: Cron expression for scheduling (e.g., "0 * * * *")
+        interval: Interval for scheduling (e.g., "5m", "1h")
+        date: Specific date and time for scheduling (ISO format)
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        with_adapter: Configuration for adapters like trackers or monitors
+        overwrite: Overwrite existing schedule with same ID
+        schedule_id: Custom identifier for the schedule
+        max_retries: Maximum number of retry attempts on failure
+        retry_delay: Base delay between retries in seconds
+        jitter_factor: Random factor applied to delay for jitter (0-1)

    Examples:
-
-
-
-        # Dict string inputs
-        pipeline schedule my_pipeline --inputs "{'key': 'value'}"
-
-        # Key-value pair inputs
-        pipeline schedule my_pipeline --inputs 'key1=value1,key2=value2'
-
-        # List final vars
-        pipeline schedule my_pipeline --final-vars '["var1", "var2"]'
+        # Schedule with cron expression (every hour)
+        $ pipeline schedule my_pipeline --trigger-type cron --crontab "0 * * * *"

-
-
+        # Schedule to run every 15 minutes
+        $ pipeline schedule my_pipeline --trigger-type interval --interval_params minutes=15

-
-
+        # Schedule to run at a specific date and time
+        $ pipeline schedule my_pipeline --trigger-type date --date_params run_date="2025-12-31 23:59:59"

-
-
+        # Schedule with custom inputs and cache settings
+        $ pipeline schedule my_pipeline --inputs '{"source": "database"}' --cache '{"type": "redis"}'

-
-
-
-        # Date schedule
-        pipeline schedule my_pipeline --trigger-type date --date_params run_date='2021-01-01 12:00:01'
+        # Create a schedule in paused state
+        $ pipeline schedule my_pipeline --crontab "0 9 * * 1-5" --paused

+        # Set a custom schedule ID
+        $ pipeline schedule my_pipeline --crontab "0 12 * * *" --schedule_id "daily-noon-run"
+
+        # Configure automatic retries for resilience
+        $ pipeline schedule my_pipeline --max-retries 5 --retry-delay 2.0 --jitter-factor 0.2
    """
-    if get_schedule_manager is None:
-        raise ValueError("APScheduler not installed. Please install it first.")
-
-    # Parse inputs
    parsed_inputs = parse_dict_or_list_param(inputs, "dict")
    parsed_config = parse_dict_or_list_param(config, "dict")
+    parsed_cache = parse_dict_or_list_param(cache, "dict")
    parsed_final_vars = parse_dict_or_list_param(final_vars, "list")
    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
+    parsed_with_adapter = parse_dict_or_list_param(with_adapter, "dict")
+    interval = duration_parser.parse(interval) if interval else None
+    cron = cron if cron else None
+    date = dt.datetime.fromisoformat(date) if date else None

-
-    cron_params_dict = parse_param_dict(cron_params)
-    interval_params_dict = parse_param_dict(interval_params)
-    calendarinterval_params_dict = parse_param_dict(calendarinterval_params)
-    date_params_dict = parse_param_dict(date_params)
-
-    # Combine all parameter dictionaries
-    kwargs = {
-        **cron_params_dict,
-        **interval_params_dict,
-        **calendarinterval_params_dict,
-        **date_params_dict,
-    }
-
-    # Add crontab if provided
-    if crontab is not None:
-        kwargs["crontab"] = crontab
-
-    # Convert numeric parameters
-    for key in ["weeks", "days", "hours", "minutes", "seconds"]:
-        if key in kwargs:
-            try:
-                kwargs[key] = float(kwargs[key])
-            except ValueError:
-                logger.warning(f"Could not convert {key} to float: {kwargs[key]}")
-
-    with Pipeline(
-        name=name,
+    with PipelineManager(
        base_dir=base_dir,
        storage_options=parsed_storage_options or {},
-
-
-
-
+        log_level=log_level,
+    ) as manager:
+        # Combine common schedule kwargs
+
+        id_ = manager.schedule(
+            name=name,
            inputs=parsed_inputs,
            final_vars=parsed_final_vars,
            config=parsed_config,
-
-
-
-
-
-
-        max_jitter=max_jitter,
-        max_running_jobs=max_running_jobs,
-        conflict_policy=conflict_policy,
+            cache=parsed_cache,
+            executor_cfg=executor,
+            with_adapter_cfg=parsed_with_adapter,
+            cron=cron,
+            interval=interval,
+            date=date,
            overwrite=overwrite,
-
+            schedule_id=schedule_id,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+            jitter_factor=jitter_factor,
        )

-    logger.info(f"
+        logger.info(f"Pipeline '{name}' scheduled with ID {id_}.")


@app.command()
def schedule_all(
-    executor: str | None = None,
-    base_dir: str | None = None,
-
-
-
-    with_tracker: bool = False,
-    with_opentelemetry: bool = False,
-    with_progressbar: bool = False,
-    paused: bool = False,
-    coalesce: str = "latest",
-    misfire_grace_time: float | None = None,
-    max_jitter: float | None = None,
-    max_running_jobs: int | None = None,
-    conflict_policy: str = "do_nothing",
-    storage_options: str | None = None,
-    overwrite: bool = False,
+    executor: str | None = typer.Option(None, help="Override executor specified in pipeline configs"),
+    base_dir: str | None = typer.Option(None, help="Base directory containing pipelines and configurations"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    overwrite: bool = typer.Option(False, help="Overwrite existing schedules if they exist"),
):
    """
-    Schedule all pipelines
+    Schedule all pipelines based on their individual configurations.
+
+    This command reads the configuration files for all pipelines in the project
+    and schedules them based on their individual scheduling settings. This is useful
+    for setting up all scheduled pipelines at once after deployment or system restart.

    Args:
-        executor:
-        base_dir: Base directory
-
-
-
-        with_tracker: Enable tracking with hamilton ui
-        with_opentelemetry: Enable OpenTelemetry tracing
-        with_progressbar: Enable progress bar
-        paused: Start the job in paused state
-        coalesce: Coalesce policy
-        misfire_grace_time: Misfire grace time
-        max_jitter: Maximum jitter
-        max_running_jobs: Maximum running jobs
-        conflict_policy: Conflict policy
-        storage_options: Storage options as JSON, dict string, or key=value pairs
-        overwrite: Overwrite existing schedule
+        executor: Override executor specified in pipeline configs
+        base_dir: Base directory containing pipelines and configurations
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        overwrite: Whether to overwrite existing schedules

    Examples:
-
-
-
-
+        # Schedule all pipelines using their configurations
+        $ pipeline schedule-all
+
+        # Force overwrite of existing schedules
+        $ pipeline schedule-all --overwrite
+
+        # Override executor for all pipelines
+        $ pipeline schedule-all --executor distributed

+        # Set custom base directory
+        $ pipeline schedule-all --base-dir /path/to/project
+    """
    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")

    with PipelineManager(
        base_dir=base_dir,
        storage_options=parsed_storage_options or {},
+        log_level=log_level,
    ) as manager:
        manager.schedule_all(
-            executor=executor,
-            inputs=inputs,
-            final_vars=final_vars,
-            config=config,
-            with_tracker=with_tracker,
-            with_opentelemetry=with_opentelemetry,
-            with_progressbar=with_progressbar,
-            paused=paused,
-            coalesce=coalesce,
-            misfire_grace_time=misfire_grace_time,
-            max_jitter=max_jitter,
-            max_running_jobs=max_running_jobs,
-            conflict_policy=conflict_policy,
            overwrite=overwrite,
+            executor_cfg=executor
        )
+        logger.info("Scheduled all pipelines based on their configurations.")


@app.command()
def new(
-    name: str,
-    base_dir: str | None = None,
-    storage_options: str | None = None,
-
+    name: str = typer.Argument(..., help="Name of the pipeline to create"),
+    base_dir: str | None = typer.Option(None, help="Base directory for the pipeline"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    overwrite: bool = typer.Option(False, help="Overwrite existing pipeline if it exists"),
):
    """
-    Create a new pipeline.
+    Create a new pipeline structure.
+
+    This command creates a new pipeline with the necessary directory structure,
+    configuration file, and skeleton module file. It prepares all the required
+    components for you to start implementing your pipeline logic.

    Args:
-        name: Name
-        base_dir: Base directory
-
-
+        name: Name for the new pipeline
+        base_dir: Base directory to create the pipeline in
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        overwrite: Whether to overwrite existing pipeline with the same name

    Examples:
-
+        # Create a new pipeline with default settings
+        $ pipeline new my_new_pipeline
+
+        # Create a pipeline, overwriting if it exists
+        $ pipeline new my_new_pipeline --overwrite
+
+        # Create a pipeline in a specific directory
+        $ pipeline new my_new_pipeline --base-dir /path/to/project
    """
    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
-
    with PipelineManager(
        base_dir=base_dir,
        storage_options=parsed_storage_options or {},
+        log_level=log_level,
    ) as manager:
        manager.new(name=name, overwrite=overwrite)
+        logger.info(f"New pipeline structure created for '{name}'.")


@app.command()
def delete(
-    name: str,
-    base_dir: str | None = None,
-    cfg: bool = False,
-    module: bool = False,
-
-
+    name: str = typer.Argument(..., help="Name of the pipeline to delete"),
+    base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
+    cfg: bool = typer.Option(False, "--cfg", "-c", help="Delete only the configuration file"),
+    module: bool = typer.Option(False, "--module", "-m", help="Delete only the pipeline module"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
):
    """
-    Delete
+    Delete a pipeline's configuration and/or module files.
+
+    This command removes a pipeline's configuration file and/or module file from the project.
+    If neither --cfg nor --module is specified, both will be deleted.

    Args:
        name: Name of the pipeline to delete
-        base_dir: Base directory
-        cfg:
-        module:
-
-
+        base_dir: Base directory containing the pipeline
+        cfg: Delete only the configuration file
+        module: Delete only the pipeline module
+        storage_options: Options for storage backends
+        log_level: Set the logging level

    Examples:
-
+        # Delete a pipeline (both config and module)
+        $ pipeline delete my_pipeline
+
+        # Delete only the configuration file
+        $ pipeline delete my_pipeline --cfg
+
+        # Delete only the module file
+        $ pipeline delete my_pipeline --module
    """
    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")

-
-
+    # If neither flag is set, default to deleting both
+    delete_cfg = cfg or not (cfg or module)
+    delete_module = module or not (cfg or module)
+
+    with PipelineManager(
        base_dir=base_dir,
        storage_options=parsed_storage_options or {},
-
-
+        log_level=log_level,
+    ) as manager:
+        manager.delete(name=name, cfg=delete_cfg, module=delete_module)
+
+        deleted_parts = []
+        if delete_cfg:
+            deleted_parts.append("config")
+        if delete_module:
+            deleted_parts.append("module")
+        logger.info(f"Pipeline '{name}' deleted ({', '.join(deleted_parts)})." if deleted_parts else f"Pipeline '{name}' - nothing specified to delete.")


@app.command()
def show_dag(
-    name: str
+    name: str = typer.Argument(..., help="Name of the pipeline to visualize"),
+    base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    format: str = typer.Option("png", help="Output format (e.g., png, svg, pdf). If 'raw', returns object."),
):
    """
-    Show the DAG of
+    Show the DAG (Directed Acyclic Graph) of a pipeline.
+
+    This command generates and displays a visual representation of the pipeline's
+    execution graph, showing how nodes are connected and dependencies between them.

    Args:
-        name: Name of the pipeline to
-        base_dir: Base directory
-        storage_options:
-
+        name: Name of the pipeline to visualize
+        base_dir: Base directory containing the pipeline
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        format: Output format for the visualization

    Examples:
-
+        # Show pipeline DAG in PNG format (default)
+        $ pipeline show-dag my_pipeline
+
+        # Generate SVG format visualization
+        $ pipeline show-dag my_pipeline --format svg
+
+        # Get raw graphviz object
+        $ pipeline show-dag my_pipeline --format raw
    """
    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
-
+    is_raw = format.lower() == "raw"

-    with Pipeline(
-        name=name,
+    with PipelineManager(
        base_dir=base_dir,
        storage_options=parsed_storage_options or {},
-
-
+        log_level=log_level,
+    ) as manager:
+        # Manager's show_dag likely handles rendering or returning raw object
+        try:
+            graph_or_none = manager.show_dag(name=name, format=format if not is_raw else "png", raw=is_raw)
+            if is_raw and graph_or_none:
+                print("Graphviz object returned (not rendered):")
+                # print(graph_or_none) # Or handle as needed
+            elif not is_raw:
+                logger.info(f"DAG for pipeline '{name}' displayed/saved (format: {format}).")
+        except ImportError:
+            logger.error("Graphviz is not installed. Cannot show/save DAG. Install with: pip install graphviz")
+        except Exception as e:
+            logger.error(f"Failed to generate DAG for pipeline '{name}': {e}")


@app.command()
def save_dag(
-    name: str
+    name: str = typer.Argument(..., help="Name of the pipeline to visualize"),
+    base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    format: str = typer.Option("png", help="Output format (e.g., png, svg, pdf)"),
+    output_path: str | None = typer.Option(None, help="Custom path to save the file (default: <name>.<format>)"),
):
    """
-    Save the DAG of
+    Save the DAG (Directed Acyclic Graph) of a pipeline to a file.
+
+    This command generates a visual representation of the pipeline's execution graph
+    and saves it to a file in the specified format.

    Args:
-        name: Name of the pipeline to
-        base_dir: Base directory
-        storage_options:
-
+        name: Name of the pipeline to visualize
+        base_dir: Base directory containing the pipeline
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        format: Output format for the visualization
+        output_path: Custom file path to save the output (defaults to pipeline name)

    Examples:
-
+        # Save pipeline DAG in PNG format (default)
+        $ pipeline save-dag my_pipeline
+
+        # Save in SVG format
+        $ pipeline save-dag my_pipeline --format svg
+
+        # Save to a custom location
+        $ pipeline save-dag my_pipeline --output-path ./visualizations/my_graph.png
    """
    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
-
-
-    with Pipeline(
-        name=name,
+    with PipelineManager(
        base_dir=base_dir,
        storage_options=parsed_storage_options or {},
-
-
+        log_level=log_level,
+    ) as manager:
+        try:
+            file_path = manager.save_dag(name=name, format=format, output_path=output_path)
+            logger.info(f"DAG for pipeline '{name}' saved to {file_path}.")
+        except ImportError:
+            logger.error("Graphviz is not installed. Cannot save DAG. Install with: pip install graphviz")
+        except Exception as e:
+            logger.error(f"Failed to save DAG for pipeline '{name}': {e}")


@app.command()
-def show_pipelines(
+def show_pipelines(
+    base_dir: str | None = typer.Option(None, help="Base directory containing pipelines"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    format: str = typer.Option("table", help="Output format (table, json, yaml)"),
+):
    """
-    List all available pipelines.
+    List all available pipelines in the project.
+
+    This command displays a list of all pipelines defined in the project,
+    providing an overview of what pipelines are available to run or schedule.

    Args:
-        base_dir: Base directory
-        storage_options:
+        base_dir: Base directory containing pipelines
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        format: Output format for the list (table, json, yaml)

    Examples:
-
+        # List all pipelines in table format (default)
+        $ pipeline show-pipelines
+
+        # Output in JSON format
+        $ pipeline show-pipelines --format json
+
+        # List pipelines from a specific directory
+        $ pipeline show-pipelines --base-dir /path/to/project
    """
    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
    with PipelineManager(
        base_dir=base_dir,
        storage_options=parsed_storage_options or {},
+        log_level=log_level,
    ) as manager:
-        manager.show_pipelines()
+        manager.show_pipelines(format=format)


@app.command()
def show_summary(
-    name: str | None = None,
-    cfg: bool = True,
-
-
-
+    name: str | None = typer.Option(None, help="Name of specific pipeline to show (all pipelines if not specified)"),
+    cfg: bool = typer.Option(True, help="Include configuration details"),
+    code: bool = typer.Option(True, help="Include code/module details"),
+    project: bool = typer.Option(True, help="Include project context"),
+    base_dir: str | None = typer.Option(None, help="Base directory containing pipelines"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
+    to_html: bool = typer.Option(False, help="Output summary as HTML"),
+    to_svg: bool = typer.Option(False, help="Output summary as SVG (if applicable)"),
+    output_file: str | None = typer.Option(None, help="Save output to specified file instead of printing"),
):
    """
-    Show
+    Show summary information for one or all pipelines.
+
+    This command displays detailed information about pipelines including their
+    configuration, code structure, and project context. You can view information
+    for a specific pipeline or get an overview of all pipelines.

    Args:
-        name: Name of
-
-
-
-
+        name: Name of specific pipeline to summarize (all if not specified)
+        cfg: Include configuration details
+        code: Include code/module details
+        project: Include project context information
+        base_dir: Base directory containing pipelines
+        storage_options: Options for storage backends
+        log_level: Set the logging level
+        to_html: Generate HTML output instead of text
+        to_svg: Generate SVG output (where applicable)
+        output_file: File path to save the output instead of printing to console

    Examples:
-
+        # Show summary for all pipelines
+        $ pipeline show-summary
+
+        # Show summary for a specific pipeline
+        $ pipeline show-summary --name my_pipeline
+
+        # Show only configuration information
+        $ pipeline show-summary --name my_pipeline --cfg --no-code --no-project
+
+        # Generate HTML report
+        $ pipeline show-summary --to-html --output-file pipeline_report.html
    """
    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")
-
    with PipelineManager(
        base_dir=base_dir,
        storage_options=parsed_storage_options or {},
+        log_level=log_level,
    ) as manager:
-        manager.show_summary
+        # Assumes manager.show_summary handles printing/returning formatted output
+        summary_output = manager.show_summary(
+            name=name,
+            cfg=cfg,
+            code=code,
+            project=project,
+            to_html=to_html,
+            to_svg=to_svg,
+        )
+
+        if summary_output:
+            if output_file:
+                with open(output_file, 'w') as f:
+                    f.write(summary_output)
+                logger.info(f"Summary saved to {output_file}")
+            else:
+                print(summary_output)
+        # Otherwise, assume manager printed the summary
+

@app.command()
def add_hook(
-    name: str,
-
-
-
-    base_dir: str | None = None,
-    storage_options: str | None = None,
+    name: str = typer.Argument(..., help="Name of the pipeline to add the hook to"),
+    function_name: str = typer.Option(..., "--function", "-f", help="Name of the hook function defined in the pipeline module"),
+    type: Annotated[HookType, typer.Option(help="Type of hook to add")] = HookType.MQTT_BUILD_CONFIG,
+    to: str | None = typer.Option(None, help="Target node name or tag (required for node hooks)"),
+    base_dir: str | None = typer.Option(None, help="Base directory containing the pipeline"),
+    storage_options: str | None = typer.Option(None, help="Storage options as JSON, dict string, or key=value pairs"),
+    log_level: str | None = typer.Option(None, help="Logging level (debug, info, warning, error, critical)"),
):
    """
-    Add a hook to
+    Add a hook to a pipeline configuration.
+
+    This command adds a hook function to a pipeline's configuration. Hooks are functions
+    that are called at specific points during pipeline execution to perform additional
+    tasks like logging, monitoring, or data validation.

    Args:
        name: Name of the pipeline to add the hook to
-
-
-
-        base_dir: Base directory
-        storage_options:
+        function_name: Name of the hook function (must be defined in the pipeline module)
+        type: Type of hook (determines when the hook is called during execution)
+        to: Target node or tag (required for node-specific hooks)
+        base_dir: Base directory containing the pipeline
+        storage_options: Options for storage backends
+        log_level: Set the logging level

    Examples:
-
+        # Add a post-run hook
+        $ pipeline add-hook my_pipeline --function log_results
+
+        # Add a pre-run hook
+        $ pipeline add-hook my_pipeline --function validate_inputs --type PRE_RUN
+
+        # Add a node-specific hook (executed before a specific node runs)
+        $ pipeline add-hook my_pipeline --function validate_data --type NODE_PRE_EXECUTE --to data_processor
+
+        # Add a hook for all nodes with a specific tag
+        $ pipeline add-hook my_pipeline --function log_metrics --type NODE_POST_EXECUTE --to @metrics
    """
    parsed_storage_options = parse_dict_or_list_param(storage_options, "dict")

-
-
+    # Validate 'to' argument for node hooks
+    if type in (HookType.NODE_PRE_EXECUTE, HookType.NODE_POST_EXECUTE) and not to:
+        raise typer.BadParameter("The '--to' option (target node/tag) is required for node hooks.")

    with PipelineManager(
        base_dir=base_dir,
        storage_options=parsed_storage_options or {},
+        log_level=log_level,
    ) as manager:
-
+        try:
+            manager.add_hook(
+                name=name,
+                type=type,
+                to=to,
+                function_name=function_name,
+            )
+            logger.info(f"Hook '{function_name}' added to pipeline '{name}' (type: {type.value}).")
+        except Exception as e:
+            logger.error(f"Failed to add hook to pipeline '{name}': {e}")