FlowerPower 0.20.0__py3-none-any.whl → 0.30.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flowerpower/__init__.py +2 -6
- flowerpower/cfg/__init__.py +4 -11
- flowerpower/cfg/base.py +29 -25
- flowerpower/cfg/pipeline/__init__.py +3 -3
- flowerpower/cfg/pipeline/_schedule.py +32 -0
- flowerpower/cfg/pipeline/adapter.py +0 -5
- flowerpower/cfg/pipeline/builder.py +377 -0
- flowerpower/cfg/pipeline/run.py +89 -0
- flowerpower/cfg/project/__init__.py +8 -21
- flowerpower/cfg/project/adapter.py +0 -12
- flowerpower/cli/__init__.py +2 -28
- flowerpower/cli/pipeline.py +10 -4
- flowerpower/flowerpower.py +275 -585
- flowerpower/pipeline/base.py +19 -10
- flowerpower/pipeline/io.py +52 -46
- flowerpower/pipeline/manager.py +149 -91
- flowerpower/pipeline/pipeline.py +159 -87
- flowerpower/pipeline/registry.py +68 -33
- flowerpower/pipeline/visualizer.py +4 -4
- flowerpower/plugins/{_io → io}/__init__.py +1 -1
- flowerpower/settings/__init__.py +0 -2
- flowerpower/settings/{backend.py → _backend.py} +0 -19
- flowerpower/settings/logging.py +1 -1
- flowerpower/utils/logging.py +24 -12
- flowerpower/utils/misc.py +17 -0
- flowerpower-0.30.0.dist-info/METADATA +451 -0
- flowerpower-0.30.0.dist-info/RECORD +42 -0
- flowerpower/cfg/pipeline/schedule.py +0 -74
- flowerpower/cfg/project/job_queue.py +0 -111
- flowerpower/cli/job_queue.py +0 -1329
- flowerpower/cli/mqtt.py +0 -174
- flowerpower/job_queue/__init__.py +0 -205
- flowerpower/job_queue/base.py +0 -611
- flowerpower/job_queue/rq/__init__.py +0 -10
- flowerpower/job_queue/rq/_trigger.py +0 -37
- flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +0 -226
- flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +0 -228
- flowerpower/job_queue/rq/manager.py +0 -1893
- flowerpower/job_queue/rq/setup.py +0 -154
- flowerpower/job_queue/rq/utils.py +0 -69
- flowerpower/mqtt.py +0 -12
- flowerpower/plugins/mqtt/__init__.py +0 -12
- flowerpower/plugins/mqtt/cfg.py +0 -17
- flowerpower/plugins/mqtt/manager.py +0 -962
- flowerpower/settings/job_queue.py +0 -31
- flowerpower-0.20.0.dist-info/METADATA +0 -693
- flowerpower-0.20.0.dist-info/RECORD +0 -58
- {flowerpower-0.20.0.dist-info → flowerpower-0.30.0.dist-info}/WHEEL +0 -0
- {flowerpower-0.20.0.dist-info → flowerpower-0.30.0.dist-info}/entry_points.txt +0 -0
- {flowerpower-0.20.0.dist-info → flowerpower-0.30.0.dist-info}/licenses/LICENSE +0 -0
- {flowerpower-0.20.0.dist-info → flowerpower-0.30.0.dist-info}/top_level.txt +0 -0
@@ -1,693 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.4
|
2
|
-
Name: FlowerPower
|
3
|
-
Version: 0.20.0
|
4
|
-
Summary: A simple workflow framework. Hamilton + RQ = FlowerPower
|
5
|
-
Author-email: "Volker L." <ligno.blades@gmail.com>
|
6
|
-
Project-URL: Homepage, https://github.com/legout/flowerpower
|
7
|
-
Project-URL: Bug Tracker, https://github.com/legout/flowerpower/issues
|
8
|
-
Keywords: hamilton,workflow,pipeline,scheduler,rq,dask,ray
|
9
|
-
Requires-Python: >=3.11
|
10
|
-
Description-Content-Type: text/markdown
|
11
|
-
License-File: LICENSE
|
12
|
-
Requires-Dist: duration-parser>=1.0.1
|
13
|
-
Requires-Dist: fsspec>=2024.10.0
|
14
|
-
Requires-Dist: fsspec-utils>=0.1.0
|
15
|
-
Requires-Dist: humanize>=4.12.2
|
16
|
-
Requires-Dist: msgspec>=0.19.0
|
17
|
-
Requires-Dist: munch>=4.0.0
|
18
|
-
Requires-Dist: pyyaml>=6.0.1
|
19
|
-
Requires-Dist: rich>=13.9.3
|
20
|
-
Requires-Dist: s3fs>=2024.10.0
|
21
|
-
Requires-Dist: sf-hamilton-sdk>=0.5.2
|
22
|
-
Requires-Dist: sf-hamilton[rich,tqdm,visualization]>=1.69.0
|
23
|
-
Requires-Dist: typer>=0.12.3
|
24
|
-
Provides-Extra: io
|
25
|
-
Requires-Dist: flowerpower-io>=0.1.1; extra == "io"
|
26
|
-
Provides-Extra: io-legacy
|
27
|
-
Requires-Dist: flowerpower-io[legacy]>=0.1.1; extra == "io-legacy"
|
28
|
-
Provides-Extra: mongodb
|
29
|
-
Requires-Dist: pymongo>=4.7.2; extra == "mongodb"
|
30
|
-
Provides-Extra: mqtt
|
31
|
-
Requires-Dist: paho-mqtt>=2.1.0; extra == "mqtt"
|
32
|
-
Requires-Dist: orjson>=3.10.11; extra == "mqtt"
|
33
|
-
Requires-Dist: mmh3>=5.1.0; extra == "mqtt"
|
34
|
-
Provides-Extra: opentelemetry
|
35
|
-
Requires-Dist: opentelemetry-api>=1.5.0; extra == "opentelemetry"
|
36
|
-
Requires-Dist: opentelemetry-sdk>=1.5.0; extra == "opentelemetry"
|
37
|
-
Requires-Dist: opentelemetry-exporter-jaeger>=1.21.0; extra == "opentelemetry"
|
38
|
-
Provides-Extra: ray
|
39
|
-
Requires-Dist: ray>=2.34.0; extra == "ray"
|
40
|
-
Provides-Extra: redis
|
41
|
-
Requires-Dist: redis>=5.0.4; extra == "redis"
|
42
|
-
Provides-Extra: rq
|
43
|
-
Requires-Dist: rq>=2.3.1; extra == "rq"
|
44
|
-
Requires-Dist: rq-scheduler>=0.14.0; extra == "rq"
|
45
|
-
Requires-Dist: cron-descriptor>=1.4.5; extra == "rq"
|
46
|
-
Provides-Extra: tui
|
47
|
-
Requires-Dist: textual>=0.85.2; extra == "tui"
|
48
|
-
Provides-Extra: ui
|
49
|
-
Requires-Dist: sf-hamilton-ui>=0.0.11; extra == "ui"
|
50
|
-
Provides-Extra: webserver
|
51
|
-
Requires-Dist: sanic>=24.6.0; extra == "webserver"
|
52
|
-
Requires-Dist: sanic-ext>=23.12.0; extra == "webserver"
|
53
|
-
Requires-Dist: orjson>=3.10.11; extra == "webserver"
|
54
|
-
Provides-Extra: openlineage
|
55
|
-
Requires-Dist: openlineage-python>=1.32.0; extra == "openlineage"
|
56
|
-
Dynamic: license-file
|
57
|
-
|
58
|
-
<div align="center">
|
59
|
-
<h1>FlowerPower 🌸 - Build & Orchestrate Data Pipelines</h1>
|
60
|
-
<h3>Simple Workflow Framework - Hamilton + RQ = FlowerPower</h3>
|
61
|
-
<img src="./image.png" alt="FlowerPower Logo" width="400" height="300">
|
62
|
-
</div>
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
[](https://pypi.org/project/flowerpower/) <!-- Placeholder -->
|
67
|
-
[](https://github.com/legout/flowerpower/blob/main/LICENSE)
|
68
|
-
[](https://deepwiki.com/legout/flowerpower)
|
69
|
-
|
70
|
-
|
71
|
-
**FlowerPower** is a Python framework designed for building, configuring, scheduling, and executing data processing pipelines with ease and flexibility. It promotes a modular, configuration-driven approach, allowing you to focus on your pipeline logic while FlowerPower handles the orchestration.
|
72
|
-
|
73
|
-
It leverages the [Hamilton](https://github.com/apache/hamilton) library for defining dataflows in a clean, functional way within your Python pipeline scripts. Pipelines are defined in Python modules and configured using YAML files, making it easy to manage and understand your data workflows.
|
74
|
-
FlowerPower integrates with [RQ (Redis Queue)](https://github.com/rq/rq) for job queue management, enabling you to schedule and manage your pipeline runs efficiently. The framework features a clean separation between pipeline execution and job queue management, with a unified project interface that makes it easy to work with both synchronous and asynchronous execution modes. It also provides a web UI (Hamilton UI) for monitoring and managing your pipelines.
|
75
|
-
FlowerPower is designed to be extensible, allowing you to easily add custom I/O plugins and adapt to different deployment scenarios. This flexibility makes it suitable for a wide range of data processing tasks, from simple ETL jobs to complex data workflows.
|
76
|
-
|
77
|
-
|
78
|
-
## ✨ Key Features
|
79
|
-
|
80
|
-
* **Modular Pipeline Design:** Thanks to [Hamilton](https://github.com/apache/hamilton), you can define your data processing logic in Python modules, using functions as nodes in a directed acyclic graph (DAG).
|
81
|
-
* **Configuration-Driven:** Define pipeline parameters, execution logic, and scheduling declaratively using simple YAML files.
|
82
|
-
* **Job Queue Integration:** Built-in support for asynchronous execution with **RQ (Redis Queue)** for distributed task queues, background processing, and time-based scheduling.
|
83
|
-
* **Extensible I/O Plugins:** Connect to various data sources and destinations (CSV, JSON, Parquet, DeltaTable, DuckDB, PostgreSQL, MySQL, MSSQL, Oracle, MQTT, SQLite, and more).
|
84
|
-
* **Unified Project Interface:** Interact with your pipelines via:
|
85
|
-
* **FlowerPowerProject API:** A unified interface for both synchronous and asynchronous pipeline execution, job queue management, and worker control.
|
86
|
-
* **Command Line Interface (CLI):** For running, managing, and inspecting pipelines.
|
87
|
-
* **Web UI:** A graphical interface for monitoring and managing pipelines and schedules. ([Hamilton UI](https://hamilton.dagworks.io/en/latest/hamilton-ui/ui/))
|
88
|
-
* **Filesystem Abstraction:** Simplified file handling with support for local and remote filesystems (e.g., S3, GCS).
|
89
|
-
|
90
|
-
## 📦 Installation
|
91
|
-
|
92
|
-
We recommend using [uv](https://github.com/astral-sh/uv) for installing FlowerPower and managing your project environments. `uv` is an extremely fast Python package installer and resolver.
|
93
|
-
|
94
|
-
```bash
|
95
|
-
# Create and activate a virtual environment (recommended)
|
96
|
-
uv venv
|
97
|
-
source .venv/bin/activate # Or .\.venv\Scripts\activate on Windows
|
98
|
-
|
99
|
-
# Install FlowerPower
|
100
|
-
uv pip install flowerpower
|
101
|
-
|
102
|
-
# Optional: Install additional dependencies for specific features
|
103
|
-
uv pip install flowerpower[rq] # For RQ job queue support
|
104
|
-
uv pip install flowerpower[io] # For I/O plugins (CSV, JSON, Parquet, DeltaTable, DuckDB, PostgreSQL, MySQL, MSSQL, Oracle, SQLite)
|
105
|
-
uv pip install flowerpower[ui] # For Hamilton UI
|
106
|
-
uv pip install flowerpower[all] # Install all optional dependencies
|
107
|
-
```
|
108
|
-
|
109
|
-
*(Note: Requires Python 3.11 or newer, as declared in the package metadata.)*
|
110
|
-
|
111
|
-
## 🚀 Getting Started
|
112
|
-
|
113
|
-
Let's build a simple "Hello World" pipeline.
|
114
|
-
|
115
|
-
### 1. Initialize Your Project:
|
116
|
-
|
117
|
-
You can quickly set up the standard FlowerPower project structure using the CLI or Python.
|
118
|
-
|
119
|
-
**Using the CLI:**
|
120
|
-
|
121
|
-
Navigate to your desired parent directory and run:
|
122
|
-
```bash
|
123
|
-
flowerpower init --name hello-flowerpower-project
|
124
|
-
```
|
125
|
-
|
126
|
-
|
127
|
-
**Using Python:**
|
128
|
-
|
129
|
-
Alternatively, you can initialize programmatically:
|
130
|
-
```python
|
131
|
-
from flowerpower import FlowerPowerProject
|
132
|
-
|
133
|
-
# Initialize a new project
|
134
|
-
project = FlowerPowerProject.init(
|
135
|
-
name='hello-flowerpower-project',
|
136
|
-
job_queue_type='rq'
|
137
|
-
)
|
138
|
-
```
|
139
|
-
|
140
|
-
This will create a `hello-flowerpower-project` directory with the necessary `conf/` and `pipelines/` subdirectories and default configuration files.
|
141
|
-
|
142
|
-
```
|
143
|
-
hello-flowerpower-project/
|
144
|
-
├── conf/
|
145
|
-
│ ├── project.yml
|
146
|
-
│ └── pipelines/
|
147
|
-
└── pipelines/
|
148
|
-
```
|
149
|
-
|
150
|
-
Now, navigate into your new project directory:
|
151
|
-
|
152
|
-
```bash
|
153
|
-
cd hello-flowerpower-project
|
154
|
-
```
|
155
|
-
|
156
|
-
**Configure Project (`conf/project.yml`):**
|
157
|
-
|
158
|
-
Open `conf/project.yml` and define your project name and job queue backend. FlowerPower now uses RQ (Redis Queue) as its job queue system:
|
159
|
-
|
160
|
-
```yaml
|
161
|
-
name: hello-flowerpower
|
162
|
-
job_queue:
|
163
|
-
type: rq
|
164
|
-
backend:
|
165
|
-
type: redis
|
166
|
-
host: localhost
|
167
|
-
port: 6379
|
168
|
-
# ... other redis options
|
169
|
-
queues:
|
170
|
-
- default
|
171
|
-
- high
|
172
|
-
- low
|
173
|
-
# adapter: ... # Optional adapter configurations (e.g., Hamilton Tracker, MLflow), see `conf/project.yml` for details
|
174
|
-
```
|
175
|
-
|
176
|
-
### 2. Create Your Pipeline
|
177
|
-
|
178
|
-
You can create a new pipeline using the CLI or programmatically.
|
179
|
-
|
180
|
-
**Using the CLI:**
|
181
|
-
|
182
|
-
```bash
|
183
|
-
flowerpower pipeline new hello_world
|
184
|
-
```
|
185
|
-
|
186
|
-
**Using Python:**
|
187
|
-
|
188
|
-
You can create pipelines programmatically using the FlowerPowerProject interface:
|
189
|
-
|
190
|
-
```python
|
191
|
-
from flowerpower import FlowerPowerProject
|
192
|
-
|
193
|
-
# Load the project
|
194
|
-
project = FlowerPowerProject.load('.')
|
195
|
-
|
196
|
-
# Create a new pipeline
|
197
|
-
project.pipeline_manager.new(name='hello_world')
|
198
|
-
```
|
199
|
-
|
200
|
-
This will create a new file `hello_world.py` in the `pipelines/` directory and a corresponding configuration file `hello_world.yml` in `conf/pipelines/`.
|
201
|
-
|
202
|
-
**Implement Pipeline (`pipelines/hello_world.py`):**
|
203
|
-
|
204
|
-
Open `pipelines/hello_world.py` and write your pipeline logic using Python and Hamilton. FlowerPower makes configuration easily accessible.
|
205
|
-
|
206
|
-
```python
|
207
|
-
# FlowerPower pipeline hello_world.py
|
208
|
-
# Created on 2025-05-03 22:34:09
|
209
|
-
|
210
|
-
####################################################################################################
|
211
|
-
# Import necessary libraries
|
212
|
-
# NOTE: Remove or comment out imports that are not used in the pipeline
|
213
|
-
|
214
|
-
from hamilton.function_modifiers import parameterize
|
215
|
-
|
216
|
-
from pathlib import Path
|
217
|
-
|
218
|
-
from flowerpower.cfg import Config
|
219
|
-
|
220
|
-
####################################################################################################
|
221
|
-
# Load pipeline parameters. Do not modify this section.
|
222
|
-
|
223
|
-
PARAMS = Config.load(
|
224
|
-
Path(__file__).parents[1], pipeline_name="hello_world"
|
225
|
-
).pipeline.h_params
|
226
|
-
|
227
|
-
|
228
|
-
####################################################################################################
|
229
|
-
# Helper functions.
|
230
|
-
# These functions have to start with an underscore (_).
|
231
|
-
|
232
|
-
|
233
|
-
####################################################################################################
|
234
|
-
# Pipeline functions
|
235
|
-
|
236
|
-
@parameterize(**PARAMS.greeting_message) # Inject 'message' from params
|
237
|
-
def greeting_message(message: str) -> str:
|
238
|
-
"""Provides the greeting part."""
|
239
|
-
return f"{message},"
|
240
|
-
|
241
|
-
@parameterize(**PARAMS.target_name) # Inject 'name' from params
|
242
|
-
def target_name(name: str) -> str:
|
243
|
-
"""Provides the target name."""
|
244
|
-
return f"{name}!"
|
245
|
-
|
246
|
-
def full_greeting(greeting_message: str, target_name: str) -> str:
|
247
|
-
"""Combines the greeting and target."""
|
248
|
-
print(f"Generating greeting: {greeting_message} {target_name}")
|
249
|
-
return f"{greeting_message} {target_name}"
|
250
|
-
|
251
|
-
# You can add more complex Hamilton functions here...
|
252
|
-
```
|
253
|
-
|
254
|
-
**Configure Pipeline (`conf/pipelines/hello_world.yml`):**
|
255
|
-
|
256
|
-
Open `conf/pipelines/hello_world.yml` and specify parameters, run configurations, and scheduling for your pipeline.
|
257
|
-
|
258
|
-
```yaml
|
259
|
-
# adapter: ... # Pipeline-specific adapter overrides
|
260
|
-
|
261
|
-
params: # Parameters accessible in your Python code
|
262
|
-
greeting_message:
|
263
|
-
message: "Hello"
|
264
|
-
target_name:
|
265
|
-
name: "World"
|
266
|
-
|
267
|
-
run: # How to execute the pipeline
|
268
|
-
final_vars: # Specify the desired output(s) from your Hamilton DAG
|
269
|
-
- full_greeting
|
270
|
-
# inputs: # Optional: Specify input variables to the pipeline
|
271
|
-
# message: "Hello"
|
272
|
-
# config: ... # Runtime configuration overrides for Hamilton
|
273
|
-
# executor: ... # Execution backend (e.g., threadpool, multiprocessing)
|
274
|
-
|
275
|
-
schedule: # Optional: How often to run the pipeline
|
276
|
-
cron: "0 * * * *" # Run hourly
|
277
|
-
# interval: # e.g., { "minutes": 15 }
|
278
|
-
# date: # e.g., "2025-12-31 23:59:59"
|
279
|
-
```
|
280
|
-
### 3. Run Your Pipeline 🏃‍♀️
|
281
|
-
|
282
|
-
FlowerPower offers flexibility in how you execute your pipelines:
|
283
|
-
- **Synchronous Execution:** Run the pipeline directly.
|
284
|
-
- **Asynchronous Execution:** Use job queues for scheduling, background execution, or distributed processing.
|
285
|
-
|
286
|
-
#### 1. Synchronous Execution:
|
287
|
-
|
288
|
-
For quick testing or local runs, you can execute your pipeline synchronously. This is useful for debugging or running pipelines in a local environment.
|
289
|
-
|
290
|
-
* **Via CLI:**
|
291
|
-
```bash
|
292
|
-
# Run the pipeline synchronously
|
293
|
-
flowerpower pipeline run hello_world --base_dir .
|
294
|
-
```
|
295
|
-
* **Via Python:**
|
296
|
-
```python
|
297
|
-
from flowerpower import FlowerPowerProject
|
298
|
-
|
299
|
-
# Load the project
|
300
|
-
project = FlowerPowerProject.load('.')
|
301
|
-
|
302
|
-
# Execute the pipeline synchronously
|
303
|
-
result = project.run('hello_world')
|
304
|
-
```
|
305
|
-
|
306
|
-
#### 2. Asynchronous Execution (Job Queues):
|
307
|
-
|
308
|
-
For scheduling, background execution, or distributed processing, leverage FlowerPower's job queue integration with RQ (Redis Queue). This is ideal for distributed task queues where workers can pick up jobs.
|
309
|
-
|
310
|
-
First, install the RQ dependencies:
|
311
|
-
```bash
|
312
|
-
# Install RQ (Redis Queue) support
|
313
|
-
uv pip install flowerpower[rq]
|
314
|
-
```
|
315
|
-
|
316
|
-
* **Note:** Ensure you have Redis running for RQ job queue functionality.
|
317
|
-
|
318
|
-
**a) Configuring the RQ Job Queue Backend:**
|
319
|
-
|
320
|
-
Configuration of the job queue backend is done in your `conf/project.yml`. FlowerPower uses RQ (Redis Queue) as its job queue backend:
|
321
|
-
|
322
|
-
* **RQ (Redis Queue) Requirements:**
|
323
|
-
* A **Redis server** running for job queuing and task coordination.
|
324
|
-
* Configure in `conf/project.yml`:
|
325
|
-
```yaml
|
326
|
-
job_queue:
|
327
|
-
type: rq
|
328
|
-
backend:
|
329
|
-
type: redis
|
330
|
-
host: localhost
|
331
|
-
port: 6379
|
332
|
-
database: 0
|
333
|
-
# Optional: username, password for Redis auth
|
334
|
-
username: your_username # if needed
|
335
|
-
password: your_password # if needed
|
336
|
-
queues:
|
337
|
-
- default
|
338
|
-
- high
|
339
|
-
- low
|
340
|
-
```
|
341
|
-
|
342
|
-
You can override the job queue backend configuration using environment variables, the `settings` module, or by modifying the configuration programmatically. This is useful for testing or when you want to avoid hardcoding values in your configuration files.
|
343
|
-
|
344
|
-
* **Using the `settings` module:**
|
345
|
-
Override RQ backend configuration:
|
346
|
-
```python
|
347
|
-
from flowerpower import settings
|
348
|
-
|
349
|
-
# Override RQ backend configuration
|
350
|
-
settings.RQ_BACKEND_USERNAME = 'your_username'
|
351
|
-
settings.RQ_BACKEND_PASSWORD = 'your_password'
|
352
|
-
```
|
353
|
-
See the `flowerpower/settings/job_queue.py` file for all available settings.
|
354
|
-
|
355
|
-
* **Programmatic Configuration:**
|
356
|
-
Modify configuration via the FlowerPowerProject:
|
357
|
-
```python
|
358
|
-
from flowerpower import FlowerPowerProject
|
359
|
-
|
360
|
-
project = FlowerPowerProject.load('.')
|
361
|
-
project.job_queue_manager.cfg.backend.username = 'your_username'
|
362
|
-
project.job_queue_manager.cfg.backend.password = 'your_password'
|
363
|
-
```
|
364
|
-
|
365
|
-
* **Using Environment Variables:**
|
366
|
-
Use a `.env` file or set them in your environment:
|
367
|
-
```
|
368
|
-
FP_JOB_QUEUE_TYPE=rq
|
369
|
-
|
370
|
-
# RQ (Redis Queue) backend
|
371
|
-
FP_RQ_BACKEND_USERNAME=your_username
|
372
|
-
FP_RQ_BACKEND_PASSWORD=your_password
|
373
|
-
FP_RQ_BACKEND_HOST=localhost
|
374
|
-
FP_RQ_BACKEND_PORT=6379
|
375
|
-
```
|
376
|
-
|
377
|
-
|
378
|
-
**b) Add Job to Queue:**
|
379
|
-
Run your pipeline using the job queue system. This allows you to schedule jobs, run them in the background, or distribute them across multiple workers.
|
380
|
-
|
381
|
-
* **Via CLI:**
|
382
|
-
```bash
|
383
|
-
# Submit the pipeline to the job queue and return the job ID (non-blocking)
|
384
|
-
flowerpower pipeline add-job hello_world --base_dir .
|
385
|
-
|
386
|
-
# Run the pipeline via job queue and wait for result (blocking)
|
387
|
-
flowerpower pipeline run-job hello_world --base_dir .
|
388
|
-
```
|
389
|
-
* **Via Python:**
|
390
|
-
|
391
|
-
```python
|
392
|
-
from flowerpower import FlowerPowerProject
|
393
|
-
|
394
|
-
# Load the project
|
395
|
-
project = FlowerPowerProject.load('.')
|
396
|
-
|
397
|
-
# Enqueue the pipeline for execution (non-blocking)
|
398
|
-
job_id = project.enqueue('hello_world')
|
399
|
-
|
400
|
-
# Schedule the pipeline for future/recurring execution
|
401
|
-
schedule_id = project.schedule('hello_world', cron="0 9 * * *") # Daily at 9 AM
|
402
|
-
```
|
403
|
-
|
404
|
-
These commands will add the pipeline to the job queue, allowing it to be executed in the background or at scheduled intervals. The jobs will be processed by one or more workers, depending on your job queue configuration. You have to start the job queue workers separately.
|
405
|
-
|
406
|
-
|
407
|
-
**c) Start Job Queue Workers:**
|
408
|
-
To process jobs in the queue, you need to start one or more workers.
|
409
|
-
|
410
|
-
* **Via CLI:**
|
411
|
-
```bash
|
412
|
-
flowerpower job-queue start-worker --base_dir . # Start the job queue worker
|
413
|
-
```
|
414
|
-
|
415
|
-
* **Via Python:**
|
416
|
-
```python
|
417
|
-
from flowerpower import FlowerPowerProject
|
418
|
-
|
419
|
-
# Load the project
|
420
|
-
project = FlowerPowerProject.load('.')
|
421
|
-
|
422
|
-
# Start a single worker (blocking)
|
423
|
-
project.start_worker()
|
424
|
-
|
425
|
-
# Start a worker pool (multiple workers)
|
426
|
-
project.start_worker_pool(num_workers=4, background=True)
|
427
|
-
```
|
428
|
-
|
429
|
-
|
430
|
-
## Local Development Setup (Docker):
|
431
|
-
|
432
|
-
To easily set up required services like Redis, PostgreSQL, or MQTT locally for testing job queues, a basic `docker-compose.yml` file is provided in the `docker/` directory. This file includes configurations for various services useful during development.
|
433
|
-
|
434
|
-
```bash
|
435
|
-
# Navigate to the docker directory and start services
|
436
|
-
cd docker
|
437
|
-
docker-compose up -d redis postgres # Example: Start Redis and PostgreSQL
|
438
|
-
```
|
439
|
-
*(Note: Review and adapt `docker/docker-compose.yml` for your specific needs. It's intended for development, not production.)*
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
## ⚙️ Configuration Overview
|
444
|
-
|
445
|
-
FlowerPower uses a layered configuration system:
|
446
|
-
|
447
|
-
* **`conf/project.yml`:** Defines global settings for your project, including the RQ job queue backend configuration and integrated `adapter`s (like Hamilton Tracker, MLflow, etc.).
|
448
|
-
* **`conf/pipelines/*.yml`:** Each file defines a specific pipeline. It contains:
|
449
|
-
* `params`: Input parameters for your Hamilton functions.
|
450
|
-
* `run`: Execution details like target outputs (`final_vars`), Hamilton runtime `config`, and `executor` settings.
|
451
|
-
* `schedule`: Defines when the pipeline should run automatically (using `cron`, `interval`, or `date`).
|
452
|
-
* `adapter`: Pipeline-specific overrides for adapter settings.
|
453
|
-
|
454
|
-
## 🛠️ Basic Usage
|
455
|
-
|
456
|
-
You can interact with FlowerPower pipelines through multiple interfaces:
|
457
|
-
|
458
|
-
**Python API (Recommended):**
|
459
|
-
```python
|
460
|
-
from flowerpower import FlowerPowerProject
|
461
|
-
|
462
|
-
# Load the project
|
463
|
-
project = FlowerPowerProject.load('.')
|
464
|
-
|
465
|
-
# Run a pipeline synchronously
|
466
|
-
result = project.run('hello_world')
|
467
|
-
|
468
|
-
# Enqueue a pipeline for background execution
|
469
|
-
job_id = project.enqueue('hello_world')
|
470
|
-
|
471
|
-
# Schedule a pipeline
|
472
|
-
schedule_id = project.schedule('hello_world', cron="0 9 * * *")
|
473
|
-
|
474
|
-
# Start workers
|
475
|
-
project.start_worker_pool(num_workers=4, background=True)
|
476
|
-
```
|
477
|
-
|
478
|
-
**CLI:**
|
479
|
-
```bash
|
480
|
-
# Run a pipeline manually
|
481
|
-
flowerpower pipeline run hello_world --base_dir .
|
482
|
-
|
483
|
-
# Add a job to the queue
|
484
|
-
flowerpower pipeline add-job hello_world --base_dir .
|
485
|
-
|
486
|
-
# Schedule a pipeline
|
487
|
-
flowerpower pipeline schedule hello_world --base_dir .
|
488
|
-
|
489
|
-
# Start job queue worker
|
490
|
-
flowerpower job-queue start-worker --base_dir .
|
491
|
-
|
492
|
-
# List all available commands
|
493
|
-
flowerpower --help
|
494
|
-
```
|
495
|
-
|
496
|
-
## 🔧 Direct Module Usage
|
497
|
-
|
498
|
-
While the unified `FlowerPowerProject` interface is recommended for most use cases, you can also use the pipeline and job queue modules directly for more granular control or when you only need specific functionality.
|
499
|
-
|
500
|
-
### Pipeline-Only Usage
|
501
|
-
|
502
|
-
If you only need pipeline execution without job queue functionality, you can use the `PipelineManager` directly:
|
503
|
-
|
504
|
-
```python
|
505
|
-
from flowerpower.pipeline import PipelineManager
|
506
|
-
|
507
|
-
# Initialize pipeline manager
|
508
|
-
pm = PipelineManager(base_dir='.')
|
509
|
-
|
510
|
-
# Create a new pipeline
|
511
|
-
pm.new(name='my_pipeline')
|
512
|
-
|
513
|
-
# Run a pipeline synchronously
|
514
|
-
result = pm.run(
|
515
|
-
name='my_pipeline',
|
516
|
-
inputs={'param': 'value'},
|
517
|
-
final_vars=['output_var']
|
518
|
-
)
|
519
|
-
|
520
|
-
# List available pipelines
|
521
|
-
pipelines = pm.list()
|
522
|
-
print(f"Available pipelines: {pipelines}")
|
523
|
-
|
524
|
-
# Get pipeline information
|
525
|
-
info = pm.get('my_pipeline')
|
526
|
-
print(f"Pipeline config: {info}")
|
527
|
-
|
528
|
-
# Delete a pipeline
|
529
|
-
pm.delete('old_pipeline')
|
530
|
-
```
|
531
|
-
|
532
|
-
**When to use Pipeline-only approach:**
|
533
|
-
- Simple synchronous workflows
|
534
|
-
- Testing and development
|
535
|
-
- When you don't need background processing or scheduling
|
536
|
-
- Lightweight applications with minimal dependencies
|
537
|
-
|
538
|
-
### Job Queue-Only Usage
|
539
|
-
|
540
|
-
If you need job queue functionality for general task processing (not necessarily pipelines), you can use the job queue managers directly:
|
541
|
-
|
542
|
-
```python
|
543
|
-
import datetime as dt
|
544
|
-
from flowerpower.job_queue import JobQueueManager
|
545
|
-
|
546
|
-
# Initialize job queue manager with RQ backend
|
547
|
-
jqm = JobQueueManager(
|
548
|
-
type='rq',
|
549
|
-
name='my_worker',
|
550
|
-
base_dir='.'
|
551
|
-
)
|
552
|
-
|
553
|
-
# Define a simple task function
|
554
|
-
def add_numbers(x: int, y: int) -> int:
|
555
|
-
"""Simple task that adds two numbers."""
|
556
|
-
return x + y
|
557
|
-
|
558
|
-
def process_data(data: dict) -> dict:
|
559
|
-
"""More complex task that processes data."""
|
560
|
-
result = {
|
561
|
-
'processed': True,
|
562
|
-
'count': len(data.get('items', [])),
|
563
|
-
'timestamp': str(dt.datetime.now())
|
564
|
-
}
|
565
|
-
return result
|
566
|
-
|
567
|
-
# Enqueue jobs for immediate execution
|
568
|
-
job1 = jqm.enqueue(add_numbers, 5, 10)
|
569
|
-
job2 = jqm.enqueue(process_data, {'items': [1, 2, 3, 4, 5]})
|
570
|
-
|
571
|
-
# Enqueue jobs with delays
|
572
|
-
job3 = jqm.enqueue_in(300, add_numbers, 20, 30) # Run in 5 minutes
|
573
|
-
job4 = jqm.enqueue_at(dt.datetime(2025, 1, 1, 9, 0), process_data, {'items': []})
|
574
|
-
|
575
|
-
# Schedule recurring jobs
|
576
|
-
schedule_id = jqm.add_schedule(
|
577
|
-
func=process_data,
|
578
|
-
func_kwargs={'data': {'items': []}},
|
579
|
-
cron="0 */6 * * *", # Every 6 hours
|
580
|
-
schedule_id="data_processing_job"
|
581
|
-
)
|
582
|
-
|
583
|
-
# Start a worker to process jobs (blocking)
|
584
|
-
jqm.start_worker()
|
585
|
-
|
586
|
-
# Or start multiple workers in background
|
587
|
-
jqm.start_worker_pool(num_workers=4, background=True)
|
588
|
-
|
589
|
-
# Get job results
|
590
|
-
result1 = jqm.get_job_result(job1)
|
591
|
-
print(f"Addition result: {result1}")
|
592
|
-
|
593
|
-
# Clean up
|
594
|
-
jqm.stop_worker_pool()
|
595
|
-
```
|
596
|
-
|
597
|
-
**Alternatively, use RQManager directly for more RQ-specific features:**
|
598
|
-
|
599
|
-
```python
|
600
|
-
from flowerpower.job_queue.rq import RQManager
|
601
|
-
|
602
|
-
# Initialize RQ manager with custom configuration
|
603
|
-
rq_manager = RQManager(
|
604
|
-
name='specialized_worker',
|
605
|
-
base_dir='.',
|
606
|
-
log_level='DEBUG'
|
607
|
-
)
|
608
|
-
|
609
|
-
# Use RQ-specific features
|
610
|
-
job = rq_manager.add_job(
|
611
|
-
func=add_numbers,
|
612
|
-
func_args=(100, 200),
|
613
|
-
queue_name='high_priority',
|
614
|
-
timeout=300,
|
615
|
-
retry=3,
|
616
|
-
result_ttl=3600
|
617
|
-
)
|
618
|
-
|
619
|
-
# Start worker for specific queues
|
620
|
-
rq_manager.start_worker(
|
621
|
-
queue_names=['high_priority', 'default'],
|
622
|
-
background=True
|
623
|
-
)
|
624
|
-
|
625
|
-
# Monitor jobs and queues
|
626
|
-
jobs = rq_manager.get_jobs()
|
627
|
-
schedules = rq_manager.get_schedules()
|
628
|
-
|
629
|
-
print(f"Active jobs: {len(jobs)}")
|
630
|
-
print(f"Active schedules: {len(schedules)}")
|
631
|
-
```
|
632
|
-
|
633
|
-
**When to use Job Queue-only approach:**
|
634
|
-
- General task processing and background jobs
|
635
|
-
- When you need fine-grained control over job queue behavior
|
636
|
-
- Microservices that only handle specific job types
|
637
|
-
- Integration with existing RQ-based systems
|
638
|
-
- When you don't need Hamilton-based pipeline functionality
|
639
|
-
|
640
|
-
### Combining Both Approaches
|
641
|
-
|
642
|
-
You can also combine both managers for custom workflows:
|
643
|
-
|
644
|
-
```python
|
645
|
-
from flowerpower.pipeline import PipelineManager
|
646
|
-
from flowerpower.job_queue import JobQueueManager
|
647
|
-
|
648
|
-
# Initialize both managers
|
649
|
-
pm = PipelineManager(base_dir='.')
|
650
|
-
jqm = JobQueueManager(type='rq', name='combined_worker', base_dir='.')
|
651
|
-
|
652
|
-
# Create a custom function that runs a pipeline
|
653
|
-
def run_pipeline_task(pipeline_name: str, inputs: dict = None):
|
654
|
-
"""Custom task that executes a pipeline."""
|
655
|
-
result = pm.run(pipeline_name, inputs=inputs)
|
656
|
-
return result
|
657
|
-
|
658
|
-
# Enqueue pipeline execution as a job
|
659
|
-
job_id = jqm.enqueue(
|
660
|
-
run_pipeline_task,
|
661
|
-
'my_pipeline',
|
662
|
-
{'param': 'value'}
|
663
|
-
)
|
664
|
-
|
665
|
-
# Start worker to process the pipeline jobs
|
666
|
-
jqm.start_worker()
|
667
|
-
```
|
668
|
-
|
669
|
-
**Benefits of FlowerPowerProject vs Direct Usage:**
|
670
|
-
|
671
|
-
| Approach | Benefits | Use Cases |
|
672
|
-
|----------|----------|-----------|
|
673
|
-
| **FlowerPowerProject** | - Unified interface<br>- Automatic dependency injection<br>- Simplified configuration<br>- Best practices built-in | - Most applications<br>- Rapid development<br>- Full feature integration |
|
674
|
-
| **Pipeline-only** | - Lightweight<br>- No Redis dependency<br>- Simple synchronous execution | - Testing<br>- Simple workflows<br>- No background processing needed |
|
675
|
-
| **Job Queue-only** | - Fine-grained control<br>- Custom job types<br>- Existing RQ integration | - Microservices<br>- Custom task processing<br>- Non-pipeline jobs |
|
676
|
-
|
677
|
-
## 🖥️ UI
|
678
|
-
|
679
|
-
The FlowerPower web UI (Hamilton UI) provides a graphical interface for monitoring and managing your pipelines. It allows you to visualize pipeline runs, schedules, and potentially manage configurations.
|
680
|
-
|
681
|
-
```bash
|
682
|
-
# Start the web UI
|
683
|
-
flowerpower ui
|
684
|
-
```
|
685
|
-
|
686
|
-
## 📖 Documentation
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
## 📜 License
|
692
|
-
|
693
|
-
This project is licensed under the MIT License - see the `LICENSE` file for details. (Placeholder - update with actual license)
|