FlowerPower 0.9.12.4__tar.gz → 1.0.0b1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. flowerpower-1.0.0b1/PKG-INFO +324 -0
  2. flowerpower-1.0.0b1/README.md +236 -0
  3. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/pyproject.toml +22 -30
  4. flowerpower-1.0.0b1/src/FlowerPower.egg-info/PKG-INFO +324 -0
  5. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/FlowerPower.egg-info/SOURCES.txt +39 -15
  6. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/FlowerPower.egg-info/requires.txt +20 -8
  7. flowerpower-1.0.0b1/src/flowerpower/__init__.py +20 -0
  8. flowerpower-1.0.0b1/src/flowerpower/cfg/__init__.py +254 -0
  9. flowerpower-1.0.0b1/src/flowerpower/cfg/base.py +137 -0
  10. flowerpower-1.0.0b1/src/flowerpower/cfg/pipeline/__init__.py +254 -0
  11. flowerpower-1.0.0b1/src/flowerpower/cfg/pipeline/adapter.py +66 -0
  12. flowerpower-1.0.0b1/src/flowerpower/cfg/pipeline/run.py +47 -0
  13. flowerpower-1.0.0b1/src/flowerpower/cfg/pipeline/schedule.py +74 -0
  14. flowerpower-1.0.0b1/src/flowerpower/cfg/project/__init__.py +149 -0
  15. flowerpower-1.0.0b1/src/flowerpower/cfg/project/adapter.py +57 -0
  16. flowerpower-1.0.0b1/src/flowerpower/cfg/project/job_queue.py +165 -0
  17. flowerpower-1.0.0b1/src/flowerpower/cli/__init__.py +145 -0
  18. flowerpower-1.0.0b1/src/flowerpower/cli/job_queue.py +878 -0
  19. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/cli/mqtt.py +49 -4
  20. flowerpower-1.0.0b1/src/flowerpower/cli/pipeline.py +800 -0
  21. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/cli/utils.py +55 -0
  22. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/flowerpower.py +12 -7
  23. flowerpower-1.0.0b1/src/flowerpower/fs/__init__.py +28 -0
  24. flowerpower-1.0.0b1/src/flowerpower/fs/base.py +643 -0
  25. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/fs/ext.py +797 -216
  26. flowerpower-1.0.0b1/src/flowerpower/fs/storage_options.py +1352 -0
  27. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/base.py +13 -18
  28. flowerpower-1.0.0b1/src/flowerpower/io/loader/__init__.py +28 -0
  29. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/deltatable.py +7 -10
  30. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/metadata.py +1 -0
  31. flowerpower-1.0.0b1/src/flowerpower/io/saver/__init__.py +28 -0
  32. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/deltatable.py +4 -3
  33. flowerpower-1.0.0b1/src/flowerpower/job_queue/__init__.py +252 -0
  34. flowerpower-1.0.0b1/src/flowerpower/job_queue/apscheduler/__init__.py +11 -0
  35. flowerpower-1.0.0b1/src/flowerpower/job_queue/apscheduler/_setup/datastore.py +110 -0
  36. flowerpower-1.0.0b1/src/flowerpower/job_queue/apscheduler/_setup/eventbroker.py +93 -0
  37. flowerpower-1.0.0b1/src/flowerpower/job_queue/apscheduler/manager.py +1063 -0
  38. flowerpower-1.0.0b1/src/flowerpower/job_queue/apscheduler/setup.py +524 -0
  39. flowerpower-1.0.0b1/src/flowerpower/job_queue/apscheduler/trigger.py +169 -0
  40. flowerpower-1.0.0b1/src/flowerpower/job_queue/apscheduler/utils.py +309 -0
  41. flowerpower-1.0.0b1/src/flowerpower/job_queue/base.py +382 -0
  42. flowerpower-1.0.0b1/src/flowerpower/job_queue/rq/__init__.py +10 -0
  43. flowerpower-1.0.0b1/src/flowerpower/job_queue/rq/_trigger.py +37 -0
  44. flowerpower-1.0.0b1/src/flowerpower/job_queue/rq/concurrent_workers/gevent_worker.py +226 -0
  45. flowerpower-1.0.0b1/src/flowerpower/job_queue/rq/concurrent_workers/thread_worker.py +231 -0
  46. flowerpower-1.0.0b1/src/flowerpower/job_queue/rq/manager.py +1449 -0
  47. flowerpower-1.0.0b1/src/flowerpower/job_queue/rq/setup.py +150 -0
  48. flowerpower-1.0.0b1/src/flowerpower/job_queue/rq/utils.py +69 -0
  49. flowerpower-1.0.0b1/src/flowerpower/pipeline/__init__.py +5 -0
  50. flowerpower-1.0.0b1/src/flowerpower/pipeline/base.py +118 -0
  51. flowerpower-1.0.0b1/src/flowerpower/pipeline/io.py +407 -0
  52. flowerpower-1.0.0b1/src/flowerpower/pipeline/job_queue.py +505 -0
  53. flowerpower-1.0.0b1/src/flowerpower/pipeline/manager.py +1586 -0
  54. flowerpower-1.0.0b1/src/flowerpower/pipeline/registry.py +560 -0
  55. flowerpower-1.0.0b1/src/flowerpower/pipeline/runner.py +560 -0
  56. flowerpower-1.0.0b1/src/flowerpower/pipeline/visualizer.py +142 -0
  57. flowerpower-1.0.0b1/src/flowerpower/plugins/mqtt/__init__.py +12 -0
  58. flowerpower-1.0.0b1/src/flowerpower/plugins/mqtt/cfg.py +16 -0
  59. flowerpower-1.0.0b1/src/flowerpower/plugins/mqtt/manager.py +789 -0
  60. flowerpower-1.0.0b1/src/flowerpower/settings.py +110 -0
  61. flowerpower-1.0.0b1/src/flowerpower/utils/logging.py +21 -0
  62. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/utils/misc.py +57 -9
  63. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/utils/sql.py +122 -24
  64. flowerpower-1.0.0b1/src/flowerpower/utils/templates.py +50 -0
  65. flowerpower-1.0.0b1/src/flowerpower/web/app.py +0 -0
  66. flowerpower-0.9.12.4/PKG-INFO +0 -575
  67. flowerpower-0.9.12.4/README.md +0 -497
  68. flowerpower-0.9.12.4/src/FlowerPower.egg-info/PKG-INFO +0 -575
  69. flowerpower-0.9.12.4/src/flowerpower/__init__.py +0 -5
  70. flowerpower-0.9.12.4/src/flowerpower/cfg/__init__.py +0 -202
  71. flowerpower-0.9.12.4/src/flowerpower/cfg/base.py +0 -39
  72. flowerpower-0.9.12.4/src/flowerpower/cfg/pipeline/run.py +0 -18
  73. flowerpower-0.9.12.4/src/flowerpower/cfg/pipeline/schedule.py +0 -84
  74. flowerpower-0.9.12.4/src/flowerpower/cfg/pipeline/tracker.py +0 -14
  75. flowerpower-0.9.12.4/src/flowerpower/cfg/project/open_telemetry.py +0 -8
  76. flowerpower-0.9.12.4/src/flowerpower/cfg/project/tracker.py +0 -11
  77. flowerpower-0.9.12.4/src/flowerpower/cfg/project/worker.py +0 -19
  78. flowerpower-0.9.12.4/src/flowerpower/cli/__init__.py +0 -88
  79. flowerpower-0.9.12.4/src/flowerpower/cli/pipeline.py +0 -605
  80. flowerpower-0.9.12.4/src/flowerpower/cli/scheduler.py +0 -309
  81. flowerpower-0.9.12.4/src/flowerpower/event_handler.py +0 -23
  82. flowerpower-0.9.12.4/src/flowerpower/fs/__init__.py +0 -10
  83. flowerpower-0.9.12.4/src/flowerpower/fs/base.py +0 -319
  84. flowerpower-0.9.12.4/src/flowerpower/fs/storage_options.py +0 -310
  85. flowerpower-0.9.12.4/src/flowerpower/mqtt.py +0 -525
  86. flowerpower-0.9.12.4/src/flowerpower/pipeline.py +0 -2419
  87. flowerpower-0.9.12.4/src/flowerpower/scheduler.py +0 -680
  88. flowerpower-0.9.12.4/src/flowerpower/tui.py +0 -79
  89. flowerpower-0.9.12.4/src/flowerpower/utils/datastore.py +0 -186
  90. flowerpower-0.9.12.4/src/flowerpower/utils/eventbroker.py +0 -127
  91. flowerpower-0.9.12.4/src/flowerpower/utils/executor.py +0 -58
  92. flowerpower-0.9.12.4/src/flowerpower/utils/templates.py +0 -174
  93. flowerpower-0.9.12.4/src/flowerpower/utils/trigger.py +0 -140
  94. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/setup.cfg +0 -0
  95. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/FlowerPower.egg-info/dependency_links.txt +0 -0
  96. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/FlowerPower.egg-info/entry_points.txt +0 -0
  97. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/FlowerPower.egg-info/top_level.txt +0 -0
  98. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/cli/cfg.py +0 -0
  99. /flowerpower-0.9.12.4/src/flowerpower/cfg/pipeline/params.py → /flowerpower-1.0.0b1/src/flowerpower/cli/worker.py +0 -0
  100. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/_duckdb.py +0 -0
  101. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/csv.py +0 -0
  102. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/duckdb.py +0 -0
  103. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/json.py +0 -0
  104. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/mqtt.py +0 -0
  105. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/mssql.py +0 -0
  106. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/mysql.py +0 -0
  107. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/oracle.py +0 -0
  108. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/parquet.py +0 -0
  109. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/postgres.py +0 -0
  110. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/pydala.py +0 -0
  111. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/loader/sqlite.py +0 -0
  112. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/_duckdb.py +0 -0
  113. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/csv.py +0 -0
  114. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/duckdb.py +0 -0
  115. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/json.py +0 -0
  116. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/mqtt.py +0 -0
  117. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/mssql.py +0 -0
  118. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/mysql.py +0 -0
  119. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/oracle.py +0 -0
  120. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/parquet.py +0 -0
  121. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/postgres.py +0 -0
  122. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/pydala.py +0 -0
  123. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/io/saver/sqlite.py +0 -0
  124. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/utils/monkey.py +0 -0
  125. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/utils/open_telemetry.py +0 -0
  126. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/utils/polars.py +0 -0
  127. {flowerpower-0.9.12.4 → flowerpower-1.0.0b1}/src/flowerpower/utils/scheduler.py +0 -0
@@ -0,0 +1,324 @@
1
+ Metadata-Version: 2.4
2
+ Name: FlowerPower
3
+ Version: 1.0.0b1
4
+ Summary: A simple workflow framework. Hamilton + APScheduler = FlowerPower
5
+ Author-email: "Volker L." <ligno.blades@gmail.com>
6
+ Project-URL: Homepage, https://github.com/legout/flowerpower
7
+ Project-URL: Bug Tracker, https://github.com/legout/flowerpower/issues
8
+ Keywords: hamilton,workflow,pipeline,scheduler,apscheduler,dask,ray
9
+ Requires-Python: >=3.11
10
+ Description-Content-Type: text/markdown
11
+ Requires-Dist: aiobotocore<2.18.0
12
+ Requires-Dist: aiosqlite>=0.21.0
13
+ Requires-Dist: dill>=0.3.8
14
+ Requires-Dist: duration-parser>=1.0.1
15
+ Requires-Dist: fsspec>=2024.10.0
16
+ Requires-Dist: humanize>=4.12.2
17
+ Requires-Dist: msgspec>=0.19.0
18
+ Requires-Dist: munch>=4.0.0
19
+ Requires-Dist: orjson>=3.10.15
20
+ Requires-Dist: pyarrow<19.0.0
21
+ Requires-Dist: pydantic>=2.10.2
22
+ Requires-Dist: python-dotenv>=1.0.1
23
+ Requires-Dist: pyyaml>=6.0.1
24
+ Requires-Dist: rich>=13.9.3
25
+ Requires-Dist: s3fs>=2024.10.0
26
+ Requires-Dist: sf-hamilton-sdk>=0.5.2
27
+ Requires-Dist: sf-hamilton[rich,tqdm,visualization]>=1.69.0
28
+ Requires-Dist: typer>=0.12.3
29
+ Provides-Extra: apscheduler
30
+ Requires-Dist: aiosqlite>=0.21.0; extra == "apscheduler"
31
+ Requires-Dist: apscheduler==4.0.0a5; extra == "apscheduler"
32
+ Requires-Dist: asyncpg>=0.29.0; extra == "apscheduler"
33
+ Requires-Dist: greenlet>=3.0.3; extra == "apscheduler"
34
+ Requires-Dist: sqlalchemy>=2.0.30; extra == "apscheduler"
35
+ Requires-Dist: cron-descriptor>=1.4.5; extra == "apscheduler"
36
+ Provides-Extra: io
37
+ Requires-Dist: adbc-driver-manager>=1.4.0; extra == "io"
38
+ Requires-Dist: datafusion>=43.1.0; extra == "io"
39
+ Requires-Dist: deltalake>=0.24.0; extra == "io"
40
+ Requires-Dist: duckdb>=1.1.3; extra == "io"
41
+ Requires-Dist: orjson>=3.10.12; extra == "io"
42
+ Requires-Dist: pandas>=2.2.3; extra == "io"
43
+ Requires-Dist: polars>=1.15.0; extra == "io"
44
+ Requires-Dist: pyarrow>=18.1.0; extra == "io"
45
+ Requires-Dist: pydala2>=0.9.4.5; extra == "io"
46
+ Requires-Dist: redis>=5.2.1; extra == "io"
47
+ Requires-Dist: sherlock>=0.4.1; extra == "io"
48
+ Provides-Extra: io-legacy
49
+ Requires-Dist: adbc-driver-manager>=1.4.0; extra == "io-legacy"
50
+ Requires-Dist: datafusion>=43.1.0; extra == "io-legacy"
51
+ Requires-Dist: deltalake>=0.24.0; extra == "io-legacy"
52
+ Requires-Dist: duckdb>=1.1.3; extra == "io-legacy"
53
+ Requires-Dist: orjson>=3.10.12; extra == "io-legacy"
54
+ Requires-Dist: pandas>=2.2.3; extra == "io-legacy"
55
+ Requires-Dist: polars-lts-cpu>=1.15.0; extra == "io-legacy"
56
+ Requires-Dist: pyarrow>=18.1.0; extra == "io-legacy"
57
+ Requires-Dist: pydala2>=0.9.4.5; extra == "io-legacy"
58
+ Requires-Dist: redis>=5.2.1; extra == "io-legacy"
59
+ Requires-Dist: sherlock>=0.4.1; extra == "io-legacy"
60
+ Provides-Extra: mongodb
61
+ Requires-Dist: pymongo>=4.7.2; extra == "mongodb"
62
+ Provides-Extra: mqtt
63
+ Requires-Dist: paho-mqtt>=2.1.0; extra == "mqtt"
64
+ Requires-Dist: orjson>=3.10.11; extra == "mqtt"
65
+ Requires-Dist: mmh3>=5.1.0; extra == "mqtt"
66
+ Provides-Extra: opentelemetry
67
+ Requires-Dist: opentelemetry-api>=1.5.0; extra == "opentelemetry"
68
+ Requires-Dist: opentelemetry-sdk>=1.5.0; extra == "opentelemetry"
69
+ Requires-Dist: opentelemetry-exporter-jaeger>=1.21.0; extra == "opentelemetry"
70
+ Provides-Extra: ray
71
+ Requires-Dist: ray>=2.34.0; extra == "ray"
72
+ Provides-Extra: redis
73
+ Requires-Dist: redis>=5.0.4; extra == "redis"
74
+ Provides-Extra: rq
75
+ Requires-Dist: rq>=2.3.1; extra == "rq"
76
+ Requires-Dist: rq-scheduler>=0.14.0; extra == "rq"
77
+ Requires-Dist: cron-descriptor>=1.4.5; extra == "rq"
78
+ Provides-Extra: tui
79
+ Requires-Dist: textual>=0.85.2; extra == "tui"
80
+ Provides-Extra: ui
81
+ Requires-Dist: sf-hamilton-ui>=0.0.11; extra == "ui"
82
+ Provides-Extra: webserver
83
+ Requires-Dist: sanic>=24.6.0; extra == "webserver"
84
+ Requires-Dist: sanic-ext>=23.12.0; extra == "webserver"
85
+ Requires-Dist: orjson>=3.10.11; extra == "webserver"
86
+ Provides-Extra: openlineage
87
+ Requires-Dist: openlineage-python>=1.32.0; extra == "openlineage"
88
+
89
+ <div align="center">
90
+ <h1>FlowerPower 🌸</h1>
91
+ <h3>Simple Workflow Framework - Hamilton + APScheduler = FlowerPower</h3>
92
+ <img src="./image.png" alt="FlowerPower Logo" width="400" height="300">
93
+ </div>
94
+
95
+ A powerful and flexible data pipeline framework that simplifies data processing workflows, job scheduling, and event-driven architectures. FlowerPower combines modern data processing capabilities with robust job queue management and MQTT integration.
96
+
97
+ ## ✨ Features
98
+
99
+ ### Core Features
100
+ - 📊 **Pipeline Management**: Build and run data processing pipelines with support for multiple data formats and computation engines
101
+ - 🔄 **Job Queue Integration**: Support for multiple job queue backends (RQ, APScheduler)
102
+ - 📡 **MQTT Integration**: Built-in support for MQTT-based event processing
103
+ - 🎯 **Resilient Execution**: Automatic retries with configurable backoff and jitter
104
+ - 📊 **Data Format Support**: Work with CSV, JSON, Parquet files and more
105
+ - 🗄️ **Database Connectivity**: Connect to PostgreSQL, MySQL, SQLite, DuckDB, Oracle, and MSSQL
106
+
107
+ ### Additional Features
108
+ - 🛠️ **CLI Tools**: Comprehensive command-line interface for all operations
109
+ - 📈 **Pipeline Visualization**: DAG visualization for pipeline understanding
110
+ - 🔍 **Monitoring**: Integration with OpenTelemetry for observability
111
+ - 🐳 **Docker Support**: Ready-to-use Docker configurations
112
+
113
+ ## 🚀 Quick Start
114
+
115
+ ### Installation
116
+
117
+ ```bash
118
+ # Using pip
119
+ pip install flowerpower
120
+
121
+ # For development installation
122
+ git clone https://github.com/legout/flowerpower.git
123
+ cd flowerpower
124
+ pip install -e .
125
+ ```
126
+
127
+ ### Create Your First Pipeline
128
+
129
+ 1. Initialize a new project:
130
+ ```bash
131
+ flowerpower init --name my-first-project
132
+ ```
133
+
134
+ 2. Create a simple pipeline in `pipelines/hello_world.py`:
135
+ ```python
136
+ import pandas as pd
137
+
138
+ def load_data() -> pd.DataFrame:
139
+ """Load sample data"""
140
+ return pd.DataFrame({
141
+ 'name': ['Alice', 'Bob', 'Charlie'],
142
+ 'age': [25, 30, 35]
143
+ })
144
+
145
+ def process_data(df: pd.DataFrame) -> pd.DataFrame:
146
+ """Add a greeting column"""
147
+ df['greeting'] = 'Hello, ' + df['name']
148
+ return df
149
+
150
+ def save_output(df: pd.DataFrame) -> None:
151
+ """Save the processed data"""
152
+ print(df)
153
+ ```
154
+
155
+ 3. Run the pipeline:
156
+ ```bash
157
+ flowerpower pipeline run hello_world
158
+ ```
159
+
160
+ ## 💡 Key Concepts
161
+
162
+ ### Pipeline Management
163
+
164
+ Pipelines are the core building blocks of FlowerPower. They can be:
165
+ - Run directly
166
+ - Scheduled
167
+ - Triggered by MQTT messages
168
+ - Executed as background jobs
169
+
170
+ ```bash
171
+ # Run a pipeline
172
+ flowerpower pipeline run my_pipeline --inputs '{"source": "data.csv"}'
173
+
174
+ # Schedule a pipeline
175
+ flowerpower pipeline schedule my_pipeline --cron "0 * * * *"
176
+
177
+ # Show pipeline structure
178
+ flowerpower pipeline show-dag my_pipeline
179
+ ```
180
+
181
+ ### Job Queue Integration
182
+
183
+ FlowerPower supports multiple job queue backends:
184
+
185
+ ```bash
186
+ # Start a worker with RQ backend
187
+ flowerpower job-queue start-worker --type rq
188
+
189
+ # Start APScheduler worker
190
+ flowerpower job-queue start-worker --type apscheduler
191
+
192
+ # Add a job with retry configuration
193
+ flowerpower job-queue add-job my_pipeline \
194
+ --max-retries 3 \
195
+ --retry-delay 2.0 \
196
+ --jitter-factor 0.1
197
+ ```
198
+
199
+ ### MQTT Integration
200
+
201
+ Connect your pipelines to MQTT message brokers:
202
+
203
+ ```bash
204
+ # Run a pipeline when messages arrive
205
+ flowerpower mqtt run-pipeline-on-message my_pipeline \
206
+ --topic "sensors/data" \
207
+ --max-retries 3 \
208
+ --retry-delay 1.0
209
+
210
+ # Start a custom message listener
211
+ flowerpower mqtt start-listener \
212
+ --on-message process_message \
213
+ --topic "events/#"
214
+ ```
215
+
216
+ ## 📁 Project Structure
217
+
218
+ ```
219
+ my-project/
220
+ ├── conf/
221
+ │ ├── project.yml # Project configuration
222
+ │ └── pipelines/ # Pipeline configurations
223
+ │ └── my_pipeline.yml
224
+ ├── pipelines/ # Pipeline implementations
225
+ │ └── my_pipeline.py
226
+ └── data/ # Data files (optional)
227
+ ```
228
+
229
+ ## 🔌 Data Connectors
230
+
231
+ ### Supported File Formats
232
+ - CSV
233
+ - JSON
234
+ - Parquet
235
+ - Pydala Datasets
236
+
237
+ ### Supported Databases
238
+ - PostgreSQL
239
+ - MySQL
240
+ - SQLite
241
+ - Oracle
242
+ - Microsoft SQL Server
243
+ - DuckDB
244
+
245
+ ## 🐳 Docker Support
246
+
247
+ Run FlowerPower in containers:
248
+
249
+ ```bash
250
+ cd docker
251
+ docker-compose up
252
+ ```
253
+
254
+ The Docker setup includes:
255
+ - Python worker environment
256
+ - MQTT broker (Mosquitto)
257
+ - Built-in configuration
258
+
259
+ ## 🛠️ Configuration
260
+
261
+ ### Pipeline Configuration
262
+ ```yaml
263
+ # conf/pipelines/my_pipeline.yml
264
+ name: my_pipeline
265
+ description: Example pipeline configuration
266
+ inputs:
267
+ source_data:
268
+ type: csv
269
+ path: data/input.csv
270
+ outputs:
271
+ processed_data:
272
+ type: parquet
273
+ path: data/output.parquet
274
+ ```
275
+
276
+ ### Job Queue Configuration
277
+ ```yaml
278
+ # conf/project.yml
279
+ job_queue:
280
+ type: rq # or apscheduler
281
+ redis_url: redis://localhost:6379
282
+ max_retries: 3
283
+ retry_delay: 1.0
284
+ ```
285
+
286
+ ## 📚 API Documentation
287
+
288
+ Visit our [API Documentation](docs/api.md) for detailed information about:
289
+ - Pipeline API
290
+ - Job Queue API
291
+ - MQTT Integration
292
+ - Data Connectors
293
+ - Configuration Options
294
+
295
+ ## 🧪 Running Tests
296
+
297
+ ```bash
298
+ # Run all tests
299
+ pytest tests/
300
+
301
+ # Run specific test category
302
+ pytest tests/test_pipeline/
303
+ ```
304
+
305
+ ## 🤝 Contributing
306
+
307
+ We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
308
+
309
+ 1. Fork the repository
310
+ 2. Create your feature branch
311
+ 3. Commit your changes
312
+ 4. Push to the branch
313
+ 5. Create a Pull Request
314
+
315
+ ## 📄 License
316
+
317
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
318
+
319
+ ## 🙏 Acknowledgments
320
+
321
+ - Built with [Hamilton](https://github.com/DAGWorks-Inc/hamilton) for pipeline execution
322
+ - Uses [RQ](https://python-rq.org/) and [APScheduler](https://apscheduler.readthedocs.io/) for job queues
323
+ - MQTT support via [Paho MQTT](https://www.eclipse.org/paho/)
324
+ - Database connectivity through SQLAlchemy and native connectors
@@ -0,0 +1,236 @@
1
+ <div align="center">
2
+ <h1>FlowerPower 🌸</h1>
3
+ <h3>Simple Workflow Framework - Hamilton + APScheduler = FlowerPower</h3>
4
+ <img src="./image.png" alt="FlowerPower Logo" width="400" height="300">
5
+ </div>
6
+
7
+ A powerful and flexible data pipeline framework that simplifies data processing workflows, job scheduling, and event-driven architectures. FlowerPower combines modern data processing capabilities with robust job queue management and MQTT integration.
8
+
9
+ ## ✨ Features
10
+
11
+ ### Core Features
12
+ - 📊 **Pipeline Management**: Build and run data processing pipelines with support for multiple data formats and computation engines
13
+ - 🔄 **Job Queue Integration**: Support for multiple job queue backends (RQ, APScheduler)
14
+ - 📡 **MQTT Integration**: Built-in support for MQTT-based event processing
15
+ - 🎯 **Resilient Execution**: Automatic retries with configurable backoff and jitter
16
+ - 📊 **Data Format Support**: Work with CSV, JSON, Parquet files and more
17
+ - 🗄️ **Database Connectivity**: Connect to PostgreSQL, MySQL, SQLite, DuckDB, Oracle, and MSSQL
18
+
19
+ ### Additional Features
20
+ - 🛠️ **CLI Tools**: Comprehensive command-line interface for all operations
21
+ - 📈 **Pipeline Visualization**: DAG visualization for pipeline understanding
22
+ - 🔍 **Monitoring**: Integration with OpenTelemetry for observability
23
+ - 🐳 **Docker Support**: Ready-to-use Docker configurations
24
+
25
+ ## 🚀 Quick Start
26
+
27
+ ### Installation
28
+
29
+ ```bash
30
+ # Using pip
31
+ pip install flowerpower
32
+
33
+ # For development installation
34
+ git clone https://github.com/legout/flowerpower.git
35
+ cd flowerpower
36
+ pip install -e .
37
+ ```
38
+
39
+ ### Create Your First Pipeline
40
+
41
+ 1. Initialize a new project:
42
+ ```bash
43
+ flowerpower init --name my-first-project
44
+ ```
45
+
46
+ 2. Create a simple pipeline in `pipelines/hello_world.py`:
47
+ ```python
48
+ import pandas as pd
49
+
50
+ def load_data() -> pd.DataFrame:
51
+ """Load sample data"""
52
+ return pd.DataFrame({
53
+ 'name': ['Alice', 'Bob', 'Charlie'],
54
+ 'age': [25, 30, 35]
55
+ })
56
+
57
+ def process_data(df: pd.DataFrame) -> pd.DataFrame:
58
+ """Add a greeting column"""
59
+ df['greeting'] = 'Hello, ' + df['name']
60
+ return df
61
+
62
+ def save_output(df: pd.DataFrame) -> None:
63
+ """Save the processed data"""
64
+ print(df)
65
+ ```
66
+
67
+ 3. Run the pipeline:
68
+ ```bash
69
+ flowerpower pipeline run hello_world
70
+ ```
71
+
72
+ ## 💡 Key Concepts
73
+
74
+ ### Pipeline Management
75
+
76
+ Pipelines are the core building blocks of FlowerPower. They can be:
77
+ - Run directly
78
+ - Scheduled
79
+ - Triggered by MQTT messages
80
+ - Executed as background jobs
81
+
82
+ ```bash
83
+ # Run a pipeline
84
+ flowerpower pipeline run my_pipeline --inputs '{"source": "data.csv"}'
85
+
86
+ # Schedule a pipeline
87
+ flowerpower pipeline schedule my_pipeline --cron "0 * * * *"
88
+
89
+ # Show pipeline structure
90
+ flowerpower pipeline show-dag my_pipeline
91
+ ```
92
+
93
+ ### Job Queue Integration
94
+
95
+ FlowerPower supports multiple job queue backends:
96
+
97
+ ```bash
98
+ # Start a worker with RQ backend
99
+ flowerpower job-queue start-worker --type rq
100
+
101
+ # Start APScheduler worker
102
+ flowerpower job-queue start-worker --type apscheduler
103
+
104
+ # Add a job with retry configuration
105
+ flowerpower job-queue add-job my_pipeline \
106
+ --max-retries 3 \
107
+ --retry-delay 2.0 \
108
+ --jitter-factor 0.1
109
+ ```
110
+
111
+ ### MQTT Integration
112
+
113
+ Connect your pipelines to MQTT message brokers:
114
+
115
+ ```bash
116
+ # Run a pipeline when messages arrive
117
+ flowerpower mqtt run-pipeline-on-message my_pipeline \
118
+ --topic "sensors/data" \
119
+ --max-retries 3 \
120
+ --retry-delay 1.0
121
+
122
+ # Start a custom message listener
123
+ flowerpower mqtt start-listener \
124
+ --on-message process_message \
125
+ --topic "events/#"
126
+ ```
127
+
128
+ ## 📁 Project Structure
129
+
130
+ ```
131
+ my-project/
132
+ ├── conf/
133
+ │ ├── project.yml # Project configuration
134
+ │ └── pipelines/ # Pipeline configurations
135
+ │ └── my_pipeline.yml
136
+ ├── pipelines/ # Pipeline implementations
137
+ │ └── my_pipeline.py
138
+ └── data/ # Data files (optional)
139
+ ```
140
+
141
+ ## 🔌 Data Connectors
142
+
143
+ ### Supported File Formats
144
+ - CSV
145
+ - JSON
146
+ - Parquet
147
+ - Pydala Datasets
148
+
149
+ ### Supported Databases
150
+ - PostgreSQL
151
+ - MySQL
152
+ - SQLite
153
+ - Oracle
154
+ - Microsoft SQL Server
155
+ - DuckDB
156
+
157
+ ## 🐳 Docker Support
158
+
159
+ Run FlowerPower in containers:
160
+
161
+ ```bash
162
+ cd docker
163
+ docker-compose up
164
+ ```
165
+
166
+ The Docker setup includes:
167
+ - Python worker environment
168
+ - MQTT broker (Mosquitto)
169
+ - Built-in configuration
170
+
171
+ ## 🛠️ Configuration
172
+
173
+ ### Pipeline Configuration
174
+ ```yaml
175
+ # conf/pipelines/my_pipeline.yml
176
+ name: my_pipeline
177
+ description: Example pipeline configuration
178
+ inputs:
179
+ source_data:
180
+ type: csv
181
+ path: data/input.csv
182
+ outputs:
183
+ processed_data:
184
+ type: parquet
185
+ path: data/output.parquet
186
+ ```
187
+
188
+ ### Job Queue Configuration
189
+ ```yaml
190
+ # conf/project.yml
191
+ job_queue:
192
+ type: rq # or apscheduler
193
+ redis_url: redis://localhost:6379
194
+ max_retries: 3
195
+ retry_delay: 1.0
196
+ ```
197
+
198
+ ## 📚 API Documentation
199
+
200
+ Visit our [API Documentation](docs/api.md) for detailed information about:
201
+ - Pipeline API
202
+ - Job Queue API
203
+ - MQTT Integration
204
+ - Data Connectors
205
+ - Configuration Options
206
+
207
+ ## 🧪 Running Tests
208
+
209
+ ```bash
210
+ # Run all tests
211
+ pytest tests/
212
+
213
+ # Run specific test category
214
+ pytest tests/test_pipeline/
215
+ ```
216
+
217
+ ## 🤝 Contributing
218
+
219
+ We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
220
+
221
+ 1. Fork the repository
222
+ 2. Create your feature branch
223
+ 3. Commit your changes
224
+ 4. Push to the branch
225
+ 5. Create a Pull Request
226
+
227
+ ## 📄 License
228
+
229
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
230
+
231
+ ## 🙏 Acknowledgments
232
+
233
+ - Built with [Hamilton](https://github.com/DAGWorks-Inc/hamilton) for pipeline execution
234
+ - Uses [RQ](https://python-rq.org/) and [APScheduler](https://apscheduler.readthedocs.io/) for job queues
235
+ - MQTT support via [Paho MQTT](https://www.eclipse.org/paho/)
236
+ - Database connectivity through SQLAlchemy and native connectors
@@ -4,15 +4,18 @@ description = "A simple workflow framework. Hamilton + APScheduler = FlowerPower
4
4
  authors = [{ name = "Volker L.", email = "ligno.blades@gmail.com" }]
5
5
  readme = "README.md"
6
6
  requires-python = ">= 3.11"
7
- version = "0.9.12.4"
7
+ version = "1.0.0b1"
8
8
  keywords = ["hamilton", "workflow", "pipeline", "scheduler", "apscheduler", "dask", "ray"]
9
9
  dependencies = [
10
10
  'aiobotocore<2.18.0',
11
+ "aiosqlite>=0.21.0",
11
12
  'dill>=0.3.8',
13
+ "duration-parser>=1.0.1",
12
14
  'fsspec>=2024.10.0',
15
+ "humanize>=4.12.2",
16
+ "msgspec>=0.19.0",
13
17
  'munch>=4.0.0',
14
18
  "orjson>=3.10.15",
15
- 'pendulum>=3.0.0',
16
19
  'pyarrow<19.0.0',
17
20
  'pydantic>=2.10.2',
18
21
  'python-dotenv>=1.0.1',
@@ -21,7 +24,6 @@ dependencies = [
21
24
  's3fs>=2024.10.0',
22
25
  'sf-hamilton-sdk>=0.5.2',
23
26
  'sf-hamilton[visualization,rich,tqdm]>=1.69.0',
24
- 'tqdm>=4.67.1',
25
27
  'typer>=0.12.3',
26
28
  ]
27
29
 
@@ -34,6 +36,8 @@ dependencies = [
34
36
  flowerpower = "flowerpower.cli:app"
35
37
 
36
38
  [project.optional-dependencies]
39
+ apscheduler = ['aiosqlite>=0.21.0', 'apscheduler==4.0.0a5', 'asyncpg>=0.29.0', 'greenlet>=3.0.3', 'sqlalchemy>=2.0.30', "cron-descriptor>=1.4.5",
40
+ ]
37
41
  io = [
38
42
  "adbc-driver-manager>=1.4.0",
39
43
  #"connectorx>=0.4.1",
@@ -63,19 +67,27 @@ io-legacy = [
63
67
  "sherlock>=0.4.1",
64
68
  ]
65
69
  mongodb = ["pymongo>=4.7.2"]
66
- mqtt = ["paho-mqtt>=2.1.0", "orjson>=3.10.11"]
70
+ mqtt = [
71
+ "paho-mqtt>=2.1.0",
72
+ "orjson>=3.10.11",
73
+ "mmh3>=5.1.0",
74
+ ]
67
75
  opentelemetry = [
68
76
  "opentelemetry-api>=1.5.0",
69
77
  "opentelemetry-sdk>=1.5.0",
70
- "opentelemetry-exporter-jaeger>=1.21.0", #"sf-hamilton[opentelemetry]>=1.83.3",
78
+ "opentelemetry-exporter-jaeger>=1.21.0", #"sf-hamilton[opentelemetry]>=1.83.3"
71
79
  ]
72
80
  ray = ["ray>=2.34.0"]
73
81
  redis = ["redis>=5.0.4"]
74
- scheduler = ['aiosqlite>=0.21.0', 'apscheduler>=4.0.0a5', 'asyncpg>=0.29.0', 'greenlet>=3.0.3', 'sqlalchemy>=2.0.30']
75
-
82
+ rq = [ "rq>=2.3.1",
83
+ "rq-scheduler>=0.14.0", "cron-descriptor>=1.4.5",
84
+ ]
76
85
  tui = ["textual>=0.85.2"]
77
86
  ui = ["sf-hamilton-ui>=0.0.11"]
78
87
  webserver = ["sanic>=24.6.0", "sanic-ext>=23.12.0", "orjson>=3.10.11"]
88
+ openlineage = [
89
+ "openlineage-python>=1.32.0",
90
+ ]
79
91
 
80
92
 
81
93
  [tool.uv]
@@ -83,32 +95,12 @@ dev-dependencies = [
83
95
  "ipython>=8.24.0",
84
96
  "isort>=5.13.2",
85
97
  "ruff>=0.7.1",
86
- "polars>=1.12.0",
87
- "duckdb>=1.1.3",
88
98
  "jupyterlab>=4.3.0",
89
- "deltalake>=0.21.0",
90
- "datafusion>=42.0.0",
91
- "orjson>=3.10.14",
92
- "joblib>=1.4.2",
93
- "sanic>=24.12.0",
94
- "sanic-ext>=23.12.0",
95
- "apscheduler>=4.0.0a5",
96
- "sqlalchemy>=2.0.37",
97
- "asyncpg>=0.30.0",
98
- "paho-mqtt>=2.1.0",
99
- "greenlet>=3.1.1",
100
- "obstore>=0.3.0",
101
99
  "pytest>=8.3.4",
102
100
  "mocker>=1.1.1",
103
101
  "marimo>=0.10.19",
104
- "panel>=1.6.0",
105
- "ipywidgets>=8.1.5",
106
- "scikit-learn>=1.6.1",
107
- "datamodel-code-generator>=0.27.2",
108
- "arro3-core>=0.4.5",
109
- "adbc-driver-manager>=1.4.0",
110
- "adbc-driver-sqlite>=1.4.0",
111
- "psycopg>=3.2.6",
112
- "asyncer>=0.0.8",
102
+ "pre-commit>=4.2.0",
113
103
  ]
114
104
  package = true
105
+
106
+