scalable-pypeline 1.2.2__tar.gz → 2.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scalable-pypeline-2.0.1/PKG-INFO +191 -0
- scalable-pypeline-2.0.1/README.md +175 -0
- scalable-pypeline-2.0.1/pypeline/__init__.py +1 -0
- scalable-pypeline-2.0.1/pypeline/barrier.py +34 -0
- scalable-pypeline-2.0.1/pypeline/composition.py +348 -0
- scalable-pypeline-2.0.1/pypeline/constants.py +72 -0
- scalable-pypeline-2.0.1/pypeline/dramatiq.py +470 -0
- scalable-pypeline-2.0.1/pypeline/extensions.py +17 -0
- {scalable-pypeline-1.2.2 → scalable-pypeline-2.0.1}/pypeline/flask/__init__.py +3 -5
- scalable-pypeline-2.0.1/pypeline/flask/api/pipelines.py +222 -0
- scalable-pypeline-2.0.1/pypeline/flask/api/schedules.py +42 -0
- scalable-pypeline-2.0.1/pypeline/flask/decorators.py +41 -0
- scalable-pypeline-2.0.1/pypeline/flask/flask_pypeline.py +156 -0
- scalable-pypeline-2.0.1/pypeline/middleware.py +61 -0
- scalable-pypeline-2.0.1/pypeline/pipeline_config_schema.py +249 -0
- scalable-pypeline-2.0.1/pypeline/pypeline_yaml.py +458 -0
- scalable-pypeline-2.0.1/pypeline/schedule_config_schema.py +125 -0
- scalable-pypeline-2.0.1/pypeline/utils/config_utils.py +68 -0
- scalable-pypeline-2.0.1/pypeline/utils/module_utils.py +83 -0
- scalable-pypeline-2.0.1/pypeline/utils/pipeline_utils.py +161 -0
- scalable-pypeline-2.0.1/requirements.txt +6 -0
- scalable-pypeline-2.0.1/scalable_pypeline.egg-info/PKG-INFO +191 -0
- {scalable-pypeline-1.2.2 → scalable-pypeline-2.0.1}/scalable_pypeline.egg-info/SOURCES.txt +8 -14
- scalable-pypeline-2.0.1/scalable_pypeline.egg-info/entry_points.txt +3 -0
- {scalable-pypeline-1.2.2 → scalable-pypeline-2.0.1}/scalable_pypeline.egg-info/requires.txt +7 -13
- {scalable-pypeline-1.2.2 → scalable-pypeline-2.0.1}/setup.py +85 -73
- scalable-pypeline-1.2.2/PKG-INFO +0 -131
- scalable-pypeline-1.2.2/README.md +0 -115
- scalable-pypeline-1.2.2/pypeline/__init__.py +0 -1
- scalable-pypeline-1.2.2/pypeline/celery.py +0 -206
- scalable-pypeline-1.2.2/pypeline/celery_beat.py +0 -254
- scalable-pypeline-1.2.2/pypeline/constants.py +0 -105
- scalable-pypeline-1.2.2/pypeline/extensions.py +0 -16
- scalable-pypeline-1.2.2/pypeline/flask/api/pipelines.py +0 -261
- scalable-pypeline-1.2.2/pypeline/flask/api/schedules.py +0 -67
- scalable-pypeline-1.2.2/pypeline/flask/api/utils.py +0 -35
- scalable-pypeline-1.2.2/pypeline/flask/decorators.py +0 -76
- scalable-pypeline-1.2.2/pypeline/flask/flask_sermos.py +0 -156
- scalable-pypeline-1.2.2/pypeline/generators.py +0 -196
- scalable-pypeline-1.2.2/pypeline/logging_config.py +0 -171
- scalable-pypeline-1.2.2/pypeline/pipeline/chained_task.py +0 -70
- scalable-pypeline-1.2.2/pypeline/pipeline/generator.py +0 -254
- scalable-pypeline-1.2.2/pypeline/pipeline_config_schema.py +0 -236
- scalable-pypeline-1.2.2/pypeline/schedule_config_schema.py +0 -210
- scalable-pypeline-1.2.2/pypeline/sermos_yaml.py +0 -442
- scalable-pypeline-1.2.2/pypeline/utils/config_utils.py +0 -326
- scalable-pypeline-1.2.2/pypeline/utils/graph_utils.py +0 -144
- scalable-pypeline-1.2.2/pypeline/utils/module_utils.py +0 -119
- scalable-pypeline-1.2.2/pypeline/utils/task_utils.py +0 -552
- scalable-pypeline-1.2.2/requirements.txt +0 -16
- scalable-pypeline-1.2.2/scalable_pypeline.egg-info/PKG-INFO +0 -131
- scalable-pypeline-1.2.2/scalable_pypeline.egg-info/entry_points.txt +0 -2
- scalable-pypeline-1.2.2/tests/fixtures/__init__.py +0 -1
- scalable-pypeline-1.2.2/tests/fixtures/s3_fixtures.py +0 -52
- {scalable-pypeline-1.2.2 → scalable-pypeline-2.0.1}/LICENSE +0 -0
- {scalable-pypeline-1.2.2 → scalable-pypeline-2.0.1}/MANIFEST.in +0 -0
- {scalable-pypeline-1.2.2 → scalable-pypeline-2.0.1}/pypeline/flask/api/__init__.py +0 -0
- {scalable-pypeline-1.2.2/pypeline/pipeline → scalable-pypeline-2.0.1/pypeline/utils}/__init__.py +0 -0
- {scalable-pypeline-1.2.2 → scalable-pypeline-2.0.1}/scalable_pypeline.egg-info/dependency_links.txt +0 -0
- {scalable-pypeline-1.2.2 → scalable-pypeline-2.0.1}/scalable_pypeline.egg-info/top_level.txt +0 -0
- {scalable-pypeline-1.2.2 → scalable-pypeline-2.0.1}/setup.cfg +0 -0
- {scalable-pypeline-1.2.2/pypeline/utils → scalable-pypeline-2.0.1/tests/fixtures}/__init__.py +0 -0
@@ -0,0 +1,191 @@
|
|
1
|
+
Metadata-Version: 2.1
|
2
|
+
Name: scalable-pypeline
|
3
|
+
Version: 2.0.1
|
4
|
+
Summary: PypeLine - Python pipelines for the Real World
|
5
|
+
Home-page: https://gitlab.com/bravos2/pypeline
|
6
|
+
Author: Bravos Power Corporation
|
7
|
+
License: Apache License 2.0
|
8
|
+
Description-Content-Type: text/markdown
|
9
|
+
Provides-Extra: build
|
10
|
+
Provides-Extra: flask
|
11
|
+
Provides-Extra: web
|
12
|
+
Provides-Extra: workers
|
13
|
+
Provides-Extra: dev
|
14
|
+
Provides-Extra: test
|
15
|
+
License-File: LICENSE
|
16
|
+
|
17
|
+
```
|
18
|
+
______ __ ________ _____ _ _____ _ _ _____
|
19
|
+
| ___ \\ \ / /| ___ \| ___|| | |_ _|| \ | || ___|
|
20
|
+
| |_/ / \ V / | |_/ /| |__ | | | | | \| || |__
|
21
|
+
| __/ \ / | __/ | __| | | | | | . ` || __|
|
22
|
+
| | | | | | | |___ | |_____| |_ | |\ || |___
|
23
|
+
\_| \_/ \_| \____/ \_____/\___/ \_| \_/\____/
|
24
|
+
```
|
25
|
+
|
26
|
+
## Overview
|
27
|
+
|
28
|
+
PypeLine is a versatile open-source library designed to streamline the management of data workflows and APIs. With PypeLine, you can efficiently schedule cron jobs, execute complex Directed Acyclical Graph (DAG) pipelines, and set up a Flask API complete with OpenAPI documentation.
|
29
|
+
|
30
|
+
#### Key Features
|
31
|
+
- Cron Job Scheduling: Easily schedule recurring tasks with flexible cron job functionality, ensuring that your processes run reliably at specified intervals.
|
32
|
+
- DAG Pipelines: Define and execute DAGs to manage complex data workflows with dependencies. PypeLine handles the execution order and parallelism, ensuring that each task runs in the correct sequence.
|
33
|
+
- Flask API with OpenAPI: Quickly configure a RESTful API using Flask, with built-in support for OpenAPI documentation, allowing for clear, standardized documentation of your endpoints.
|
34
|
+
|
35
|
+
## Requirements
|
36
|
+
|
37
|
+
- RabbitMQ
|
38
|
+
- Redis
|
39
|
+
- Docker (optional for dev)
|
40
|
+
|
41
|
+
## Getting Started
|
42
|
+
|
43
|
+
Install PypeLine:
|
44
|
+
|
45
|
+
```commandline
|
46
|
+
pip install "scalable-pypeline[flask,web,workers]>=2.0.1"
|
47
|
+
```
|
48
|
+
|
49
|
+
Configure your Flask project (app.py)
|
50
|
+
|
51
|
+
```python
|
52
|
+
from flask import Flask
|
53
|
+
from pypeline.flask import FlaskPypeline
|
54
|
+
from pypeline_demo.api import bp
|
55
|
+
from pypeline_demo.config import Config
|
56
|
+
from pypeline_demo.extensions import dramatiq
|
57
|
+
|
58
|
+
|
59
|
+
|
60
|
+
def create_app():
|
61
|
+
app = Flask(__name__)
|
62
|
+
|
63
|
+
dramatiq.init_app(app)
|
64
|
+
|
65
|
+
# Initialize your app with a configuration
|
66
|
+
app.config.from_object(Config)
|
67
|
+
|
68
|
+
pypeline = FlaskPypeline()
|
69
|
+
pypeline.init_app(app, init_api=True)
|
70
|
+
|
71
|
+
# Register API blueprints you wish
|
72
|
+
app.extensions["pypeline_core_api"].register_blueprint(bp)
|
73
|
+
# Register application blueprints to application
|
74
|
+
app.register_blueprint(bp)
|
75
|
+
|
76
|
+
return app
|
77
|
+
|
78
|
+
|
79
|
+
if __name__ == "__main__":
|
80
|
+
app = create_app()
|
81
|
+
app.run(port=5001)
|
82
|
+
```
|
83
|
+
|
84
|
+
Configure Dramatiq extension (extensions.py)
|
85
|
+
|
86
|
+
```python
|
87
|
+
from pypeline.dramatiq import Dramatiq
|
88
|
+
|
89
|
+
|
90
|
+
dramatiq = Dramatiq()
|
91
|
+
```
|
92
|
+
|
93
|
+
Set up your YAML configuration for PypeLine (pypeline.yaml)
|
94
|
+
|
95
|
+
```yaml
|
96
|
+
serviceConfig:
|
97
|
+
- name: pipeline-worker
|
98
|
+
registeredTasks:
|
99
|
+
- handler: pypeline_demo.pipeline.a
|
100
|
+
- handler: pypeline_demo.pipeline.b
|
101
|
+
- handler: pypeline_demo.pipeline.c
|
102
|
+
- handler: pypeline_demo.scheduled_tasks.cron_task
|
103
|
+
|
104
|
+
pipelines:
|
105
|
+
demo_pipeline:
|
106
|
+
name: Demo Pipeline
|
107
|
+
description: Pipeline to show examples of DAG Adjacency
|
108
|
+
schemaVersion: 1
|
109
|
+
config:
|
110
|
+
dagAdjacency:
|
111
|
+
a:
|
112
|
+
- b
|
113
|
+
- c
|
114
|
+
metadata:
|
115
|
+
maxRetry: 1
|
116
|
+
retryBackoff: 180
|
117
|
+
retryBackoffMax: 300
|
118
|
+
retryJitter: true
|
119
|
+
maxTtl: 10800
|
120
|
+
queue: new-queue
|
121
|
+
taskDefinitions:
|
122
|
+
a:
|
123
|
+
handler: pypeline_demo.pipeline.a
|
124
|
+
b:
|
125
|
+
handler: pypeline_demo.pipeline.b
|
126
|
+
c:
|
127
|
+
handler: pypeline_demo.pipeline.c
|
128
|
+
scheduledTasks:
|
129
|
+
cron-task:
|
130
|
+
name: Example cron task
|
131
|
+
enabled: true
|
132
|
+
config:
|
133
|
+
task: pypeline_demo.scheduled_tasks.cron_task
|
134
|
+
queue: new-queue
|
135
|
+
schedule:
|
136
|
+
minute: '*'
|
137
|
+
hour: '*'
|
138
|
+
dayOfWeek: '*'
|
139
|
+
dayOfMonth: '*'
|
140
|
+
monthOfYear: '*'
|
141
|
+
schemaVersion: 1
|
142
|
+
```
|
143
|
+
|
144
|
+
Set up the modules referenced in the YAML (pipeline.py and scheduled_tasks.py)
|
145
|
+
|
146
|
+
```python
|
147
|
+
import time
|
148
|
+
|
149
|
+
|
150
|
+
def a(event):
|
151
|
+
print("A")
|
152
|
+
|
153
|
+
|
154
|
+
def b(event):
|
155
|
+
print("B")
|
156
|
+
time.sleep(10)
|
157
|
+
|
158
|
+
|
159
|
+
def c(event):
|
160
|
+
print("C")
|
161
|
+
```
|
162
|
+
|
163
|
+
```python
|
164
|
+
def cron_task():
|
165
|
+
print("HI")
|
166
|
+
```
|
167
|
+
|
168
|
+
Configure your environment variables (demo.env)
|
169
|
+
|
170
|
+
```env
|
171
|
+
SERMOS_BASE_URL=local
|
172
|
+
PYPELINE_CLIENT_PKG_NAME=pypeline_demo
|
173
|
+
REDIS_URL=redis://:password@localhost:6379/0
|
174
|
+
RABBITMQ_URL=amqp://admin:password@localhost:5672
|
175
|
+
```
|
176
|
+
|
177
|
+
Start Rabbit & Redis as your message broker and backend results storage. We use `docker compose` for this.
|
178
|
+
|
179
|
+
## DEMO PROJECT COMING SOON!
|
180
|
+
|
181
|
+
|
182
|
+
## Testing
|
183
|
+
|
184
|
+
If you are developing pypeline and want to test this package,
|
185
|
+
install the test dependencies:
|
186
|
+
|
187
|
+
$ pip install -e .[test]
|
188
|
+
|
189
|
+
Now, run the tests:
|
190
|
+
|
191
|
+
$ tox
|
@@ -0,0 +1,175 @@
|
|
1
|
+
```
|
2
|
+
______ __ ________ _____ _ _____ _ _ _____
|
3
|
+
| ___ \\ \ / /| ___ \| ___|| | |_ _|| \ | || ___|
|
4
|
+
| |_/ / \ V / | |_/ /| |__ | | | | | \| || |__
|
5
|
+
| __/ \ / | __/ | __| | | | | | . ` || __|
|
6
|
+
| | | | | | | |___ | |_____| |_ | |\ || |___
|
7
|
+
\_| \_/ \_| \____/ \_____/\___/ \_| \_/\____/
|
8
|
+
```
|
9
|
+
|
10
|
+
## Overview
|
11
|
+
|
12
|
+
PypeLine is a versatile open-source library designed to streamline the management of data workflows and APIs. With PypeLine, you can efficiently schedule cron jobs, execute complex Directed Acyclical Graph (DAG) pipelines, and set up a Flask API complete with OpenAPI documentation.
|
13
|
+
|
14
|
+
#### Key Features
|
15
|
+
- Cron Job Scheduling: Easily schedule recurring tasks with flexible cron job functionality, ensuring that your processes run reliably at specified intervals.
|
16
|
+
- DAG Pipelines: Define and execute DAGs to manage complex data workflows with dependencies. PypeLine handles the execution order and parallelism, ensuring that each task runs in the correct sequence.
|
17
|
+
- Flask API with OpenAPI: Quickly configure a RESTful API using Flask, with built-in support for OpenAPI documentation, allowing for clear, standardized documentation of your endpoints.
|
18
|
+
|
19
|
+
## Requirements
|
20
|
+
|
21
|
+
- RabbitMQ
|
22
|
+
- Redis
|
23
|
+
- Docker (optional for dev)
|
24
|
+
|
25
|
+
## Getting Started
|
26
|
+
|
27
|
+
Install PypeLine:
|
28
|
+
|
29
|
+
```commandline
|
30
|
+
pip install "scalable-pypeline[flask,web,workers]>=2.0.1"
|
31
|
+
```
|
32
|
+
|
33
|
+
Configure your Flask project (app.py)
|
34
|
+
|
35
|
+
```python
|
36
|
+
from flask import Flask
|
37
|
+
from pypeline.flask import FlaskPypeline
|
38
|
+
from pypeline_demo.api import bp
|
39
|
+
from pypeline_demo.config import Config
|
40
|
+
from pypeline_demo.extensions import dramatiq
|
41
|
+
|
42
|
+
|
43
|
+
|
44
|
+
def create_app():
|
45
|
+
app = Flask(__name__)
|
46
|
+
|
47
|
+
dramatiq.init_app(app)
|
48
|
+
|
49
|
+
# Initialize your app with a configuration
|
50
|
+
app.config.from_object(Config)
|
51
|
+
|
52
|
+
pypeline = FlaskPypeline()
|
53
|
+
pypeline.init_app(app, init_api=True)
|
54
|
+
|
55
|
+
# Register API blueprints you wish
|
56
|
+
app.extensions["pypeline_core_api"].register_blueprint(bp)
|
57
|
+
# Register application blueprints to application
|
58
|
+
app.register_blueprint(bp)
|
59
|
+
|
60
|
+
return app
|
61
|
+
|
62
|
+
|
63
|
+
if __name__ == "__main__":
|
64
|
+
app = create_app()
|
65
|
+
app.run(port=5001)
|
66
|
+
```
|
67
|
+
|
68
|
+
Configure Dramatiq extension (extensions.py)
|
69
|
+
|
70
|
+
```python
|
71
|
+
from pypeline.dramatiq import Dramatiq
|
72
|
+
|
73
|
+
|
74
|
+
dramatiq = Dramatiq()
|
75
|
+
```
|
76
|
+
|
77
|
+
Setup your yaml configuration for pypelines (pypeline.yaml)
|
78
|
+
|
79
|
+
```yaml
|
80
|
+
serviceConfig:
|
81
|
+
- name: pipeline-worker
|
82
|
+
registeredTasks:
|
83
|
+
- handler: pypeline_demo.pipeline.a
|
84
|
+
- handler: pypeline_demo.pipeline.b
|
85
|
+
- handler: pypeline_demo.pipeline.c
|
86
|
+
- handler: pypeline_demo.scheduled_tasks.cron_task
|
87
|
+
|
88
|
+
pipelines:
|
89
|
+
demo_pipeline:
|
90
|
+
name: Demo Pipeline
|
91
|
+
description: Pipeline to show examples of DAG Adjacency
|
92
|
+
schemaVersion: 1
|
93
|
+
config:
|
94
|
+
dagAdjacency:
|
95
|
+
a:
|
96
|
+
- b
|
97
|
+
- c
|
98
|
+
metadata:
|
99
|
+
maxRetry: 1
|
100
|
+
retryBackoff: 180
|
101
|
+
retryBackoffMax: 300
|
102
|
+
retryJitter: true
|
103
|
+
maxTtl: 10800
|
104
|
+
queue: new-queue
|
105
|
+
taskDefinitions:
|
106
|
+
a:
|
107
|
+
handler: pypeline_demo.pipeline.a
|
108
|
+
b:
|
109
|
+
handler: pypeline_demo.pipeline.b
|
110
|
+
c:
|
111
|
+
handler: pypeline_demo.pipeline.c
|
112
|
+
scheduledTasks:
|
113
|
+
cron-task:
|
114
|
+
name: Example cron task
|
115
|
+
enabled: true
|
116
|
+
config:
|
117
|
+
task: pypeline_demo.scheduled_tasks.cron_task
|
118
|
+
queue: new-queue
|
119
|
+
schedule:
|
120
|
+
minute: '*'
|
121
|
+
hour: '*'
|
122
|
+
dayOfWeek: '*'
|
123
|
+
dayOfMonth: '*'
|
124
|
+
monthOfYear: '*'
|
125
|
+
schemaVersion: 1
|
126
|
+
```
|
127
|
+
|
128
|
+
Setup your modules to be executed by yaml (pipeline.py && scheduled_tasks.py)
|
129
|
+
|
130
|
+
```python
|
131
|
+
import time
|
132
|
+
|
133
|
+
|
134
|
+
def a(event):
|
135
|
+
print("A")
|
136
|
+
|
137
|
+
|
138
|
+
def b(event):
|
139
|
+
print("B")
|
140
|
+
time.sleep(10)
|
141
|
+
|
142
|
+
|
143
|
+
def c(event):
|
144
|
+
print("C")
|
145
|
+
```
|
146
|
+
|
147
|
+
```python
|
148
|
+
def cron_task():
|
149
|
+
print("HI")
|
150
|
+
```
|
151
|
+
|
152
|
+
Configure your environment variables (demo.env)
|
153
|
+
|
154
|
+
```env
|
155
|
+
SERMOS_BASE_URL=local
|
156
|
+
PYPELINE_CLIENT_PKG_NAME=pypeline_demo
|
157
|
+
REDIS_URL=redis://:password@localhost:6379/0
|
158
|
+
RABBITMQ_URL=amqp://admin:password@localhost:5672
|
159
|
+
```
|
160
|
+
|
161
|
+
Start Rabbit & Redis as your message broker and backend results storage. We use `docker compose` for this.
|
162
|
+
|
163
|
+
## DEMO PROJECT COMING SOON!
|
164
|
+
|
165
|
+
|
166
|
+
## Testing
|
167
|
+
|
168
|
+
If you are developing pypeline and want to test this package,
|
169
|
+
install the test dependencies:
|
170
|
+
|
171
|
+
$ pip install -e .[test]
|
172
|
+
|
173
|
+
Now, run the tests:
|
174
|
+
|
175
|
+
$ tox
|
@@ -0,0 +1 @@
|
|
1
|
+
__version__ = "2.0.1"
|
@@ -0,0 +1,34 @@
|
|
1
|
+
import time
|
2
|
+
|
3
|
+
import redis
|
4
|
+
|
5
|
+
|
6
|
+
class LockingParallelBarrier:
    """Redis-backed countdown barrier guarded by an expiring mutex.

    An integer counter stored at ``task_key`` tracks how many parallel
    tasks remain; workers decrement it as they finish.  A simple lock at
    ``lock_key`` serializes counter access across processes.  The lock
    carries a TTL so a crashed holder cannot deadlock other workers.
    """

    def __init__(self, redis_url, task_key="task_counter", lock_key="task_lock"):
        """Connect to Redis and remember the barrier's key names.

        :param redis_url: Redis connection URL
            (e.g. ``redis://:password@localhost:6379/0``).
        :param task_key: Key holding the remaining-task counter.
        :param lock_key: Key used as the inter-process mutex.
        """
        # decode_responses=True so GET returns str, which int() accepts below.
        self.redis = redis.StrictRedis.from_url(redis_url, decode_responses=True)
        self.task_key = task_key
        self.lock_key = lock_key

    def acquire_lock(self, timeout=5, wait_timeout=None):
        """Block until the lock is acquired (or ``wait_timeout`` elapses).

        :param timeout: TTL in seconds placed on the lock itself, so the
            lock auto-expires if its holder dies before releasing it.
        :param wait_timeout: Maximum seconds to wait for acquisition.
            ``None`` (the default, matching the original behavior) waits
            indefinitely.
        :returns: ``True`` once the lock is held; ``False`` only when
            ``wait_timeout`` was given and expired first.
        """
        deadline = None if wait_timeout is None else time.monotonic() + wait_timeout
        # SET NX EX is the canonical atomic "acquire with expiry" pattern.
        while not self.redis.set(self.lock_key, "locked", nx=True, ex=timeout):
            if deadline is not None and time.monotonic() >= deadline:
                return False
            time.sleep(0.1)  # brief poll interval to avoid hammering Redis
        return True

    def release_lock(self):
        """Release the lock by deleting its key in Redis."""
        self.redis.delete(self.lock_key)

    def set_task_count(self, count):
        """Initialize the task counter in Redis to *count*."""
        self.redis.set(self.task_key, count)

    def decrement_task_count(self):
        """Atomically decrement the task counter and return its new value."""
        return self.redis.decr(self.task_key)

    def get_task_count(self):
        """Return the current counter value (0 when the key is absent)."""
        return int(self.redis.get(self.task_key) or 0)