scalable-pypeline 1.2.3__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. scalable-pypeline-2.0.1/PKG-INFO +191 -0
  2. scalable-pypeline-2.0.1/README.md +175 -0
  3. scalable-pypeline-2.0.1/pypeline/__init__.py +1 -0
  4. scalable-pypeline-2.0.1/pypeline/barrier.py +34 -0
  5. scalable-pypeline-2.0.1/pypeline/composition.py +348 -0
  6. scalable-pypeline-2.0.1/pypeline/constants.py +72 -0
  7. scalable-pypeline-2.0.1/pypeline/dramatiq.py +470 -0
  8. scalable-pypeline-2.0.1/pypeline/extensions.py +17 -0
  9. {scalable-pypeline-1.2.3 → scalable-pypeline-2.0.1}/pypeline/flask/__init__.py +3 -5
  10. scalable-pypeline-2.0.1/pypeline/flask/api/pipelines.py +222 -0
  11. scalable-pypeline-2.0.1/pypeline/flask/api/schedules.py +42 -0
  12. scalable-pypeline-2.0.1/pypeline/flask/decorators.py +41 -0
  13. scalable-pypeline-2.0.1/pypeline/flask/flask_pypeline.py +156 -0
  14. scalable-pypeline-2.0.1/pypeline/middleware.py +61 -0
  15. scalable-pypeline-2.0.1/pypeline/pipeline_config_schema.py +249 -0
  16. scalable-pypeline-2.0.1/pypeline/pypeline_yaml.py +458 -0
  17. scalable-pypeline-2.0.1/pypeline/schedule_config_schema.py +125 -0
  18. scalable-pypeline-2.0.1/pypeline/utils/config_utils.py +68 -0
  19. scalable-pypeline-2.0.1/pypeline/utils/module_utils.py +83 -0
  20. scalable-pypeline-2.0.1/pypeline/utils/pipeline_utils.py +161 -0
  21. scalable-pypeline-2.0.1/requirements.txt +6 -0
  22. scalable-pypeline-2.0.1/scalable_pypeline.egg-info/PKG-INFO +191 -0
  23. {scalable-pypeline-1.2.3 → scalable-pypeline-2.0.1}/scalable_pypeline.egg-info/SOURCES.txt +8 -14
  24. scalable-pypeline-2.0.1/scalable_pypeline.egg-info/entry_points.txt +3 -0
  25. {scalable-pypeline-1.2.3 → scalable-pypeline-2.0.1}/scalable_pypeline.egg-info/requires.txt +7 -13
  26. {scalable-pypeline-1.2.3 → scalable-pypeline-2.0.1}/setup.py +85 -73
  27. scalable-pypeline-1.2.3/PKG-INFO +0 -131
  28. scalable-pypeline-1.2.3/README.md +0 -115
  29. scalable-pypeline-1.2.3/pypeline/__init__.py +0 -1
  30. scalable-pypeline-1.2.3/pypeline/celery.py +0 -206
  31. scalable-pypeline-1.2.3/pypeline/celery_beat.py +0 -254
  32. scalable-pypeline-1.2.3/pypeline/constants.py +0 -105
  33. scalable-pypeline-1.2.3/pypeline/extensions.py +0 -16
  34. scalable-pypeline-1.2.3/pypeline/flask/api/pipelines.py +0 -261
  35. scalable-pypeline-1.2.3/pypeline/flask/api/schedules.py +0 -67
  36. scalable-pypeline-1.2.3/pypeline/flask/api/utils.py +0 -35
  37. scalable-pypeline-1.2.3/pypeline/flask/decorators.py +0 -76
  38. scalable-pypeline-1.2.3/pypeline/flask/flask_sermos.py +0 -156
  39. scalable-pypeline-1.2.3/pypeline/generators.py +0 -196
  40. scalable-pypeline-1.2.3/pypeline/logging_config.py +0 -171
  41. scalable-pypeline-1.2.3/pypeline/pipeline/chained_task.py +0 -70
  42. scalable-pypeline-1.2.3/pypeline/pipeline/generator.py +0 -254
  43. scalable-pypeline-1.2.3/pypeline/pipeline_config_schema.py +0 -236
  44. scalable-pypeline-1.2.3/pypeline/schedule_config_schema.py +0 -210
  45. scalable-pypeline-1.2.3/pypeline/sermos_yaml.py +0 -442
  46. scalable-pypeline-1.2.3/pypeline/utils/config_utils.py +0 -326
  47. scalable-pypeline-1.2.3/pypeline/utils/graph_utils.py +0 -144
  48. scalable-pypeline-1.2.3/pypeline/utils/module_utils.py +0 -119
  49. scalable-pypeline-1.2.3/pypeline/utils/task_utils.py +0 -552
  50. scalable-pypeline-1.2.3/requirements.txt +0 -16
  51. scalable-pypeline-1.2.3/scalable_pypeline.egg-info/PKG-INFO +0 -131
  52. scalable-pypeline-1.2.3/scalable_pypeline.egg-info/entry_points.txt +0 -2
  53. scalable-pypeline-1.2.3/tests/fixtures/__init__.py +0 -1
  54. scalable-pypeline-1.2.3/tests/fixtures/s3_fixtures.py +0 -52
  55. {scalable-pypeline-1.2.3 → scalable-pypeline-2.0.1}/LICENSE +0 -0
  56. {scalable-pypeline-1.2.3 → scalable-pypeline-2.0.1}/MANIFEST.in +0 -0
  57. {scalable-pypeline-1.2.3 → scalable-pypeline-2.0.1}/pypeline/flask/api/__init__.py +0 -0
  58. {scalable-pypeline-1.2.3/pypeline/pipeline → scalable-pypeline-2.0.1/pypeline/utils}/__init__.py +0 -0
  59. {scalable-pypeline-1.2.3 → scalable-pypeline-2.0.1}/scalable_pypeline.egg-info/dependency_links.txt +0 -0
  60. {scalable-pypeline-1.2.3 → scalable-pypeline-2.0.1}/scalable_pypeline.egg-info/top_level.txt +0 -0
  61. {scalable-pypeline-1.2.3 → scalable-pypeline-2.0.1}/setup.cfg +0 -0
  62. {scalable-pypeline-1.2.3/pypeline/utils → scalable-pypeline-2.0.1/tests/fixtures}/__init__.py +0 -0
@@ -0,0 +1,191 @@
+ Metadata-Version: 2.1
+ Name: scalable-pypeline
+ Version: 2.0.1
+ Summary: PypeLine - Python pipelines for the Real World
+ Home-page: https://gitlab.com/bravos2/pypeline
+ Author: Bravos Power Corporation
+ License: Apache License 2.0
+ Description-Content-Type: text/markdown
+ Provides-Extra: build
+ Provides-Extra: flask
+ Provides-Extra: web
+ Provides-Extra: workers
+ Provides-Extra: dev
+ Provides-Extra: test
+ License-File: LICENSE
+
+ ```
+ ______ __ ________ _____ _ _____ _ _ _____
+ | ___ \\ \ / /| ___ \| ___|| | |_ _|| \ | || ___|
+ | |_/ / \ V / | |_/ /| |__ | | | | | \| || |__
+ | __/ \ / | __/ | __| | | | | | . ` || __|
+ | | | | | | | |___ | |_____| |_ | |\ || |___
+ \_| \_/ \_| \____/ \_____/\___/ \_| \_/\____/
+ ```
+
+ ## Overview
+
+ PypeLine is a versatile open-source library designed to streamline the management of data workflows and APIs. With PypeLine, you can efficiently schedule cron jobs, execute complex Directed Acyclic Graph (DAG) pipelines, and set up a Flask API complete with OpenAPI documentation.
+
+ #### Key Features
+ - Cron Job Scheduling: Easily schedule recurring tasks with flexible cron job functionality, ensuring that your processes run reliably at specified intervals.
+ - DAG Pipelines: Define and execute DAGs to manage complex data workflows with dependencies. PypeLine handles the execution order and parallelism, ensuring that each task runs in the correct sequence.
+ - Flask API with OpenAPI: Quickly configure a RESTful API using Flask, with built-in support for OpenAPI documentation, allowing for clear, standardized documentation of your endpoints.
+
+ ## Requirements
+
+ - RabbitMQ
+ - Redis
+ - Docker (optional for dev)
+
+ ## Getting Started
+
+ Install PypeLine:
+
+ ```commandline
+ pip install "scalable-pypeline[flask,web,workers]>=1.2.3"
+ ```
+
+ Configure your Flask project (app.py):
+
+ ```python
+ from flask import Flask
+ from pypeline.flask import FlaskPypeline
+ from pypeline_demo.api import bp
+ from pypeline_demo.config import Config
+ from pypeline_demo.extensions import dramatiq
+
+
+ def create_app():
+     app = Flask(__name__)
+
+     dramatiq.init_app(app)
+
+     # Initialize your app with a configuration
+     app.config.from_object(Config)
+
+     pypeline = FlaskPypeline()
+     pypeline.init_app(app, init_api=True)
+
+     # Register any API blueprints you wish to expose through the core API
+     app.extensions["pypeline_core_api"].register_blueprint(bp)
+     # Register application blueprints on the application itself
+     app.register_blueprint(bp)
+
+     return app
+
+
+ if __name__ == "__main__":
+     app = create_app()
+     app.run(port=5001)
+ ```
+
+ Configure the Dramatiq extension (extensions.py):
+
+ ```python
+ from pypeline.dramatiq import Dramatiq
+
+
+ dramatiq = Dramatiq()
+ ```
+
+ Set up your YAML configuration for PypeLine (pypeline.yaml):
+
+ ```yaml
+ serviceConfig:
+   - name: pipeline-worker
+     registeredTasks:
+       - handler: pypeline_demo.pipeline.a
+       - handler: pypeline_demo.pipeline.b
+       - handler: pypeline_demo.pipeline.c
+       - handler: pypeline_demo.scheduled_tasks.cron_task
+
+ pipelines:
+   demo_pipeline:
+     name: Demo Pipeline
+     description: Pipeline to show examples of DAG Adjacency
+     schemaVersion: 1
+     config:
+       dagAdjacency:
+         a:
+           - b
+           - c
+       metadata:
+         maxRetry: 1
+         retryBackoff: 180
+         retryBackoffMax: 300
+         retryJitter: true
+         maxTtl: 10800
+         queue: new-queue
+       taskDefinitions:
+         a:
+           handler: pypeline_demo.pipeline.a
+         b:
+           handler: pypeline_demo.pipeline.b
+         c:
+           handler: pypeline_demo.pipeline.c
+ scheduledTasks:
+   cron-task:
+     name: Example cron task
+     enabled: true
+     config:
+       task: pypeline_demo.scheduled_tasks.cron_task
+       queue: new-queue
+       schedule:
+         minute: '*'
+         hour: '*'
+         dayOfWeek: '*'
+         dayOfMonth: '*'
+         monthOfYear: '*'
+     schemaVersion: 1
+ ```
+
+ Set up the modules to be executed via the YAML (pipeline.py and scheduled_tasks.py):
+
+ ```python
+ import time
+
+
+ def a(event):
+     print("A")
+
+
+ def b(event):
+     print("B")
+     time.sleep(10)
+
+
+ def c(event):
+     print("C")
+ ```
+
+ ```python
+ def cron_task():
+     print("HI")
+ ```
+
+ Configure your environment variables (demo.env):
+
+ ```env
+ SERMOS_BASE_URL=local
+ PYPELINE_CLIENT_PKG_NAME=pypeline_demo
+ REDIS_URL=redis://:password@localhost:6379/0
+ RABBITMQ_URL=amqp://admin:password@localhost:5672
+ ```
+
+ Start RabbitMQ & Redis as your message broker and results backend. We use `docker compose` for this.
+
+ ## DEMO PROJECT COMING SOON!
+
+
+ ## Testing
+
+ If you are developing pypeline and want to test this package,
+ install the test dependencies:
+
+     $ pip install -e .[test]
+
+ Now, run the tests:
+
+     $ tox
@@ -0,0 +1,175 @@
+ ```
+ ______ __ ________ _____ _ _____ _ _ _____
+ | ___ \\ \ / /| ___ \| ___|| | |_ _|| \ | || ___|
+ | |_/ / \ V / | |_/ /| |__ | | | | | \| || |__
+ | __/ \ / | __/ | __| | | | | | . ` || __|
+ | | | | | | | |___ | |_____| |_ | |\ || |___
+ \_| \_/ \_| \____/ \_____/\___/ \_| \_/\____/
+ ```
+
+ ## Overview
+
+ PypeLine is a versatile open-source library designed to streamline the management of data workflows and APIs. With PypeLine, you can efficiently schedule cron jobs, execute complex Directed Acyclic Graph (DAG) pipelines, and set up a Flask API complete with OpenAPI documentation.
+
+ #### Key Features
+ - Cron Job Scheduling: Easily schedule recurring tasks with flexible cron job functionality, ensuring that your processes run reliably at specified intervals.
+ - DAG Pipelines: Define and execute DAGs to manage complex data workflows with dependencies. PypeLine handles the execution order and parallelism, ensuring that each task runs in the correct sequence.
+ - Flask API with OpenAPI: Quickly configure a RESTful API using Flask, with built-in support for OpenAPI documentation, allowing for clear, standardized documentation of your endpoints.
+
+ ## Requirements
+
+ - RabbitMQ
+ - Redis
+ - Docker (optional for dev)
+
+ ## Getting Started
+
+ Install PypeLine:
+
+ ```commandline
+ pip install "scalable-pypeline[flask,web,workers]>=1.2.3"
+ ```
+
+ Configure your Flask project (app.py):
+
+ ```python
+ from flask import Flask
+ from pypeline.flask import FlaskPypeline
+ from pypeline_demo.api import bp
+ from pypeline_demo.config import Config
+ from pypeline_demo.extensions import dramatiq
+
+
+ def create_app():
+     app = Flask(__name__)
+
+     dramatiq.init_app(app)
+
+     # Initialize your app with a configuration
+     app.config.from_object(Config)
+
+     pypeline = FlaskPypeline()
+     pypeline.init_app(app, init_api=True)
+
+     # Register any API blueprints you wish to expose through the core API
+     app.extensions["pypeline_core_api"].register_blueprint(bp)
+     # Register application blueprints on the application itself
+     app.register_blueprint(bp)
+
+     return app
+
+
+ if __name__ == "__main__":
+     app = create_app()
+     app.run(port=5001)
+ ```
+
+ Configure the Dramatiq extension (extensions.py):
+
+ ```python
+ from pypeline.dramatiq import Dramatiq
+
+
+ dramatiq = Dramatiq()
+ ```
+
+ Set up your YAML configuration for PypeLine (pypeline.yaml):
+
+ ```yaml
+ serviceConfig:
+   - name: pipeline-worker
+     registeredTasks:
+       - handler: pypeline_demo.pipeline.a
+       - handler: pypeline_demo.pipeline.b
+       - handler: pypeline_demo.pipeline.c
+       - handler: pypeline_demo.scheduled_tasks.cron_task
+
+ pipelines:
+   demo_pipeline:
+     name: Demo Pipeline
+     description: Pipeline to show examples of DAG Adjacency
+     schemaVersion: 1
+     config:
+       dagAdjacency:
+         a:
+           - b
+           - c
+       metadata:
+         maxRetry: 1
+         retryBackoff: 180
+         retryBackoffMax: 300
+         retryJitter: true
+         maxTtl: 10800
+         queue: new-queue
+       taskDefinitions:
+         a:
+           handler: pypeline_demo.pipeline.a
+         b:
+           handler: pypeline_demo.pipeline.b
+         c:
+           handler: pypeline_demo.pipeline.c
+ scheduledTasks:
+   cron-task:
+     name: Example cron task
+     enabled: true
+     config:
+       task: pypeline_demo.scheduled_tasks.cron_task
+       queue: new-queue
+       schedule:
+         minute: '*'
+         hour: '*'
+         dayOfWeek: '*'
+         dayOfMonth: '*'
+         monthOfYear: '*'
+     schemaVersion: 1
+ ```
+
+ Set up the modules to be executed via the YAML (pipeline.py and scheduled_tasks.py):
+
+ ```python
+ import time
+
+
+ def a(event):
+     print("A")
+
+
+ def b(event):
+     print("B")
+     time.sleep(10)
+
+
+ def c(event):
+     print("C")
+ ```
+
+ ```python
+ def cron_task():
+     print("HI")
+ ```
+
+ Configure your environment variables (demo.env):
+
+ ```env
+ SERMOS_BASE_URL=local
+ PYPELINE_CLIENT_PKG_NAME=pypeline_demo
+ REDIS_URL=redis://:password@localhost:6379/0
+ RABBITMQ_URL=amqp://admin:password@localhost:5672
+ ```
+
+ Start RabbitMQ & Redis as your message broker and results backend. We use `docker compose` for this.
+
+ ## DEMO PROJECT COMING SOON!
+
+
+ ## Testing
+
+ If you are developing pypeline and want to test this package,
+ install the test dependencies:
+
+     $ pip install -e .[test]
+
+ Now, run the tests:
+
+     $ tox
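The README above tells users to start RabbitMQ and Redis with `docker compose`, but no compose file ships in this diff, so the following is only a sketch of what one might look like; the image tags, service names, and port mappings are assumptions, with credentials chosen to match the `demo.env` values shown in the README.

```yaml
# docker-compose.yml (hypothetical example, not part of the scalable-pypeline package)
services:
  rabbitmq:
    image: rabbitmq:3-management        # broker; management UI on 15672
    environment:
      RABBITMQ_DEFAULT_USER: admin      # matches RABBITMQ_URL in demo.env
      RABBITMQ_DEFAULT_PASS: password
    ports:
      - "5672:5672"
      - "15672:15672"
  redis:
    image: redis:7                      # results backend
    command: ["redis-server", "--requirepass", "password"]   # matches REDIS_URL in demo.env
    ports:
      - "6379:6379"
```

With a file like this in place, `docker compose up -d` brings up the broker and results backend that the README's `RABBITMQ_URL` and `REDIS_URL` point at.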
@@ -0,0 +1 @@
+ __version__ = "2.0.1"
@@ -0,0 +1,34 @@
+ import time
+
+ import redis
+
+
+ class LockingParallelBarrier:
+     def __init__(self, redis_url, task_key="task_counter", lock_key="task_lock"):
+         # Connect to Redis using the provided URL
+         self.redis = redis.StrictRedis.from_url(redis_url, decode_responses=True)
+         self.task_key = task_key
+         self.lock_key = lock_key
+
+     def acquire_lock(self, timeout=5):
+         """Acquire a lock using Redis."""
+         while True:
+             if self.redis.set(self.lock_key, "locked", nx=True, ex=timeout):
+                 return True
+             time.sleep(0.1)
+
+     def release_lock(self):
+         """Release the lock in Redis."""
+         self.redis.delete(self.lock_key)
+
+     def set_task_count(self, count):
+         """Initialize the task counter in Redis."""
+         self.redis.set(self.task_key, count)
+
+     def decrement_task_count(self):
+         """Decrement the task counter in Redis."""
+         return self.redis.decr(self.task_key)
+
+     def get_task_count(self):
+         """Get the current value of the task counter."""
+         return int(self.redis.get(self.task_key) or 0)
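The new `pypeline/barrier.py` above gives pipelines a Redis-backed counter plus lock for fan-in coordination. As a rough usage sketch (the Redis URL and key names here are illustrative placeholders, not values taken from the package), a coordinator could set the counter to the number of parallel tasks, and each task could decrement it under the lock, with the task that reaches zero knowing all of its siblings have finished:

```python
# Hypothetical usage of LockingParallelBarrier; the URL and key names are placeholders.
from pypeline.barrier import LockingParallelBarrier

REDIS_URL = "redis://:password@localhost:6379/0"

# Coordinator: record how many parallel tasks are about to run.
barrier = LockingParallelBarrier(
    REDIS_URL, task_key="demo_pipeline:tasks", lock_key="demo_pipeline:lock"
)
barrier.set_task_count(3)

# Each worker task: decrement the shared counter under the lock when it finishes.
barrier.acquire_lock(timeout=5)
try:
    remaining = barrier.decrement_task_count()
finally:
    barrier.release_lock()

# The task that sees zero remaining knows the whole parallel group is complete.
if remaining <= 0:
    print("All parallel tasks finished; safe to trigger the next step.")
```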