scalable-pypeline 1.2.3__py2.py3-none-any.whl → 2.0.2__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pypeline/__init__.py +1 -1
- pypeline/barrier.py +34 -0
- pypeline/composition.py +349 -0
- pypeline/constants.py +51 -84
- pypeline/dramatiq.py +470 -0
- pypeline/extensions.py +9 -8
- pypeline/flask/__init__.py +3 -5
- pypeline/flask/api/pipelines.py +109 -148
- pypeline/flask/api/schedules.py +14 -39
- pypeline/flask/decorators.py +18 -53
- pypeline/flask/flask_pypeline.py +156 -0
- pypeline/middleware.py +61 -0
- pypeline/pipeline_config_schema.py +105 -92
- pypeline/pypeline_yaml.py +458 -0
- pypeline/schedule_config_schema.py +35 -120
- pypeline/utils/config_utils.py +52 -310
- pypeline/utils/module_utils.py +35 -71
- pypeline/utils/pipeline_utils.py +161 -0
- scalable_pypeline-2.0.2.dist-info/METADATA +217 -0
- scalable_pypeline-2.0.2.dist-info/RECORD +27 -0
- scalable_pypeline-2.0.2.dist-info/entry_points.txt +3 -0
- tests/fixtures/__init__.py +0 -1
- pypeline/celery.py +0 -206
- pypeline/celery_beat.py +0 -254
- pypeline/flask/api/utils.py +0 -35
- pypeline/flask/flask_sermos.py +0 -156
- pypeline/generators.py +0 -196
- pypeline/logging_config.py +0 -171
- pypeline/pipeline/__init__.py +0 -0
- pypeline/pipeline/chained_task.py +0 -70
- pypeline/pipeline/generator.py +0 -254
- pypeline/sermos_yaml.py +0 -442
- pypeline/utils/graph_utils.py +0 -144
- pypeline/utils/task_utils.py +0 -552
- scalable_pypeline-1.2.3.dist-info/METADATA +0 -163
- scalable_pypeline-1.2.3.dist-info/RECORD +0 -33
- scalable_pypeline-1.2.3.dist-info/entry_points.txt +0 -2
- tests/fixtures/s3_fixtures.py +0 -52
- {scalable_pypeline-1.2.3.dist-info → scalable_pypeline-2.0.2.dist-info}/LICENSE +0 -0
- {scalable_pypeline-1.2.3.dist-info → scalable_pypeline-2.0.2.dist-info}/WHEEL +0 -0
- {scalable_pypeline-1.2.3.dist-info → scalable_pypeline-2.0.2.dist-info}/top_level.txt +0 -0
@@ -1,163 +0,0 @@
|
|
1
|
-
Metadata-Version: 2.1
|
2
|
-
Name: scalable-pypeline
|
3
|
-
Version: 1.2.3
|
4
|
-
Summary: PypeLine - Python pipelines for the Real World
|
5
|
-
Home-page: https://gitlab.com/bravos2/pypeline
|
6
|
-
Author: Bravos Power Corporation
|
7
|
-
License: Apache License 2.0
|
8
|
-
Description-Content-Type: text/markdown
|
9
|
-
License-File: LICENSE
|
10
|
-
Requires-Dist: PyYAML (<6,>=5.2)
|
11
|
-
Requires-Dist: click (==8.0.4)
|
12
|
-
Requires-Dist: marshmallow (<4,>=3.2.1)
|
13
|
-
Requires-Dist: requests (>=2.24.0)
|
14
|
-
Requires-Dist: redis (<5,>=4.5.4)
|
15
|
-
Requires-Dist: rhodb[redis] (<6,>=5.1.1)
|
16
|
-
Requires-Dist: attrs (<20,>=19)
|
17
|
-
Requires-Dist: boto3 (<2,>=1.11)
|
18
|
-
Requires-Dist: croniter (<2,>=1.0.15)
|
19
|
-
Requires-Dist: celery-dyrygent (==0.8.0)
|
20
|
-
Requires-Dist: itsdangerous (==2.0.1)
|
21
|
-
Provides-Extra: build
|
22
|
-
Requires-Dist: wheel ; extra == 'build'
|
23
|
-
Requires-Dist: twine ; extra == 'build'
|
24
|
-
Provides-Extra: dev
|
25
|
-
Requires-Dist: honcho (>=1.0.1) ; extra == 'dev'
|
26
|
-
Requires-Dist: awscli (>=1.11) ; extra == 'dev'
|
27
|
-
Requires-Dist: pylint (>=2.5.3) ; extra == 'dev'
|
28
|
-
Requires-Dist: pip-licenses ; extra == 'dev'
|
29
|
-
Provides-Extra: flask
|
30
|
-
Requires-Dist: Werkzeug (==2.0.3) ; extra == 'flask'
|
31
|
-
Requires-Dist: Flask (<2,>=1.1.2) ; extra == 'flask'
|
32
|
-
Requires-Dist: flask-smorest (<0.29,>=0.23.0) ; extra == 'flask'
|
33
|
-
Requires-Dist: Jinja2 (==3.0.3) ; extra == 'flask'
|
34
|
-
Provides-Extra: test
|
35
|
-
Requires-Dist: pytest-cov (<3,>=2.6.1) ; extra == 'test'
|
36
|
-
Requires-Dist: tox (<4,>=3.14.1) ; extra == 'test'
|
37
|
-
Requires-Dist: mock (<2,>=1) ; extra == 'test'
|
38
|
-
Requires-Dist: moto (>=1.3.16) ; extra == 'test'
|
39
|
-
Requires-Dist: responses (<0.11,>=0.10.16) ; extra == 'test'
|
40
|
-
Requires-Dist: fakeredis (<3,>=2.10.3) ; extra == 'test'
|
41
|
-
Requires-Dist: importlib-metadata (<5,>=4.12) ; extra == 'test'
|
42
|
-
Provides-Extra: web
|
43
|
-
Requires-Dist: gunicorn ; extra == 'web'
|
44
|
-
Requires-Dist: gevent (<22,>=21.12.0) ; extra == 'web'
|
45
|
-
Provides-Extra: workers
|
46
|
-
Requires-Dist: celery[redis] (<6,>=5.1.2) ; extra == 'workers'
|
47
|
-
Requires-Dist: networkx (>=2.4) ; extra == 'workers'
|
48
|
-
|
49
|
-
# PypeLine
|
50
|
-
|
51
|
-
## Quickstart
|
52
|
-
|
53
|
-
1. Add `pypeline` as a dependency to your Python application
|
54
|
-
1. Install extras depending on what you are building:
|
55
|
-
|
56
|
-
1. `flask` - Convenient interface for Flask applications
|
57
|
-
1. `web` - Some standard web server dependencies we like
|
58
|
-
1. `workers` - Installs [Celery](https://docs.celeryproject.org/en/stable/getting-started/introduction.html) and [networkx](https://networkx.org/documentation/stable/index.html), which are required if using pipelines.
|
59
|
-
|
60
|
-
## Overview
|
61
|
-
|
62
|
-
PypeLines is a fork of [Sermos](https://gitlab.com/sermos/sermos). PypeLines diverges from Sermos as a SaaS platform and is intended as a suite for job management in conjunction with or independent of a Flask web app. Common job management workflows include running pipelines, scheduled tasks, and various other types of jobs. PypeLines is designed to make these systems faster and more intuitive to create for Python developers.
|
63
|
-
|
64
|
-
Under the hood, we simply extend various Celery capabilities, such as its existing complex workflows, and make them suitable for large-scale pipelines that can be run in production. To do this, PypeLines uses a custom Celery configuration and a library known as [Celery-Dyrygent](https://github.com/ovh/celery-dyrygent) to help orchestrate thousands of tasks at once.
|
65
|
-
|
66
|
-
### Pypeline
|
67
|
-
|
68
|
-
- Celery Configuration
|
69
|
-
- Pipelines
|
70
|
-
- CronJobs
|
71
|
-
- APIs
|
72
|
-
- Utilities
|
73
|
-
|
74
|
-
### Your Application
|
75
|
-
|
76
|
-
This is where all of your code lives and only has a few _requirements_:
|
77
|
-
|
78
|
-
1. It is a base application written in Python.
|
79
|
-
1. Scheduled tasks and Pipeline nodes must be Python Methods that accept
|
80
|
-
at least one positional argument: `event`
|
81
|
-
1. A `sermos.yaml` file, which is a configuration file for running scheduled tasks and pipelines.
|
82
|
-
|
83
|
-
## Celery
|
84
|
-
|
85
|
-
PypeLines provides sensible default configurations for the use of
|
86
|
-
[Celery](http://www.celeryproject.org/). The default deployment uses RabbitMQ,
|
87
|
-
and is recommended. This library can be implemented in any other workflow
|
88
|
-
(e.g. Kafka) as desired.
|
89
|
-
|
90
|
-
There are two core aspects of Celery that pypeline handles differently from a
|
91
|
-
standard Celery deployment.
|
92
|
-
|
93
|
-
### ChainedTask
|
94
|
-
|
95
|
-
When `celery.py` is imported, it configures Celery and also runs
|
96
|
-
`GenerateCeleryTasks().generate()`, which will use the `sermos.yaml` config
|
97
|
-
to turn customer methods into decorated Celery tasks.
|
98
|
-
|
99
|
-
Part of this process includes adding `ChainedTask` as the _base_ for all of
|
100
|
-
these dynamically generated tasks.
|
101
|
-
|
102
|
-
`ChainedTask` is a Celery `Task` that injects `tools` and `event` into the
|
103
|
-
signature of all dynamically generated tasks.
|
104
|
-
|
105
|
-
### SermosScheduler
|
106
|
-
|
107
|
-
We allow users to set new scheduled / recurring tasks on-the-fly. Celery's
|
108
|
-
default `beat_scheduler` does not support this behavior and would require the
|
109
|
-
Beat process to be killed/restarted upon every change. Instead, we set our
|
110
|
-
custom `sermos.celery_beat:SermosScheduler` as the `beat_scheduler`,
|
111
|
-
which takes care of watching the database for new/modified entries and reloads
|
112
|
-
dynamically.
|
113
|
-
|
114
|
-
## Workers / Tasks / Pipeline Nodes
|
115
|
-
|
116
|
-
PypeLine handles decorating the tasks, generating the correct Celery
|
117
|
-
chains, etc.
|
118
|
-
|
119
|
-
Customer code has one requirement: write a python method that accepts one
|
120
|
-
positional argument: `event`
|
121
|
-
|
122
|
-
e.g.
|
123
|
-
|
124
|
-
def demo_pipeline_node_a(event):
|
125
|
-
logger.info(f"RUNNING demo_pipeline_node_a: {event}")
|
126
|
-
return
|
127
|
-
|
128
|
-
### Generators
|
129
|
-
|
130
|
-
_TODO_: This needs to be updated both in code and documentation. Leaving here
|
131
|
-
because it's valuable to update in the future.
|
132
|
-
|
133
|
-
A common task associated with processing batches of documents is generating
|
134
|
-
the list of files to process. `pypeline.generators` contains two helpful
|
135
|
-
classes to generate lists of files from S3 and from a local file system.
|
136
|
-
|
137
|
-
`S3KeyGenerator` will produce a list of object keys in S3. Example:
|
138
|
-
|
139
|
-
gen = S3KeyGenerator('access-key', 'secret-key')
|
140
|
-
files = gen.list_files(
|
141
|
-
'bucket-name',
|
142
|
-
'folder/path/',
|
143
|
-
offset=0,
|
144
|
-
limit=4,
|
145
|
-
return_full_path=False
|
146
|
-
)
|
147
|
-
|
148
|
-
`LocalKeyGenerator` will produce a list of file names on a local file system.
|
149
|
-
Example:
|
150
|
-
|
151
|
-
gen = LocalKeyGenerator()
|
152
|
-
files = gen.list_files('/path/to/list/')
|
153
|
-
|
154
|
-
## Testing
|
155
|
-
|
156
|
-
If you are developing pypeline and want to test this package,
|
157
|
-
install the test dependencies:
|
158
|
-
|
159
|
-
$ pip install -e .[test]
|
160
|
-
|
161
|
-
Now, run the tests:
|
162
|
-
|
163
|
-
$ tox
|
@@ -1,33 +0,0 @@
|
|
1
|
-
pypeline/__init__.py,sha256=NcjoxDLYOmbp4fP2sdOI7frUmFsP_OmIRLppFUOMwJQ,22
|
2
|
-
pypeline/celery.py,sha256=rbMCQQqevhdcyFYJSyoQT2b_NAbL3Inc9S789AtzN_w,9038
|
3
|
-
pypeline/celery_beat.py,sha256=KNmEpZEFOadVTwtRJtgX0AirSSNq65PFLJer4Hsq-xw,10759
|
4
|
-
pypeline/constants.py,sha256=0FLdlV8VZ3FtoTJiriCH8-YQHJuvoSLYYi_4QXHVQYg,4148
|
5
|
-
pypeline/extensions.py,sha256=_tv62NtJDrwQUM9mablHUmYyBf8TXlIZmf1LM1rlHtc,599
|
6
|
-
pypeline/generators.py,sha256=kRcJCohOMMljar6_nAaICTdNX1cNGyfkSvtl52Pd_hc,6776
|
7
|
-
pypeline/logging_config.py,sha256=QbUbSqLtxUOlqLkY0sWxVe9VGFKjghZ-MrlduZmtgLw,5503
|
8
|
-
pypeline/pipeline_config_schema.py,sha256=ggVs_cc8KVjKI4jg2TwKqoX82Nt2ET3oUbWk8o1agI4,8900
|
9
|
-
pypeline/schedule_config_schema.py,sha256=rGN-tGC9fj-Nc_CVasBUQAOzn2OvYyj0V_S6Az3sq6I,7385
|
10
|
-
pypeline/sermos_yaml.py,sha256=A0rFabU0ib9-IY64HZuYvB7sL06sTl84myTYmNSLK3g,17336
|
11
|
-
pypeline/flask/__init__.py,sha256=_kh1kQcpuU7yNYH36AtMsaCMYru5l43lYsRBe0lTgKE,523
|
12
|
-
pypeline/flask/decorators.py,sha256=zH9OB6DqxLUDSDBMs1Nd3Pt3qYdsAOoD8RL2MliRyRs,2298
|
13
|
-
pypeline/flask/flask_sermos.py,sha256=D-mTlJENm8MCYgNmCFwHeZb43XoUBsTO8ER1YqnSL3M,5791
|
14
|
-
pypeline/flask/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
15
|
-
pypeline/flask/api/pipelines.py,sha256=AT1he7IpykUiDDj7B2tKT0t4wwWQcBZIEeFXWOSHAYU,9026
|
16
|
-
pypeline/flask/api/schedules.py,sha256=AHiGYzZL__1sq5KZho75VwPWCn9Pz_ooNM179uKuQ7Q,2314
|
17
|
-
pypeline/flask/api/utils.py,sha256=bkUBZIbJHaG9nYahHI2J0NqoqoSlQ6j4aArY9q2ggqE,1302
|
18
|
-
pypeline/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
|
-
pypeline/pipeline/chained_task.py,sha256=G7pakmdUZK29E9X3_I5I2cTxTcEuRlXi_BeDAMJt8Qg,2961
|
20
|
-
pypeline/pipeline/generator.py,sha256=Nt65IIs5jc2XCQqWUaOoJIdpaEvQvxJgQSnj9DuYX3s,11509
|
21
|
-
pypeline/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
pypeline/utils/config_utils.py,sha256=SwpYy6Xk2apDK2GEb61ctHJh7q8IUo3bHOegE_xsDDk,13029
|
23
|
-
pypeline/utils/graph_utils.py,sha256=prs0ATCYCxWGWx8s9_gg3PtwJA1DORIIwUlMfKRjHJM,4642
|
24
|
-
pypeline/utils/module_utils.py,sha256=X4O2TdBvvoboK6PxzK18UuKbod9l2BfTIrALk_dI0tM,4166
|
25
|
-
pypeline/utils/task_utils.py,sha256=p66d4xcuPuBsqtcnaXgqfNXU3-ZSe4lvN11MkJ5_8XY,22222
|
26
|
-
tests/fixtures/__init__.py,sha256=vHbv5BMJXGb6XX764sChg5Ax7fixPuijiYNBuxgVTUQ,41
|
27
|
-
tests/fixtures/s3_fixtures.py,sha256=jbsp0WeIibLtjdV1nPSkEuJf1n6e9O7LO-kNFkMqylo,1694
|
28
|
-
scalable_pypeline-1.2.3.dist-info/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
|
29
|
-
scalable_pypeline-1.2.3.dist-info/METADATA,sha256=mt_3JLC-Ux3KNwue-BwQtRIz2ITMz7j2D3x-J999Yj4,6174
|
30
|
-
scalable_pypeline-1.2.3.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
|
31
|
-
scalable_pypeline-1.2.3.dist-info/entry_points.txt,sha256=ZDh7vdDaHrZD0RwUCiZidXg5-d2fBOYcEo7E6CL4g0U,56
|
32
|
-
scalable_pypeline-1.2.3.dist-info/top_level.txt,sha256=C7dpkEOc_-nnsAQb28BfQknjD6XHRyS9ZrvVeoIbV7s,15
|
33
|
-
scalable_pypeline-1.2.3.dist-info/RECORD,,
|
tests/fixtures/s3_fixtures.py
DELETED
@@ -1,52 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
import pytest
|
3
|
-
import json
|
4
|
-
from boto3 import Session
|
5
|
-
|
6
|
-
|
7
|
-
def _upload_fixture_dir(s3, bucket_name, folder):
    """Upload each file in ``tests/fixtures/<folder>`` to ``<folder>/<name>``.

    Entries named ``.DS_Store`` are skipped.
    """
    local_dir = 'tests/fixtures/' + folder
    for f_key in os.listdir(local_dir):
        if f_key == '.DS_Store':
            continue
        with open(local_dir + '/' + f_key, 'rb') as f:
            s3.upload_fileobj(f, bucket_name, folder + '/' + f_key)


@pytest.fixture(scope="session")
def source_documents(dummy_bucket):
    """Populate ``dummy_bucket`` with the sample documents under tests/fixtures.

    Uploads the contents of the ``pdfs``, ``tiffs``, ``pngs`` and ``html``
    fixture folders (paths are relative to the current working directory),
    plus one ``pdfs/.DS_Store`` object so tests can exercise hidden-file
    handling.

    Args:
        dummy_bucket: Fixture object whose ``name`` attribute is the target
            bucket name.

    Returns:
        None. The fixture is used for its side effect of filling the bucket.
    """
    # Local import: the rest of the file does not otherwise need tempfile.
    import tempfile

    # NOTE(review): 'foo'/'bar' are placeholder credentials; presumably the
    # S3 endpoint is mocked by the fixture that provides dummy_bucket.
    session = Session(
        aws_access_key_id='foo',
        aws_secret_access_key='bar',
        region_name='us-east-1',
    )
    s3 = session.client('s3')

    # Add PDFs
    _upload_fixture_dir(s3, dummy_bucket.name, 'pdfs')

    # Add 1 hidden file for testing. The scratch file lives in a temporary
    # directory so it cannot clobber a real ``foo`` in the working directory
    # and is removed even if the upload raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        dstore = os.path.join(tmp_dir, 'foo')
        with open(dstore, 'w') as newfile:
            newfile.write('bar')
        s3.upload_file(dstore, dummy_bucket.name, 'pdfs/.DS_Store')

    # Add TIFFs, PNGs and HTML files (same order as before).
    for folder in ('tiffs', 'pngs', 'html'):
        _upload_fixture_dir(s3, dummy_bucket.name, folder)
|
File without changes
|
File without changes
|
File without changes
|