scalable-pypeline 1.2.3__py2.py3-none-any.whl → 2.0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. pypeline/__init__.py +1 -1
  2. pypeline/barrier.py +34 -0
  3. pypeline/composition.py +348 -0
  4. pypeline/constants.py +51 -84
  5. pypeline/dramatiq.py +470 -0
  6. pypeline/extensions.py +9 -8
  7. pypeline/flask/__init__.py +3 -5
  8. pypeline/flask/api/pipelines.py +109 -148
  9. pypeline/flask/api/schedules.py +14 -39
  10. pypeline/flask/decorators.py +18 -53
  11. pypeline/flask/flask_pypeline.py +156 -0
  12. pypeline/middleware.py +61 -0
  13. pypeline/pipeline_config_schema.py +105 -92
  14. pypeline/pypeline_yaml.py +458 -0
  15. pypeline/schedule_config_schema.py +35 -120
  16. pypeline/utils/config_utils.py +52 -310
  17. pypeline/utils/module_utils.py +35 -71
  18. pypeline/utils/pipeline_utils.py +161 -0
  19. scalable_pypeline-2.0.1.dist-info/METADATA +217 -0
  20. scalable_pypeline-2.0.1.dist-info/RECORD +27 -0
  21. scalable_pypeline-2.0.1.dist-info/entry_points.txt +3 -0
  22. tests/fixtures/__init__.py +0 -1
  23. pypeline/celery.py +0 -206
  24. pypeline/celery_beat.py +0 -254
  25. pypeline/flask/api/utils.py +0 -35
  26. pypeline/flask/flask_sermos.py +0 -156
  27. pypeline/generators.py +0 -196
  28. pypeline/logging_config.py +0 -171
  29. pypeline/pipeline/__init__.py +0 -0
  30. pypeline/pipeline/chained_task.py +0 -70
  31. pypeline/pipeline/generator.py +0 -254
  32. pypeline/sermos_yaml.py +0 -442
  33. pypeline/utils/graph_utils.py +0 -144
  34. pypeline/utils/task_utils.py +0 -552
  35. scalable_pypeline-1.2.3.dist-info/METADATA +0 -163
  36. scalable_pypeline-1.2.3.dist-info/RECORD +0 -33
  37. scalable_pypeline-1.2.3.dist-info/entry_points.txt +0 -2
  38. tests/fixtures/s3_fixtures.py +0 -52
  39. {scalable_pypeline-1.2.3.dist-info → scalable_pypeline-2.0.1.dist-info}/LICENSE +0 -0
  40. {scalable_pypeline-1.2.3.dist-info → scalable_pypeline-2.0.1.dist-info}/WHEEL +0 -0
  41. {scalable_pypeline-1.2.3.dist-info → scalable_pypeline-2.0.1.dist-info}/top_level.txt +0 -0
@@ -1,163 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: scalable-pypeline
3
- Version: 1.2.3
4
- Summary: PypeLine - Python pipelines for the Real World
5
- Home-page: https://gitlab.com/bravos2/pypeline
6
- Author: Bravos Power Corporation
7
- License: Apache License 2.0
8
- Description-Content-Type: text/markdown
9
- License-File: LICENSE
10
- Requires-Dist: PyYAML (<6,>=5.2)
11
- Requires-Dist: click (==8.0.4)
12
- Requires-Dist: marshmallow (<4,>=3.2.1)
13
- Requires-Dist: requests (>=2.24.0)
14
- Requires-Dist: redis (<5,>=4.5.4)
15
- Requires-Dist: rhodb[redis] (<6,>=5.1.1)
16
- Requires-Dist: attrs (<20,>=19)
17
- Requires-Dist: boto3 (<2,>=1.11)
18
- Requires-Dist: croniter (<2,>=1.0.15)
19
- Requires-Dist: celery-dyrygent (==0.8.0)
20
- Requires-Dist: itsdangerous (==2.0.1)
21
- Provides-Extra: build
22
- Requires-Dist: wheel ; extra == 'build'
23
- Requires-Dist: twine ; extra == 'build'
24
- Provides-Extra: dev
25
- Requires-Dist: honcho (>=1.0.1) ; extra == 'dev'
26
- Requires-Dist: awscli (>=1.11) ; extra == 'dev'
27
- Requires-Dist: pylint (>=2.5.3) ; extra == 'dev'
28
- Requires-Dist: pip-licenses ; extra == 'dev'
29
- Provides-Extra: flask
30
- Requires-Dist: Werkzeug (==2.0.3) ; extra == 'flask'
31
- Requires-Dist: Flask (<2,>=1.1.2) ; extra == 'flask'
32
- Requires-Dist: flask-smorest (<0.29,>=0.23.0) ; extra == 'flask'
33
- Requires-Dist: Jinja2 (==3.0.3) ; extra == 'flask'
34
- Provides-Extra: test
35
- Requires-Dist: pytest-cov (<3,>=2.6.1) ; extra == 'test'
36
- Requires-Dist: tox (<4,>=3.14.1) ; extra == 'test'
37
- Requires-Dist: mock (<2,>=1) ; extra == 'test'
38
- Requires-Dist: moto (>=1.3.16) ; extra == 'test'
39
- Requires-Dist: responses (<0.11,>=0.10.16) ; extra == 'test'
40
- Requires-Dist: fakeredis (<3,>=2.10.3) ; extra == 'test'
41
- Requires-Dist: importlib-metadata (<5,>=4.12) ; extra == 'test'
42
- Provides-Extra: web
43
- Requires-Dist: gunicorn ; extra == 'web'
44
- Requires-Dist: gevent (<22,>=21.12.0) ; extra == 'web'
45
- Provides-Extra: workers
46
- Requires-Dist: celery[redis] (<6,>=5.1.2) ; extra == 'workers'
47
- Requires-Dist: networkx (>=2.4) ; extra == 'workers'
48
-
49
- # PypeLine
50
-
51
- ## Quickstart
52
-
53
- 1. Add `pypeline` as a dependency to your Python application
54
- 1. Install extras depending on what you are building:
55
-
56
- 1. `flask` - Convenient interface for Flask applications
57
- 1. `web` - Some standard web server dependencies we like
58
- 1. `workers` - Installs [Celery](https://docs.celeryproject.org/en/stable/getting-started/introduction.html) and [networkx](https://networkx.org/documentation/stable/index.html), which are required if using pipelines.
59
-
60
- ## Overview
61
-
62
- PypeLines is a fork of [Sermos](https://gitlab.com/sermos/sermos). PypeLines diverges from Sermos as a SaaS platform and is intended as a suite for job management in conjunction with or independent from a Flask Web App. Common job management workflows include running pipelines, scheduled tasks, and other various types of jobs. PypeLines is designed to make these systems faster and more intuitive to create for Python developers.
63
-
64
- Under the hood we are simply extending various Celery capabilities, like its existing complex workflows, and making them suitable for large-scale pipelines that can be run in production. To do this, PypeLines uses a custom Celery configuration and a library known as [Celery-Dyrygent](https://github.com/ovh/celery-dyrygent) to help orchestrate thousands of tasks at once.
65
-
66
- ### Pypeline
67
-
68
- - Celery Configuration
69
- - Pipelines
70
- - CronJobs
71
- - APIs
72
- - Utilities
73
-
74
- ### Your Application
75
-
76
- This is where all of your code lives and only has a few _requirements_:
77
-
78
- 1. It is a base application written in Python.
79
- 1. Scheduled tasks and Pipeline nodes must be Python Methods that accept
80
- at least one positional argument: `event`
81
- 1. A `sermos.yaml` file, which is a configuration file for running scheduled tasks and pipelines.
82
-
83
- ## Celery
84
-
85
- PypeLines provides sensible default configurations for the use of
86
- [Celery](http://www.celeryproject.org/). The default deployment uses RabbitMQ,
87
- and is recommended. This library can be implemented in any other workflow
88
- (e.g. Kafka) as desired.
89
-
90
- There are two core aspects of Celery that pypeline handles differently from a
91
- standard Celery deployment.
92
-
93
- ### ChainedTask
94
-
95
- When `celery.py` is imported, it configures Celery and also runs
96
- `GenerateCeleryTasks().generate()`, which will use the `sermos.yaml` config
97
- to turn customer methods into decorated Celery tasks.
98
-
99
- Part of this process includes adding `ChainedTask` as the _base_ for all of
100
- these dynamically generated tasks.
101
-
102
- `ChainedTask` is a Celery `Task` that injects `tools` and `event` into the
103
- signature of all dynamically generated tasks.
104
-
105
- ### SermosScheduler
106
-
107
- We allow users to set new scheduled / recurring tasks on-the-fly. Celery's
108
- default `beat_scheduler` does not support this behavior and would require the
109
- Beat process to be killed/restarted upon every change. Instead, we set our
110
- custom `sermos.celery_beat:SermosScheduler` as the `beat_scheduler`,
111
- which takes care of watching the database for new/modified entries and reloads
112
- dynamically.
113
-
114
- ## Workers / Tasks / Pipeline Nodes
115
-
116
- PypeLine handles decorating the tasks, generating the correct Celery
117
- chains, etc.
118
-
119
- Customer code has one requirement: write a python method that accepts one
120
- positional argument: `event`
121
-
122
- e.g.
123
-
124
- def demo_pipeline_node_a(event):
125
- logger.info(f"RUNNING demo_pipeline_node_a: {event}")
126
- return
127
-
128
- ### Generators
129
-
130
- _TODO_: This needs to be updated both in code and documentation. Leaving here
131
- because it's valuable to update in the future.
132
-
133
- A common task associated with processing batches of documents is generating
134
- the list of files to process. `pypeline.generators` contains two helpful
135
- classes to generate lists of files from S3 and from a local file system.
136
-
137
- `S3KeyGenerator` will produce a list of object keys in S3. Example:
138
-
139
- gen = S3KeyGenerator('access-key', 'secret-key')
140
- files = gen.list_files(
141
- 'bucket-name',
142
- 'folder/path/',
143
- offset=0,
144
- limit=4,
145
- return_full_path=False
146
- )
147
-
148
- `LocalKeyGenerator` will produce a list of file names on a local file system.
149
- Example:
150
-
151
- gen = LocalKeyGenerator()
152
- files = gen.list_files('/path/to/list/')
153
-
154
- ## Testing
155
-
156
- If you are developing pypeline and want to test this package,
157
- install the test dependencies:
158
-
159
- $ pip install -e .[test]
160
-
161
- Now, run the tests:
162
-
163
- $ tox
@@ -1,33 +0,0 @@
1
- pypeline/__init__.py,sha256=NcjoxDLYOmbp4fP2sdOI7frUmFsP_OmIRLppFUOMwJQ,22
2
- pypeline/celery.py,sha256=rbMCQQqevhdcyFYJSyoQT2b_NAbL3Inc9S789AtzN_w,9038
3
- pypeline/celery_beat.py,sha256=KNmEpZEFOadVTwtRJtgX0AirSSNq65PFLJer4Hsq-xw,10759
4
- pypeline/constants.py,sha256=0FLdlV8VZ3FtoTJiriCH8-YQHJuvoSLYYi_4QXHVQYg,4148
5
- pypeline/extensions.py,sha256=_tv62NtJDrwQUM9mablHUmYyBf8TXlIZmf1LM1rlHtc,599
6
- pypeline/generators.py,sha256=kRcJCohOMMljar6_nAaICTdNX1cNGyfkSvtl52Pd_hc,6776
7
- pypeline/logging_config.py,sha256=QbUbSqLtxUOlqLkY0sWxVe9VGFKjghZ-MrlduZmtgLw,5503
8
- pypeline/pipeline_config_schema.py,sha256=ggVs_cc8KVjKI4jg2TwKqoX82Nt2ET3oUbWk8o1agI4,8900
9
- pypeline/schedule_config_schema.py,sha256=rGN-tGC9fj-Nc_CVasBUQAOzn2OvYyj0V_S6Az3sq6I,7385
10
- pypeline/sermos_yaml.py,sha256=A0rFabU0ib9-IY64HZuYvB7sL06sTl84myTYmNSLK3g,17336
11
- pypeline/flask/__init__.py,sha256=_kh1kQcpuU7yNYH36AtMsaCMYru5l43lYsRBe0lTgKE,523
12
- pypeline/flask/decorators.py,sha256=zH9OB6DqxLUDSDBMs1Nd3Pt3qYdsAOoD8RL2MliRyRs,2298
13
- pypeline/flask/flask_sermos.py,sha256=D-mTlJENm8MCYgNmCFwHeZb43XoUBsTO8ER1YqnSL3M,5791
14
- pypeline/flask/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- pypeline/flask/api/pipelines.py,sha256=AT1he7IpykUiDDj7B2tKT0t4wwWQcBZIEeFXWOSHAYU,9026
16
- pypeline/flask/api/schedules.py,sha256=AHiGYzZL__1sq5KZho75VwPWCn9Pz_ooNM179uKuQ7Q,2314
17
- pypeline/flask/api/utils.py,sha256=bkUBZIbJHaG9nYahHI2J0NqoqoSlQ6j4aArY9q2ggqE,1302
18
- pypeline/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
- pypeline/pipeline/chained_task.py,sha256=G7pakmdUZK29E9X3_I5I2cTxTcEuRlXi_BeDAMJt8Qg,2961
20
- pypeline/pipeline/generator.py,sha256=Nt65IIs5jc2XCQqWUaOoJIdpaEvQvxJgQSnj9DuYX3s,11509
21
- pypeline/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- pypeline/utils/config_utils.py,sha256=SwpYy6Xk2apDK2GEb61ctHJh7q8IUo3bHOegE_xsDDk,13029
23
- pypeline/utils/graph_utils.py,sha256=prs0ATCYCxWGWx8s9_gg3PtwJA1DORIIwUlMfKRjHJM,4642
24
- pypeline/utils/module_utils.py,sha256=X4O2TdBvvoboK6PxzK18UuKbod9l2BfTIrALk_dI0tM,4166
25
- pypeline/utils/task_utils.py,sha256=p66d4xcuPuBsqtcnaXgqfNXU3-ZSe4lvN11MkJ5_8XY,22222
26
- tests/fixtures/__init__.py,sha256=vHbv5BMJXGb6XX764sChg5Ax7fixPuijiYNBuxgVTUQ,41
27
- tests/fixtures/s3_fixtures.py,sha256=jbsp0WeIibLtjdV1nPSkEuJf1n6e9O7LO-kNFkMqylo,1694
28
- scalable_pypeline-1.2.3.dist-info/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
29
- scalable_pypeline-1.2.3.dist-info/METADATA,sha256=mt_3JLC-Ux3KNwue-BwQtRIz2ITMz7j2D3x-J999Yj4,6174
30
- scalable_pypeline-1.2.3.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
31
- scalable_pypeline-1.2.3.dist-info/entry_points.txt,sha256=ZDh7vdDaHrZD0RwUCiZidXg5-d2fBOYcEo7E6CL4g0U,56
32
- scalable_pypeline-1.2.3.dist-info/top_level.txt,sha256=C7dpkEOc_-nnsAQb28BfQknjD6XHRyS9ZrvVeoIbV7s,15
33
- scalable_pypeline-1.2.3.dist-info/RECORD,,
@@ -1,2 +0,0 @@
1
- [console_scripts]
2
- pypeline = pypeline.cli.core:pypeline
@@ -1,52 +0,0 @@
1
- import os
2
- import pytest
3
- import json
4
- from boto3 import Session
5
-
6
-
7
- @pytest.fixture(scope="session")
8
- def source_documents(dummy_bucket):
9
- session = Session(
10
- aws_access_key_id='foo',
11
- aws_secret_access_key='bar',
12
- region_name='us-east-1'
13
- )
14
- s3 = session.client('s3')
15
- # Add PDFs
16
- for f_key in os.listdir('tests/fixtures/pdfs'):
17
- if f_key == '.DS_Store':
18
- continue
19
- with open('tests/fixtures/pdfs/' + f_key, 'rb') as f:
20
- object_key = 'pdfs/' + f_key
21
- s3.upload_fileobj(f, dummy_bucket.name, object_key)
22
-
23
- # Add 1 hidden file for testing.
24
- dstore = 'foo'
25
- with open(dstore, 'w') as newfile:
26
- newfile.write('bar')
27
- s3.upload_file(dstore, dummy_bucket.name, 'pdfs/.DS_Store')
28
- os.remove(dstore)
29
-
30
- # Add TIFFs
31
- for f_key in os.listdir('tests/fixtures/tiffs'):
32
- if f_key == '.DS_Store':
33
- continue
34
- with open('tests/fixtures/tiffs/' + f_key, 'rb') as f:
35
- object_key = 'tiffs/' + f_key
36
- s3.upload_fileobj(f, dummy_bucket.name, object_key)
37
-
38
- # Add PNGs
39
- for f_key in os.listdir('tests/fixtures/pngs'):
40
- if f_key == '.DS_Store':
41
- continue
42
- with open('tests/fixtures/pngs/' + f_key, 'rb') as f:
43
- object_key = 'pngs/' + f_key
44
- s3.upload_fileobj(f, dummy_bucket.name, object_key)
45
-
46
- # Add HTML files
47
- for f_key in os.listdir('tests/fixtures/html'):
48
- if f_key == '.DS_Store':
49
- continue
50
- with open('tests/fixtures/html/' + f_key, 'rb') as f:
51
- object_key = 'html/' + f_key
52
- s3.upload_fileobj(f, dummy_bucket.name, object_key)