scalable-pypeline 1.2.3__py2.py3-none-any.whl → 2.0.2__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. pypeline/__init__.py +1 -1
  2. pypeline/barrier.py +34 -0
  3. pypeline/composition.py +349 -0
  4. pypeline/constants.py +51 -84
  5. pypeline/dramatiq.py +470 -0
  6. pypeline/extensions.py +9 -8
  7. pypeline/flask/__init__.py +3 -5
  8. pypeline/flask/api/pipelines.py +109 -148
  9. pypeline/flask/api/schedules.py +14 -39
  10. pypeline/flask/decorators.py +18 -53
  11. pypeline/flask/flask_pypeline.py +156 -0
  12. pypeline/middleware.py +61 -0
  13. pypeline/pipeline_config_schema.py +105 -92
  14. pypeline/pypeline_yaml.py +458 -0
  15. pypeline/schedule_config_schema.py +35 -120
  16. pypeline/utils/config_utils.py +52 -310
  17. pypeline/utils/module_utils.py +35 -71
  18. pypeline/utils/pipeline_utils.py +161 -0
  19. scalable_pypeline-2.0.2.dist-info/METADATA +217 -0
  20. scalable_pypeline-2.0.2.dist-info/RECORD +27 -0
  21. scalable_pypeline-2.0.2.dist-info/entry_points.txt +3 -0
  22. tests/fixtures/__init__.py +0 -1
  23. pypeline/celery.py +0 -206
  24. pypeline/celery_beat.py +0 -254
  25. pypeline/flask/api/utils.py +0 -35
  26. pypeline/flask/flask_sermos.py +0 -156
  27. pypeline/generators.py +0 -196
  28. pypeline/logging_config.py +0 -171
  29. pypeline/pipeline/__init__.py +0 -0
  30. pypeline/pipeline/chained_task.py +0 -70
  31. pypeline/pipeline/generator.py +0 -254
  32. pypeline/sermos_yaml.py +0 -442
  33. pypeline/utils/graph_utils.py +0 -144
  34. pypeline/utils/task_utils.py +0 -552
  35. scalable_pypeline-1.2.3.dist-info/METADATA +0 -163
  36. scalable_pypeline-1.2.3.dist-info/RECORD +0 -33
  37. scalable_pypeline-1.2.3.dist-info/entry_points.txt +0 -2
  38. tests/fixtures/s3_fixtures.py +0 -52
  39. {scalable_pypeline-1.2.3.dist-info → scalable_pypeline-2.0.2.dist-info}/LICENSE +0 -0
  40. {scalable_pypeline-1.2.3.dist-info → scalable_pypeline-2.0.2.dist-info}/WHEEL +0 -0
  41. {scalable_pypeline-1.2.3.dist-info → scalable_pypeline-2.0.2.dist-info}/top_level.txt +0 -0
@@ -2,36 +2,33 @@
2
2
  """
3
3
  import logging
4
4
 
5
- from celery.canvas import _chain
6
- from celery_dyrygent.workflows import Workflow
7
- from flask import jsonify, request, abort
5
+ from flask import jsonify
8
6
  from flask_smorest import Blueprint
9
7
  from flask.views import MethodView
10
8
  from marshmallow import Schema, fields
11
9
  from marshmallow.exceptions import ValidationError
12
- from pypeline.constants import API_DOC_RESPONSES, API_DOC_PARAMS, API_PATH_V1, \
13
- WORKFLOW_PROCESSOR_DEFAULT_QUEUE
10
+ from webargs.flaskparser import abort
11
+
12
+ from pypeline.composition import PipelineResult
13
+ from pypeline.constants import API_DOC_RESPONSES, API_DOC_PARAMS, API_PATH_V1
14
+ from pypeline.utils.pipeline_utils import dag_generator
14
15
  from pypeline.flask.decorators import require_accesskey
15
- from pypeline.flask.api.utils import chain_helper
16
- from pypeline.utils.task_utils import PipelineResult
17
16
  from pypeline.utils.config_utils import retrieve_latest_pipeline_config
18
17
  from pypeline.pipeline_config_schema import BasePipelineSchema, PipelineSchemaV1
19
18
 
20
19
  logger = logging.getLogger(__name__)
21
- bp = Blueprint('pipelines', __name__, url_prefix=API_PATH_V1 + '/pipelines')
20
+ bp = Blueprint("pipelines", __name__, url_prefix=API_PATH_V1 + "/pipelines")
22
21
 
23
22
 
24
23
  class InvokePipelineSchema(Schema):
25
- """ Incoming schema for invoking a pipeline
26
- """
24
+ """Incoming schema for invoking a pipeline"""
25
+
27
26
  chain_payload = fields.Raw(
28
- description='Payload contains whatever arguments the pipeline expects '
29
- 'to be passed to each node in the graph.',
30
- example={
31
- 'document_id': '123',
32
- 'send_alert': True
33
- },
34
- required=False)
27
+ description="Payload contains whatever arguments the pipeline expects "
28
+ "to be passed to each node in the graph.",
29
+ example={"document_id": "123", "send_alert": True},
30
+ required=False,
31
+ )
35
32
 
36
33
 
37
34
  class InvokePipelineResponseSchema(Schema):
@@ -49,20 +46,19 @@ class GetPipelineResultResponseSchema(Schema):
49
46
  status_message = fields.String()
50
47
 
51
48
 
52
- @bp.route('/')
49
+ @bp.route("/")
53
50
  class Pipelines(MethodView):
54
- """ Operations against all pipelines.
55
- """
51
+ """Operations against all pipelines."""
52
+
56
53
  @require_accesskey
57
- @bp.doc(responses=API_DOC_RESPONSES,
58
- parameters=[API_DOC_PARAMS['accesskey']],
59
- tags=['Pipelines'])
54
+ @bp.doc(
55
+ responses=API_DOC_RESPONSES,
56
+ parameters=[API_DOC_PARAMS["accesskey"]],
57
+ tags=["Pipelines"],
58
+ )
60
59
  def get(self):
61
- """ Retrieve list of available pipelines.
62
- """
63
- access_key = request.headers.get('accesskey')
64
- pipeline_config_api_resp = retrieve_latest_pipeline_config(
65
- access_key=access_key)
60
+ """Retrieve list of available pipelines."""
61
+ pipeline_config_api_resp = retrieve_latest_pipeline_config()
66
62
 
67
63
  if pipeline_config_api_resp is None:
68
64
  abort(404)
@@ -70,41 +66,41 @@ class Pipelines(MethodView):
70
66
  try:
71
67
  pipelines = []
72
68
  for p in pipeline_config_api_resp:
73
- PipelineSchema = \
74
- BasePipelineSchema.get_by_version(p['schemaVersion'])
69
+ PipelineSchema = BasePipelineSchema.get_by_version(p["schemaVersion"])
75
70
  pipeline_config = PipelineSchema().load(p)
76
71
  pipelines.append(pipeline_config)
77
72
  except ValidationError as e:
78
73
  msg = f"Invalid pipeline configuration: {e}"
79
- return jsonify({'message': msg}), 202
74
+ return jsonify({"message": msg}), 202
80
75
 
81
76
  return jsonify(pipelines)
82
77
 
83
78
 
84
- @bp.route('/<string:pipeline_id>')
79
+ @bp.route("/<string:pipeline_id>")
85
80
  class PipelineInfo(MethodView):
86
- """ Operations against a single pipeline
87
- """
81
+ """Operations against a single pipeline"""
82
+
88
83
  @require_accesskey
89
- @bp.doc(responses=API_DOC_RESPONSES,
90
- parameters=[
91
- API_DOC_PARAMS['accesskey'], {
92
- 'in': 'path',
93
- 'name': 'pipeline_id',
94
- 'description':
95
- 'pipeline_id for which to retrieve metrics.',
96
- 'type': 'string',
97
- 'example': 'my_pipeline',
98
- 'required': True
99
- }
100
- ],
101
- tags=['Pipelines'])
84
+ @bp.doc(
85
+ responses=API_DOC_RESPONSES,
86
+ parameters=[
87
+ API_DOC_PARAMS["accesskey"],
88
+ {
89
+ "in": "path",
90
+ "name": "pipeline_id",
91
+ "description": "pipeline_id for which to retrieve metrics.",
92
+ "type": "string",
93
+ "example": "my_pipeline",
94
+ "required": True,
95
+ },
96
+ ],
97
+ tags=["Pipelines"],
98
+ )
102
99
  def get(self, pipeline_id: str):
103
- """ Retrieve details about a specific pipeline.
104
- """
105
- access_key = request.headers.get('accesskey')
100
+ """Retrieve details about a specific pipeline."""
106
101
  pipeline_config_api_resp = retrieve_latest_pipeline_config(
107
- pipeline_id=pipeline_id, access_key=access_key)
102
+ pipeline_id=pipeline_id
103
+ )
108
104
 
109
105
  if pipeline_config_api_resp is None:
110
106
  abort(404)
@@ -113,83 +109,48 @@ class PipelineInfo(MethodView):
113
109
  pipeline_config = PipelineSchemaV1().load(pipeline_config_api_resp)
114
110
  except ValidationError as e:
115
111
  msg = f"Invalid pipeline configuration: {e}"
116
- return jsonify({'message': msg}), 202
112
+ return jsonify({"message": msg}), 202
117
113
 
118
114
  return jsonify(pipeline_config)
119
115
 
120
116
 
121
- @bp.route('/invoke/<string:pipeline_id>')
117
+ @bp.route("/invoke/<string:pipeline_id>")
122
118
  class PipelineInvoke(MethodView):
123
- """ Operations involed with pipeline invocation
124
- """
119
+ """Operations involed with pipeline invocation"""
120
+
125
121
  @require_accesskey
126
- @bp.doc(responses=API_DOC_RESPONSES,
127
- parameters=[
128
- API_DOC_PARAMS['accesskey'], {
129
- 'in': 'path',
130
- 'name': 'pipeline_id',
131
- 'description':
132
- 'pipeline_id for which to retrieve metrics.',
133
- 'type': 'string',
134
- 'example': 'my_pipeline',
135
- 'required': True
136
- }
137
- ],
138
- tags=['Pipelines'])
122
+ @bp.doc(
123
+ responses=API_DOC_RESPONSES,
124
+ parameters=[
125
+ API_DOC_PARAMS["accesskey"],
126
+ {
127
+ "in": "path",
128
+ "name": "pipeline_id",
129
+ "description": "pipeline_id for which to retrieve metrics.",
130
+ "type": "string",
131
+ "example": "my_pipeline",
132
+ "required": True,
133
+ },
134
+ ],
135
+ tags=["Pipelines"],
136
+ )
139
137
  @bp.arguments(InvokePipelineSchema)
140
138
  @bp.response(InvokePipelineResponseSchema)
141
139
  def post(self, payload: dict, pipeline_id: str):
142
- """ Invoke a pipeline by it's ID; optionally provide pipeline arguments.
143
- """
144
- access_key = request.headers.get('accesskey')
145
- pipeline_config = retrieve_latest_pipeline_config(
146
- pipeline_id=pipeline_id, access_key=access_key)
140
+ """Invoke a pipeline by it's ID; optionally provide pipeline arguments."""
141
+ pipeline_config = retrieve_latest_pipeline_config(pipeline_id=pipeline_id)
147
142
 
148
143
  if pipeline_config is None:
149
144
  return abort(404)
150
145
 
151
- retval = {'pipeline_id': pipeline_id, 'status': ''}
146
+ retval = {"pipeline_id": pipeline_id, "status": "starting"}
152
147
  try:
153
- # TODO - ideally we can validate the payload *at this stage*
154
- # before the chain is ever invoked so we can handle issues
155
- # without kicking off work.
156
- payload = payload['chain_payload']\
157
- if 'chain_payload' in payload else {}
158
-
159
- gen = chain_helper(pipeline_id=pipeline_id,
160
- access_key=access_key,
161
- chain_payload=payload)
162
-
163
- if gen.chain is None:
164
- abort(400, message=gen.loading_message)
165
-
166
- chain: _chain = gen.chain
167
- work_flow_processor_queue = (
168
- pipeline_config["config"]["metadata"]
169
- .get("processorQueue", None) or
170
- WORKFLOW_PROCESSOR_DEFAULT_QUEUE)
171
- wf: Workflow = Workflow({"queue": work_flow_processor_queue})
172
- wf.add_celery_canvas(chain)
173
- wf.apply_async()
174
-
175
- celery_task_status = []
176
- for node in wf.nodes:
177
- celery_task = dict(
178
- name=wf.nodes[node].signature.name,
179
- task_id=node,
180
- status="RUNNING",
181
- retries=0
182
- )
183
- celery_task_status.append(celery_task)
184
-
185
- gen.pipeline_wrapper.celery_task_status = celery_task_status
186
- gen.pipeline_wrapper.save_to_cache()
187
- retval['status'] = 'success'
188
- retval['execution_id'] = gen.execution_id
189
- # Initialize the cached result
190
- pr = PipelineResult(gen.execution_id, status='pending')
191
- pr.save()
192
-
148
+ payload = payload["chain_payload"] if "chain_payload" in payload else {}
149
+ pipeline = dag_generator(pipeline_id=pipeline_id, event=payload)
150
+ pipeline.run()
151
+ pipeline_result = PipelineResult(pipeline.execution_id)
152
+ pipeline_result.create_result_entry(pipeline.to_json())
153
+ retval["execution_id"] = pipeline.execution_id
193
154
  except Exception as e:
194
155
  msg = "Failed to invoke pipeline ... {}".format(pipeline_id)
195
156
  logger.error(msg)
@@ -201,61 +162,61 @@ class PipelineInvoke(MethodView):
201
162
 
202
163
  results_responses = API_DOC_RESPONSES.copy()
203
164
  results_responses[202] = {
204
- 'code': 202,
205
- 'description': 'Pipeline is still running. Try again later.'
165
+ "code": 202,
166
+ "description": "Pipeline is still running. Try again later.",
206
167
  }
207
168
  results_responses[204] = {
208
- 'code': 204,
209
- 'description': 'The execution results have expired. Re-run pipeline.'
169
+ "code": 204,
170
+ "description": "The execution results have expired. Re-run pipeline.",
210
171
  }
211
172
 
212
173
 
213
- @bp.route('/results/<string:execution_id>')
174
+ @bp.route("/results/<string:execution_id>")
214
175
  class PipelineResults(MethodView):
215
- """ Operations with respect to pipeline results
216
- """
176
+ """Operations with respect to pipeline results"""
177
+
217
178
  @require_accesskey
218
- @bp.doc(responses=results_responses,
219
- parameters=[
220
- API_DOC_PARAMS['accesskey'], {
221
- 'in': 'path',
222
- 'name': 'execution_id',
223
- 'description':
224
- 'execution_id for which to retrieve results',
225
- 'type': 'string',
226
- 'example': '4c595cca-9bf1-4150-8c34-6b43faf276c8',
227
- 'required': True
228
- }
229
- ],
230
- tags=['Pipelines'])
179
+ @bp.doc(
180
+ responses=results_responses,
181
+ parameters=[
182
+ API_DOC_PARAMS["accesskey"],
183
+ {
184
+ "in": "path",
185
+ "name": "execution_id",
186
+ "description": "execution_id for which to retrieve results",
187
+ "type": "string",
188
+ "example": "4c595cca-9bf1-4150-8c34-6b43faf276c8",
189
+ "required": True,
190
+ },
191
+ ],
192
+ tags=["Pipelines"],
193
+ )
231
194
  @bp.response(GetPipelineResultResponseSchema)
232
195
  def get(self, execution_id: str):
233
- """ Retrieve results of a pipeline's execution based on execution_id
234
-
235
- NOTE: Cached results expire after a time window so are not available
236
- forever.
196
+ """Retrieve results of a pipeline's execution based on execution_id
237
197
 
238
- TODO: Need to add response marshalling/schema here.
198
+ NOTE: Cached results expire after a time window so are not available
199
+ forever.
239
200
  """
240
201
  try:
241
202
  pr = PipelineResult(execution_id)
242
203
  pr.load()
243
- retval = pr.to_dict()
244
- if pr.status == 'unavailable':
245
- retval['status_message'] = 'Results expired. Re-run pipeline.'
246
- return retval, 204
204
+ retval = {"execution_id": execution_id, "status": pr.status}
205
+ if pr.status == "unavailable":
206
+ retval["status_message"] = "Results expired. Re-run pipeline."
207
+ return retval, 200
247
208
 
248
- if pr.status == 'pending':
249
- retval['status_message'] = 'Results pending. Check again soon.'
209
+ if pr.status == "pending":
210
+ retval["status_message"] = "Results pending. Check again soon."
250
211
  return retval, 202
251
212
 
252
213
  else:
253
- retval['status_message'] = 'Results available.'
214
+ retval["status_message"] = "Results available."
215
+ retval["results"] = pr.get_results()
254
216
  return retval, 200
255
217
 
256
218
  except Exception as e:
257
- msg = "Failed to retrieve results for execution id: {}".format(
258
- execution_id)
219
+ msg = "Failed to retrieve results for execution id: {}".format(execution_id)
259
220
  logger.error(msg)
260
221
  logger.exception(f"{e}")
261
222
  abort(500, message=msg)
@@ -8,33 +8,31 @@ from marshmallow import Schema, fields
8
8
  from flask_smorest import Blueprint
9
9
  from flask import abort
10
10
  from marshmallow.exceptions import ValidationError
11
- from pypeline.constants import API_DOC_RESPONSES, API_DOC_PARAMS,\
12
- API_PATH_V1
13
- from pypeline.utils.config_utils import retrieve_latest_schedule_config, \
14
- update_schedule_config
11
+ from pypeline.constants import API_DOC_RESPONSES, API_DOC_PARAMS, API_PATH_V1
12
+ from pypeline.utils.config_utils import retrieve_latest_schedule_config
15
13
  from pypeline.schedule_config_schema import BaseScheduleSchema
16
14
  from pypeline.flask.decorators import require_accesskey
17
15
 
18
16
  logger = logging.getLogger(__name__)
19
17
 
20
- bp = Blueprint('schedules', __name__, url_prefix=API_PATH_V1 + '/schedules')
18
+ bp = Blueprint("schedules", __name__, url_prefix=API_PATH_V1 + "/schedules")
21
19
 
22
20
 
23
- @bp.route('/')
21
+ @bp.route("/")
24
22
  class Schedules(MethodView):
25
- """ Operations related to schedules
26
- """
23
+ """Operations related to schedules"""
24
+
27
25
  @require_accesskey
28
- @bp.doc(responses=API_DOC_RESPONSES,
29
- parameters=[API_DOC_PARAMS['accesskey']],
30
- tags=['Schedules'])
26
+ @bp.doc(
27
+ responses=API_DOC_RESPONSES,
28
+ parameters=[API_DOC_PARAMS["accesskey"]],
29
+ tags=["Schedules"],
30
+ )
31
31
  def get(self):
32
- """ Retrieve list of available schedule entries.
33
- """
34
- access_key = request.headers.get('accesskey')
32
+ """Retrieve list of available schedule entries."""
33
+ access_key = request.headers.get("accesskey")
35
34
  try:
36
- schedule_config = retrieve_latest_schedule_config(
37
- access_key=access_key)
35
+ schedule_config = retrieve_latest_schedule_config()
38
36
  except ValidationError:
39
37
  abort(400, message="Invalid schedule found ...")
40
38
 
@@ -42,26 +40,3 @@ class Schedules(MethodView):
42
40
  abort(404)
43
41
 
44
42
  return jsonify(schedule_config)
45
-
46
- @require_accesskey
47
- @bp.doc(responses=API_DOC_RESPONSES,
48
- parameters=[API_DOC_PARAMS['accesskey']],
49
- tags=['Schedules'])
50
- @bp.arguments(BaseScheduleSchema)
51
- def post(self, payload: dict):
52
- """ Update a deployment's schedules. Primarily used to update dynamic
53
- keys such as last run at and total run count. This does not allow
54
- overloading schedules, only updating select keys on known schedule
55
- entries (as in, this is not destructive).
56
- """
57
- access_key = request.headers.get('accesskey')
58
- try:
59
- success = update_schedule_config(new_schedule_config=payload,
60
- access_key=access_key)
61
- except ValidationError as e:
62
- abort(400, message=e)
63
-
64
- if not success:
65
- abort(500)
66
-
67
- return jsonify({'message': 'Schedule update successful ...'})
@@ -4,71 +4,36 @@ import os
4
4
  import logging
5
5
  from http import HTTPStatus
6
6
  from functools import wraps
7
- import requests
8
- from rhodb.redis_conf import RedisConnector
9
7
  from flask import request
10
8
  from flask import abort
11
- from pypeline.constants import DEFAULT_AUTH_URL, AUTH_LOCK_KEY, \
12
- AUTH_LOCK_DURATION, USING_SERMOS_CLOUD
13
9
 
14
10
  logger = logging.getLogger(__name__)
15
- redis_conn = RedisConnector().get_connection()
16
-
17
-
18
- def validate_access_key(access_key: str = None):
19
- """ Verify whether an Access Key is valid according to Sermos Cloud.
20
-
21
- If deploying in 'local' mode, no validation is done. To deploy in local
22
- mode, set DEFAULT_BASE_URL=local in your environment.
23
- """
24
- # Always 'valid' in local mode
25
- if not USING_SERMOS_CLOUD:
26
- return True
27
-
28
- # If get access key from either provided val or environment
29
- # if None provided.
30
- access_key = os.environ.get('SERMOS_ACCESS_KEY', access_key)
31
-
32
- # Invalid if None, no need to ask.
33
- if access_key is None:
34
- return False
35
-
36
- # Ask cache first
37
- validated = redis_conn.get(AUTH_LOCK_KEY)
38
- if validated is not None:
39
- return True
40
-
41
- # Ask Sermos Cloud (Note: Sermos Cloud's API expects `apikey`)
42
- headers = {'apikey': access_key}
43
- r = requests.post(DEFAULT_AUTH_URL, headers=headers, verify=True)
44
-
45
- if r.status_code == 200:
46
- redis_conn.setex(AUTH_LOCK_KEY, AUTH_LOCK_DURATION, '')
47
- return True
48
- return False
49
11
 
50
12
 
51
13
  def require_accesskey(fn):
52
- """ Convenience decorator to add to a web route (typically an API)
53
- when using Flask.
54
-
55
- Usage::
56
- from sermos import Blueprint, ApiServices
57
- bp = Blueprint('api_routes', __name__, url_prefix='/api')
58
-
59
- @bp.route('/my-api-route')
60
- class ApiClass(MethodView):
61
- @require_access_key
62
- def post(self, payload: dict):
63
- return {}
14
+ """Convenience decorator to add to a web route (typically an API)
15
+ when using Flask.
16
+
17
+ Usage::
18
+ from sermos import Blueprint, ApiServices
19
+ bp = Blueprint('api_routes', __name__, url_prefix='/api')
20
+
21
+ @bp.route('/my-api-route')
22
+ class ApiClass(MethodView):
23
+ @require_access_key
24
+ def post(self, payload: dict):
25
+ return {}
64
26
  """
27
+
65
28
  @wraps(fn)
66
29
  def decorated_view(*args, **kwargs):
67
- access_key = request.headers.get('accesskey')
30
+ access_key = request.headers.get("accesskey")
68
31
  if not access_key:
69
- access_key = request.args.get('accesskey')
32
+ access_key = request.args.get("accesskey")
33
+
34
+ configured_access_key = os.environ.get("API_ACCESS_KEY", None)
70
35
 
71
- if validate_access_key(access_key):
36
+ if access_key == configured_access_key:
72
37
  return fn(*args, **kwargs)
73
38
 
74
39
  abort(HTTPStatus.UNAUTHORIZED)