scalable-pypeline 2.1.31__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. pypeline/__init__.py +1 -0
  2. pypeline/barrier.py +63 -0
  3. pypeline/constants.py +94 -0
  4. pypeline/dramatiq.py +455 -0
  5. pypeline/executable_job_config_schema.py +35 -0
  6. pypeline/extensions.py +17 -0
  7. pypeline/flask/__init__.py +16 -0
  8. pypeline/flask/api/__init__.py +0 -0
  9. pypeline/flask/api/pipelines.py +275 -0
  10. pypeline/flask/api/schedules.py +40 -0
  11. pypeline/flask/decorators.py +41 -0
  12. pypeline/flask/flask_pypeline.py +156 -0
  13. pypeline/job_runner.py +205 -0
  14. pypeline/pipeline_config_schema.py +352 -0
  15. pypeline/pipeline_settings_schema.py +561 -0
  16. pypeline/pipelines/__init__.py +0 -0
  17. pypeline/pipelines/composition/__init__.py +0 -0
  18. pypeline/pipelines/composition/parallel_pipeline_composition.py +375 -0
  19. pypeline/pipelines/composition/pypeline_composition.py +215 -0
  20. pypeline/pipelines/factory.py +86 -0
  21. pypeline/pipelines/middleware/__init__.py +0 -0
  22. pypeline/pipelines/middleware/get_active_worker_id_middleware.py +22 -0
  23. pypeline/pipelines/middleware/graceful_shutdown_middleware.py +50 -0
  24. pypeline/pipelines/middleware/parallel_pipeline_middleware.py +60 -0
  25. pypeline/pipelines/middleware/pypeline_middleware.py +202 -0
  26. pypeline/pypeline_yaml.py +468 -0
  27. pypeline/schedule_config_schema.py +125 -0
  28. pypeline/utils/__init__.py +0 -0
  29. pypeline/utils/config_utils.py +81 -0
  30. pypeline/utils/dramatiq_utils.py +134 -0
  31. pypeline/utils/executable_job_util.py +35 -0
  32. pypeline/utils/graceful_shutdown_util.py +39 -0
  33. pypeline/utils/module_utils.py +108 -0
  34. pypeline/utils/pipeline_utils.py +144 -0
  35. pypeline/utils/schema_utils.py +24 -0
  36. scalable_pypeline-2.1.31.dist-info/LICENSE +177 -0
  37. scalable_pypeline-2.1.31.dist-info/METADATA +212 -0
  38. scalable_pypeline-2.1.31.dist-info/RECORD +42 -0
  39. scalable_pypeline-2.1.31.dist-info/WHEEL +6 -0
  40. scalable_pypeline-2.1.31.dist-info/entry_points.txt +6 -0
  41. scalable_pypeline-2.1.31.dist-info/top_level.txt +2 -0
  42. tests/fixtures/__init__.py +0 -0
@@ -0,0 +1,275 @@
1
+ """ Pipeline APIs
2
+ """
3
+
4
+ import importlib.metadata
5
+ import logging
6
+ from http import HTTPStatus
7
+
8
+ from flask import jsonify
9
+ from flask.views import MethodView
10
+ from flask_smorest import Blueprint
11
+ from marshmallow import Schema, fields
12
+ from marshmallow.exceptions import ValidationError
13
+ from webargs.flaskparser import abort
14
+ from packaging import version
15
+ from pypeline.pipelines.composition.parallel_pipeline_composition import PipelineResult
16
+ from pypeline.constants import API_DOC_RESPONSES, API_DOC_PARAMS, API_PATH_V1
17
+ from pypeline.flask.decorators import require_accesskey
18
+ from pypeline.pipeline_config_schema import BasePipelineSchema, PipelineSchemaV1
19
+ from pypeline.pipeline_settings_schema import (
20
+ MissingSettingsException,
21
+ PipelineScenarioSchema,
22
+ )
23
+ from pypeline.pipelines.factory import dag_generator
24
+ from pypeline.utils.config_utils import retrieve_latest_pipeline_config
25
+ from pypeline.utils.schema_utils import get_clean_validation_messages
26
+
27
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)

# flask-smorest blueprint holding every pipeline endpoint; its routes live
# under the versioned API prefix followed by "/pipelines".
bp = Blueprint(
    "pipelines",
    __name__,
    url_prefix=API_PATH_V1 + "/pipelines",
)
29
+
30
+
31
# Detect the installed flask-smorest release once, at import time, so that
# get_response_decorator() can choose the matching ``Blueprint.response`` call
# signature.  Both module-level names are always bound afterwards: the raw
# version string and its parsed form are ``None`` when the distribution
# metadata cannot be found.
try:
    flask_smorest_version = importlib.metadata.version("flask-smorest")
except importlib.metadata.PackageNotFoundError:
    # Bind the raw string too; leaving it undefined would turn any later
    # ``flask_smorest_version is None`` check into a NameError.
    flask_smorest_version = None
    flask_smorest_version_parsed = None
else:
    flask_smorest_version_parsed = version.parse(flask_smorest_version)
36
+
37
+
38
def get_response_decorator(bp, status_code, *args, **kwargs):
    """Build a ``bp.response`` decorator suited to the installed flask-smorest.

    For flask-smorest releases older than 0.29 the status code is not
    forwarded; from 0.29 onwards it is passed as the first positional
    argument of ``bp.response``.

    Args:
        bp: Blueprint whose ``response`` method is called.
        status_code: Status code forwarded to ``bp.response`` on
            flask-smorest >= 0.29 and dropped for older releases.
        *args: Extra positional arguments passed through to ``bp.response``.
        **kwargs: Extra keyword arguments passed through to ``bp.response``.

    Returns:
        Whatever ``bp.response(...)`` returns.

    Raises:
        ImportError: If the flask-smorest version could not be determined
            when this module was imported.
    """
    # Check the *parsed* version: it is the name the module-level detection
    # binds to ``None`` on failure.  The raw version string is never ``None``
    # on success and is not assigned on failure, so testing it could only
    # ever fall through or raise NameError.
    if flask_smorest_version_parsed is None:
        raise ImportError("flask-smorest is not installed.")

    if flask_smorest_version_parsed < version.parse("0.29"):
        # Older releases: call ``response`` without a leading status code.
        return bp.response(*args, **kwargs)

    # 0.29 and newer: the status code leads the positional arguments.
    return bp.response(status_code, *args, **kwargs)
48
+
49
+
50
class InvokePipelineSchema(Schema):
    """Incoming schema for invoking a pipeline"""

    # Documentation hints are supplied through ``metadata`` (exactly as the
    # ``scenarios`` field below does) instead of as loose keyword arguments,
    # which marshmallow has deprecated.
    chain_payload = fields.Raw(
        metadata={
            "description": "Payload contains whatever arguments the pipeline expects "
            "to be passed to each node in the graph.",
            "example": {"document_id": "123", "send_alert": True},
        },
        required=False,
    )
    scenarios = fields.List(
        fields.Nested(PipelineScenarioSchema),
        metadata={"description": "List of scenarios to run for a given pipeline"},
        required=False,
    )
64
+
65
+
66
class InvokePipelineResponseSchema(Schema):
    # Response schema attached to the pipeline invocation endpoint.
    # All three fields are plain strings.

    # Identifier of the execution that was started.
    execution_id = fields.String()
    # Identifier of the pipeline that was invoked.
    pipeline_id = fields.String()
    # Status string reported for the invocation.
    status = fields.String()
70
+
71
+
72
class GetPipelineResultResponseSchema(Schema):
    # Response schema attached to the pipeline results endpoint.

    # Identifier of the execution the results belong to.
    execution_id = fields.String()
    # Unvalidated (raw) value; passed through as-is.
    result = fields.Raw()
    # Integer time-to-live for the result (unit not stated here).
    result_ttl = fields.Integer()
    # Unvalidated (raw) collection of results; passed through as-is.
    results = fields.Raw()
    # Status string of the execution.
    status = fields.String()
    # Human-readable explanation accompanying ``status``.
    status_message = fields.String()
79
+
80
+
81
@bp.route("/")
class Pipelines(MethodView):
    """Operations against all pipelines."""

    @require_accesskey
    @bp.doc(
        responses=API_DOC_RESPONSES,
        parameters=[API_DOC_PARAMS["accesskey"]],
        tags=["Pipelines"],
    )
    def get(self):
        """Retrieve list of available pipelines."""
        raw_configs = retrieve_latest_pipeline_config()
        if raw_configs is None:
            # No configuration could be retrieved at all.
            abort(404)

        try:
            # Validate every entry with the schema class that matches the
            # ``schemaVersion`` the entry itself declares.
            loaded_configs = [
                BasePipelineSchema.get_by_version(entry["schemaVersion"])().load(entry)
                for entry in raw_configs
            ]
        except ValidationError as err:
            # NOTE(review): an invalid configuration is answered with 202;
            # confirm that status is intended before changing it.
            return jsonify({"message": f"Invalid pipeline configuration: {err}"}), 202

        return jsonify(loaded_configs)
109
+
110
+
111
@bp.route("/<string:pipeline_id>")
class PipelineInfo(MethodView):
    """Operations against a single pipeline"""

    @require_accesskey
    @bp.doc(
        responses=API_DOC_RESPONSES,
        parameters=[
            API_DOC_PARAMS["accesskey"],
            {
                "in": "path",
                "name": "pipeline_id",
                "description": "pipeline_id for which to retrieve details.",
                "type": "string",
                "example": "my_pipeline",
                "required": True,
            },
        ],
        tags=["Pipelines"],
    )
    def get(self, pipeline_id: str):
        """Retrieve details about a specific pipeline."""
        pipeline_config_api_resp = retrieve_latest_pipeline_config(
            pipeline_id=pipeline_id
        )

        if pipeline_config_api_resp is None:
            abort(404)

        try:
            # Pick the schema from the configuration's own ``schemaVersion``,
            # as the pipeline listing endpoint does, rather than always
            # validating against version 1.  A configuration that declares no
            # version keeps the previous behaviour (PipelineSchemaV1).
            schema_version = pipeline_config_api_resp.get("schemaVersion")
            if schema_version is None:
                schema_cls = PipelineSchemaV1
            else:
                schema_cls = BasePipelineSchema.get_by_version(schema_version)
            pipeline_config = schema_cls().load(pipeline_config_api_resp)
        except ValidationError as e:
            # NOTE(review): an invalid configuration is answered with 202;
            # kept as-is because clients may rely on it.
            msg = f"Invalid pipeline configuration: {e}"
            return jsonify({"message": msg}), 202

        return jsonify(pipeline_config)
147
+
148
+
149
@bp.route("/invoke/<string:pipeline_id>")
class PipelineInvoke(MethodView):
    """Operations involved with pipeline invocation"""

    @require_accesskey
    @bp.doc(
        responses=API_DOC_RESPONSES,
        parameters=[
            API_DOC_PARAMS["accesskey"],
            {
                "in": "path",
                "name": "pipeline_id",
                "description": "pipeline_id of the pipeline to invoke.",
                "type": "string",
                "example": "my_pipeline",
                "required": True,
            },
        ],
        tags=["Pipelines"],
    )
    @bp.arguments(InvokePipelineSchema)
    @get_response_decorator(bp, "200", InvokePipelineResponseSchema)
    def post(self, payload: dict, pipeline_id: str):
        """Invoke a pipeline by its ID; optionally provide pipeline arguments."""
        pipeline_config = retrieve_latest_pipeline_config(pipeline_id=pipeline_id)

        if pipeline_config is None:
            return abort(404)

        # Reject unknown schema versions explicitly.  Without this guard the
        # branches below would leave ``pipeline`` unbound and the resulting
        # error would be reported as a generic 500 "Failed to invoke".
        # This runs outside the ``try`` so the abort is not swallowed by the
        # broad ``except Exception`` handler.
        schema_version = pipeline_config.get("schemaVersion")
        if schema_version not in (1, 2):
            abort(
                HTTPStatus.BAD_REQUEST,
                message=f"Unsupported pipeline schemaVersion: {schema_version!r}",
            )

        retval = {"pipeline_id": pipeline_id, "status": "starting"}
        try:
            if schema_version == 1:
                # Version 1 pipelines receive the caller's payload as ``event``.
                pipeline = dag_generator(
                    pipeline_id=pipeline_id,
                    event=payload.get("chain_payload", {}),
                )
            else:
                # Version 2 pipelines are driven by the requested scenarios,
                # which are echoed back in the response.
                pipeline = dag_generator(
                    pipeline_id=pipeline_id,
                    scenarios=payload.get("scenarios", []),
                )
                retval["scenarios"] = pipeline.scenarios

            pipeline.run()
            # Create the result entry for this execution so it can be looked
            # up later by execution id.
            pipeline_result = PipelineResult(pipeline.execution_id)
            pipeline_result.create_result_entry(pipeline.to_json())
            retval["execution_id"] = pipeline.execution_id
        except MissingSettingsException:
            abort(
                HTTPStatus.BAD_REQUEST,
                message="Missing required settings in the request.",
            )
        except ValidationError as ve:
            abort(
                HTTPStatus.BAD_REQUEST,
                message=get_clean_validation_messages(ve),
            )
        except Exception:
            msg = "Failed to invoke pipeline ... {}".format(pipeline_id)
            # ``logger.exception`` records the message together with the
            # active exception and its traceback in a single log entry.
            logger.exception(msg)
            abort(500, message=msg)

        return jsonify(retval)
214
+
215
+
216
# Documented responses for the results endpoint: a copy of the shared API
# responses (so the shared mapping itself is not modified) extended with the
# two statuses specific to polling for execution results.
# NOTE(review): 204 is documented here for expired results; confirm it matches
# what the results handler actually returns.
results_responses = API_DOC_RESPONSES.copy()
results_responses.update(
    {
        202: {
            "code": 202,
            "description": "Pipeline is still running. Try again later.",
        },
        204: {
            "code": 204,
            "description": "The execution results have expired. Re-run pipeline.",
        },
    }
)
225
+
226
+
227
@bp.route("/results/<string:execution_id>")
class PipelineResults(MethodView):
    """Operations with respect to pipeline results"""

    @require_accesskey
    @bp.doc(
        responses=results_responses,
        parameters=[
            API_DOC_PARAMS["accesskey"],
            {
                "in": "path",
                "name": "execution_id",
                "description": "execution_id for which to retrieve results",
                "type": "string",
                "example": "4c595cca-9bf1-4150-8c34-6b43faf276c8",
                "required": True,
            },
        ],
        tags=["Pipelines"],
    )
    @get_response_decorator(bp, "200", GetPipelineResultResponseSchema)
    def get(self, execution_id: str):
        """Retrieve results of a pipeline's execution based on execution_id

        NOTE: Cached results expire after a time window so are not available
        forever.
        """
        try:
            stored = PipelineResult(execution_id)
            stored.load()
            body = {"execution_id": execution_id, "status": stored.status}

            # Guard clauses for the two states that carry no results.
            if stored.status == "unavailable":
                body["status_message"] = "Results expired. Re-run pipeline."
                return body, 200

            if stored.status == "pending":
                body["status_message"] = "Results pending. Check again soon."
                return body, 202

            # Any other status: attach the results and answer 200.
            body["status_message"] = "Results available."
            body["results"] = stored.get_results()
            return body, 200

        except Exception as err:
            failure = f"Failed to retrieve results for execution id: {execution_id}"
            logger.error(failure)
            logger.exception(f"{err}")
            abort(500, message=failure)
@@ -0,0 +1,40 @@
1
+ """ API Endpoints for Scheduled Tasks
2
+ """
3
+
4
+ import logging
5
+ from flask import jsonify, request
6
+ from flask.views import MethodView
7
+ from flask_smorest import Blueprint
8
+ from flask import abort
9
+ from marshmallow.exceptions import ValidationError
10
+ from pypeline.constants import API_DOC_RESPONSES, API_DOC_PARAMS, API_PATH_V1
11
+ from pypeline.utils.config_utils import retrieve_latest_schedule_config
12
+ from pypeline.flask.decorators import require_accesskey
13
+
14
# Module-level logger named after this module's import path.
logger = logging.getLogger(__name__)

# flask-smorest blueprint holding the schedule endpoints; its routes live
# under the versioned API prefix followed by "/schedules".
bp = Blueprint(
    "schedules",
    __name__,
    url_prefix=API_PATH_V1 + "/schedules",
)
17
+
18
+
19
@bp.route("/")
class Schedules(MethodView):
    """Operations related to schedules"""

    @require_accesskey
    @bp.doc(
        responses=API_DOC_RESPONSES,
        parameters=[API_DOC_PARAMS["accesskey"]],
        tags=["Schedules"],
    )
    def get(self):
        """Retrieve list of available schedule entries."""
        try:
            schedule_config = retrieve_latest_schedule_config()
        except ValidationError:
            # ``abort`` here is Flask's: extra keyword arguments are handed
            # to the HTTP exception's constructor, which accepts
            # ``description`` but not ``message``.  Passing ``message`` raised
            # TypeError and produced a 500 instead of the intended 400.
            abort(400, description="Invalid schedule found ...")

        if schedule_config is None:
            abort(404)

        return jsonify(schedule_config)
@@ -0,0 +1,41 @@
1
+ """ Flask specific decorators (primarily for auth activities and app context)
2
+ """
3
+ import os
4
+ import logging
5
+ from http import HTTPStatus
6
+ from functools import wraps
7
+ from flask import request
8
+ from flask import abort
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def require_accesskey(fn):
    """Convenience decorator that guards a Flask web route (typically an API)
    with a shared access key.

    The caller's key is read from the ``accesskey`` request header, falling
    back to the ``accesskey`` query-string argument, and compared with the
    ``API_ACCESS_KEY`` environment variable.  The wrapped view runs only when
    both values are present and equal; every other case aborts with
    401 Unauthorized.  In particular, a deployment that has not configured
    ``API_ACCESS_KEY`` rejects all requests instead of admitting requests
    that carry no key.

    Usage::

        from flask_smorest import Blueprint
        bp = Blueprint('api_routes', __name__, url_prefix='/api')

        @bp.route('/my-api-route')
        class ApiClass(MethodView):
            @require_accesskey
            def post(self, payload: dict):
                return {}

    Args:
        fn: View callable to protect.

    Returns:
        The wrapped view callable.
    """
    # Imported locally so this block adds no new module-level dependency.
    import hmac

    @wraps(fn)
    def decorated_view(*args, **kwargs):
        # Header first, then query string.
        access_key = request.headers.get("accesskey") or request.args.get(
            "accesskey"
        )
        configured_access_key = os.environ.get("API_ACCESS_KEY")

        # Fail closed: a missing key on either side never authenticates
        # (previously ``None == None`` let unauthenticated requests through
        # when no key was configured).  ``compare_digest`` keeps the
        # comparison time independent of where the keys differ; encoding to
        # bytes lets it handle non-ASCII keys.
        if (
            access_key
            and configured_access_key
            and hmac.compare_digest(
                access_key.encode("utf-8"),
                configured_access_key.encode("utf-8"),
            )
        ):
            return fn(*args, **kwargs)

        abort(HTTPStatus.UNAUTHORIZED)

    return decorated_view
@@ -0,0 +1,156 @@
1
+ """ Pypeline implementation as a Flask extension
2
+ """
3
+ import os
4
+
5
+ if os.getenv("USE_GEVENT", "false").lower() == "true":
6
+ import gevent.monkey
7
+
8
+ gevent.monkey.patch_all()
9
+
10
+ import logging
11
+ from flask import Flask
12
+ from werkzeug.middleware.proxy_fix import ProxyFix
13
+ from flask_smorest import Api, Blueprint
14
+ from pypeline.extensions import pypeline_config
15
+ from pypeline.constants import DEFAULT_OPENAPI_CONFIG
16
+ from pypeline import __version__
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class FlaskPypeline:
    """Pypeline Flask extension."""

    def __init__(self, app: Flask = None):
        """Store the application and, when one is given, initialize it.

        Args:
            app (Flask): Optional Flask application.  When provided,
                ``init_app(app)`` is called immediately with its default
                ``init_api=False``.
        """
        self.app = app
        # Fall back to an empty mapping so later ``.get`` lookups are safe.
        self.pypeline_config = pypeline_config if pypeline_config is not None else {}

        if app is not None:
            self.init_app(app)

    def init_app(self, app: Flask, init_api: bool = False):
        """Pypeline bootstrapping process.

        Application config variables to set include:

            PYPELINE_CLIENT_VERSION (default: v?.?.?)

        Optional, if `init_api` is True:

            API_DOCUMENTATION_TITLE
            API_DOCUMENTATION_DESCRIPTION
            OPENAPI_VERSION
            OPENAPI_URL_PREFIX
            OPENAPI_SWAGGER_APP_NAME
            OPENAPI_SWAGGER_UI_PATH
            OPENAPI_SWAGGER_BASE_TEMPLATE
            OPENAPI_SWAGGER_URL
            OPENAPI_SWAGGER_UI_URL
            SWAGGER_UI_DOC_EXPANSION
            EXPLAIN_TEMPLATE_LOADING

        Args:
            app (Flask): Flask Application to initialize.
            init_api (bool): If `True`, Pypeline will initialize its
                core APIs (including Pipelines, Scheduled Tasks, etc.) and
                provide a pre-configured OpenAPI Spec/Swagger UI interface
                available at the route defined in your application's config
                under `OPENAPI_URL_PREFIX` (default `/api`). Refer to
                [flask-smorest](https://flask-smorest.readthedocs.io/en/latest/openapi.html)
                documentation for additional configuration options.
        """
        # Ensure there's a PYPELINE_CLIENT_VERSION on app config; an existing
        # value is left untouched.
        app.config.setdefault("PYPELINE_CLIENT_VERSION", "v?.?.?")

        app.wsgi_app = ProxyFix(app.wsgi_app)
        app.url_map.strict_slashes = False

        # Create and register the pypeline blueprint
        bp = Blueprint(
            "pypeline",
            __name__,
            template_folder="../templates",
            static_folder="../static",
            url_prefix="/pypeline",
        )
        app.register_blueprint(bp)

        # Bootstrap api if app requests
        if init_api is True:
            self._bootstrap_api(app)

    def _bootstrap_api(self, app: Flask):
        """If initializing the API, we will create the core Pypeline API paths
        and initialize the default Swagger documentation.

        Any failure while creating or registering the API is logged and
        leaves ``None`` as the candidate value for the
        ``pypeline_core_api`` extension entry; it is not re-raised.
        """
        # Set sensible defaults for Swagger docs. Values already present on
        # the provided `app` take precedence.
        for swagger_config in DEFAULT_OPENAPI_CONFIG:
            app.config.setdefault(swagger_config[0], swagger_config[1])

        # Attempt to override with values from client's pypeline.yaml if
        # they are available. This will add new tags and new docs if
        # defined and add to the core Pypeline API docs.  ``or`` fallbacks
        # tolerate keys that are present but null.
        api_config = self.pypeline_config.get("apiConfig") or {}
        api_docs = api_config.get("apiDocumentation") or {}
        custom_tags = api_config.get("prefixDescriptions") or []

        # Each documented value replaces the app config entry only when the
        # client actually supplied it.
        for config_key, docs_key in (
            ("PYPELINE_CLIENT_VERSION", "version"),
            ("API_DOCUMENTATION_TITLE", "title"),
            ("API_DOCUMENTATION_DESCRIPTION", "description"),
        ):
            override = api_docs.get(docs_key)
            if override is not None:
                app.config[config_key] = override

        tags = [
            {"name": "Pipelines", "description": "Operations related to Pipelines"},
            {"name": "Schedules", "description": "Operations related to Schedules"},
        ] + list(custom_tags)

        # Set up the initializing spec kwargs for API
        spec_kwargs = {
            "title": app.config["API_DOCUMENTATION_TITLE"],
            "version": f"Pypeline: {__version__} - "
            f"Client: {app.config['PYPELINE_CLIENT_VERSION']}",
            "description": app.config["API_DOCUMENTATION_DESCRIPTION"],
            "tags": tags,
        }
        try:
            api = Api()
            api.init_app(app, spec_kwargs=spec_kwargs)

            self._register_api_namespaces(api)

        except Exception as e:
            # Best effort: the application keeps starting without the API.
            # Log through this module's logger rather than the root logger.
            api = None
            logger.exception("Unable to initialize API ... %s", e)

        app.extensions.setdefault("pypeline_core_api", api)

    @staticmethod
    def _register_api_namespaces(api: Api):
        """Register Default API namespaces"""
        # Imported here rather than at module import time, so the API modules
        # are only loaded when the API is actually bootstrapped.
        from pypeline.flask.api.pipelines import bp as pipelinesbp

        api.register_blueprint(pipelinesbp)
        from pypeline.flask.api.schedules import bp as schedulesbp

        api.register_blueprint(schedulesbp)