scalable-pypeline 2.0.9__py2.py3-none-any.whl → 2.1.0__py2.py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- pypeline/__init__.py +1 -1
- pypeline/barrier.py +3 -0
- pypeline/dramatiq.py +26 -154
- pypeline/flask/api/pipelines.py +60 -4
- pypeline/flask/api/schedules.py +1 -3
- pypeline/pipeline_config_schema.py +91 -3
- pypeline/pipeline_settings_schema.py +334 -0
- pypeline/pipelines/__init__.py +0 -0
- pypeline/pipelines/composition/__init__.py +0 -0
- pypeline/pipelines/composition/pypeline_composition.py +188 -0
- pypeline/pipelines/factory.py +107 -0
- pypeline/pipelines/middleware/__init__.py +0 -0
- pypeline/pipelines/middleware/pypeline_middleware.py +188 -0
- pypeline/utils/dramatiq_utils.py +126 -0
- pypeline/utils/module_utils.py +27 -2
- pypeline/utils/pipeline_utils.py +22 -37
- pypeline/utils/schema_utils.py +24 -0
- {scalable_pypeline-2.0.9.dist-info → scalable_pypeline-2.1.0.dist-info}/METADATA +2 -2
- scalable_pypeline-2.1.0.dist-info/RECORD +36 -0
- scalable_pypeline-2.0.9.dist-info/RECORD +0 -27
- /pypeline/{composition.py → pipelines/composition/parallel_pipeline_composition.py} +0 -0
- /pypeline/{middleware.py → pipelines/middleware/parallel_pipeline_middleware.py} +0 -0
- {scalable_pypeline-2.0.9.dist-info → scalable_pypeline-2.1.0.dist-info}/LICENSE +0 -0
- {scalable_pypeline-2.0.9.dist-info → scalable_pypeline-2.1.0.dist-info}/WHEEL +0 -0
- {scalable_pypeline-2.0.9.dist-info → scalable_pypeline-2.1.0.dist-info}/entry_points.txt +0 -0
- {scalable_pypeline-2.0.9.dist-info → scalable_pypeline-2.1.0.dist-info}/top_level.txt +0 -0
pypeline/__init__.py
CHANGED
```diff
@@ -1 +1 @@
-__version__ = "2.0.9"
+__version__ = "2.1.0"
```
pypeline/barrier.py
CHANGED
```diff
@@ -29,6 +29,9 @@ class LockingParallelBarrier:
         """Decrement the task counter in Redis."""
         return self.redis.decr(self.task_key)
 
+    def task_exists(self):
+        return self.redis.exists(self.task_key)
+
     def get_task_count(self):
         """Get the current value of the task counter."""
         return int(self.redis.get(self.task_key) or 0)
```
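
The new `task_exists()` hook lets a worker distinguish "barrier counter reached zero" from "barrier key was never created or has already expired". A minimal usage sketch; only `decrement_task_count()`, `task_exists()`, and `get_task_count()` come from this diff, and the constructor arguments shown are assumptions:

```python
import redis

from pypeline.barrier import LockingParallelBarrier

barrier = LockingParallelBarrier(
    redis.Redis.from_url("redis://localhost:6379/0"),  # assumed: a Redis client
    task_key="pipeline:run-123:barrier",               # assumed parameter name
)

if barrier.task_exists():
    # Key is still present: safe to decrement and inspect the counter.
    remaining = barrier.decrement_task_count()
    if remaining <= 0:
        print("all parallel tasks done; release the barrier")
else:
    # Key missing: the barrier was never set up or was already cleaned up.
    print("no active barrier for this task key")
```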

pypeline/dramatiq.py
CHANGED
```diff
@@ -1,6 +1,3 @@
-import importlib
-import os.path
-import sys
 import typing
 import pika
 import logging
@@ -10,11 +7,9 @@ from urllib.parse import urlparse
 
 from pypeline.extensions import pypeline_config
 from warnings import warn
-from functools import wraps
 from apscheduler.schedulers.blocking import BlockingScheduler
 from apscheduler.triggers.cron import CronTrigger
-from
-from dramatiq import Broker, Middleware, actor as register_actor, set_broker, get_broker
+from dramatiq import Broker, Middleware, set_broker, get_broker
 from dramatiq.brokers.rabbitmq import RabbitmqBroker
 from dramatiq.cli import (
     CPUS,
@@ -33,29 +28,24 @@ from pypeline.constants import (
     REDIS_URL,
     RABBIT_URL,
     DEFAULT_BROKER_CALLABLE,
-    MS_IN_SECONDS,
-    DEFAULT_TASK_TTL,
-    DEFAULT_RESULT_TTL,
-    DEFAULT_TASK_MAX_RETRY,
-    DEFAULT_TASK_MIN_BACKOFF,
-    DEFAULT_TASK_MAX_BACKOFF,
     DEFAULT_BROKER_CONNECTION_HEARTBEAT,
     DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
     DEFAULT_BROKER_CONNECTION_ATTEMPTS,
 )
-from pypeline.middleware import ParallelPipeline
+from pypeline.pipelines.middleware.parallel_pipeline_middleware import ParallelPipeline
+from pypeline.pipelines.middleware.pypeline_middleware import PypelineMiddleware
 from pypeline.utils.config_utils import (
     retrieve_latest_schedule_config,
     get_service_config_for_worker,
 )
+from pypeline.utils.dramatiq_utils import (
+    guess_code_directory,
+    list_managed_actors,
+    register_lazy_actor,
+    LazyActor,
+)
+from pypeline.utils.module_utils import get_callable
 
-if TYPE_CHECKING:
-    from typing_extensions import ParamSpec
-
-    P = ParamSpec("P")
-else:
-    P = TypeVar("P")
-R = TypeVar("R")
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
@@ -79,71 +69,12 @@ def configure_default_broker(broker: Broker = None):
     )
     rabbit_broker.add_middleware(Results(backend=redis_backend))
     rabbit_broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))
+    rabbit_broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
     rabbit_broker.add_middleware(CurrentMessage())
     register_actors_for_workers(rabbit_broker)
     set_broker(rabbit_broker)
 
 
-def guess_code_directory(broker):
-    actor = next(iter(broker.actors.values()))
-    modname, *_ = actor.fn.__module__.partition(".")
-    mod = sys.modules[modname]
-    return os.path.dirname(mod.__file__)
-
-
-def get_module(resource_dot_path: str):
-    """Retrieve the module based on a 'resource dot path'.
-    e.g. package.subdir.feature_file.MyCallable
-    """
-    module_path = ".".join(resource_dot_path.split(".")[:-1])
-    module = importlib.import_module(module_path)
-    return module
-
-
-def get_callable_name(resource_dot_path: str) -> str:
-    """Retrieve the callable based on config string.
-    e.g. package.subdir.feature_file.MyCallable
-    """
-    callable_name = resource_dot_path.split(".")[-1]
-    return callable_name
-
-
-def get_callable(resource_dot_path: str) -> Callable:
-    """Retrieve the actual handler class based on config string.
-    e.g. package.subdir.feature_file.MyCallable
-    """
-    module = get_module(resource_dot_path)
-    callable_name = get_callable_name(resource_dot_path)
-    return getattr(module, callable_name)
-
-
-def register_lazy_actor(
-    broker: Broker,
-    fn: Optional[Callable[P, Union[Awaitable[R], R]]] = None,
-    pipeline_meta: typing.Dict = {},
-    **kwargs,
-) -> typing.Type["LazyActor"]:
-    kwargs["queue_name"] = pipeline_meta.get("queue", "default")
-    kwargs["max_retries"] = pipeline_meta.get("maxRetry", DEFAULT_TASK_MAX_RETRY)
-    # Convert from seconds to milliseconds
-    kwargs["min_backoff"] = (
-        pipeline_meta.get("retryBackoff", DEFAULT_TASK_MIN_BACKOFF) * MS_IN_SECONDS
-    )
-    kwargs["max_backoff"] = (
-        pipeline_meta.get("retryBackoffMax", DEFAULT_TASK_MAX_BACKOFF) * MS_IN_SECONDS
-    )
-    kwargs["time_limit"] = pipeline_meta.get("maxTtl", DEFAULT_TASK_TTL) * MS_IN_SECONDS
-    # Always store results for registered pipeline actors
-    kwargs["store_results"] = pipeline_meta.get("store_results", False)
-    if kwargs["store_results"]:
-        kwargs["result_ttl"] = (
-            pipeline_meta.get("result_ttl", DEFAULT_RESULT_TTL) * MS_IN_SECONDS
-        )
-    lazy_actor: LazyActor = LazyActor(fn, kwargs)
-    lazy_actor.register(broker)
-    return lazy_actor
-
-
 def register_actors_for_workers(broker: Broker):
     service = get_service_config_for_worker(pypeline_config)
     scheduled_jobs_config = retrieve_latest_schedule_config()
@@ -154,9 +85,18 @@ def register_actors_for_workers(broker: Broker):
         pipeline_meta = None
         for pipeline_key, pipeline in pypeline_config["pipelines"].items():
             pipeline_config = pipeline["config"]
-            pipeline_tasks = [
-                t["handler"] for t in pipeline_config["taskDefinitions"].values()
-            ]
+            if pipeline["schemaVersion"] == 1:
+                pipeline_tasks = [
+                    t["handler"] for t in pipeline_config["taskDefinitions"].values()
+                ]
+            elif pipeline["schemaVersion"] == 2:
+                pipeline_tasks = [
+                    handler
+                    for key in pipeline_config["taskDefinitions"]
+                    for handler in pipeline_config["taskDefinitions"][key].get(
+                        "handlers", []
+                    )
+                ]
             if task["handler"] in pipeline_tasks:
                 pipeline_meta = pipeline_config["metadata"]
                 break
@@ -292,77 +232,6 @@ class Dramatiq:
         return decorator
 
 
-def format_actor(actor):
-    return "%s@%s" % (actor.actor_name, actor.queue_name)
-
-
-def ensure_return_value(default_value=None):
-    def decorator(func):
-        @wraps(func)
-        def wrapper(*args, **kwargs):
-            # Call the original function
-            result = func(*args, **kwargs)
-            # Check if the function has returned a value
-            if result is None:
-                # Return the default value if the function returned None
-                return default_value
-            return result
-
-        return wrapper
-
-    return decorator
-
-
-class LazyActor(object):
-    # Intermediate object that register actor on broker an call.
-
-    def __init__(self, fn, kw):
-        self.fn = fn
-        self.kw = kw
-        self.actor = None
-
-    def __call__(self, *a, **kw):
-        return self.fn(*a, **kw)
-
-    def __repr__(self):
-        return "<%s %s.%s>" % (
-            self.__class__.__name__,
-            self.fn.__module__,
-            self.fn.__name__,
-        )
-
-    def __getattr__(self, name):
-        if not self.actor:
-            raise AttributeError(name)
-        return getattr(self.actor, name)
-
-    def register(self, broker):
-        self.actor = register_actor(
-            actor_name=f"{self.fn.__module__}.{self.fn.__name__}",
-            broker=broker,
-            **self.kw,
-        )(ensure_return_value(default_value=True)(self.fn))
-
-    # Next is regular actor API.
-    def send(self, *a, **kw):
-        return self.actor.send(*a, **kw)
-
-    def message(self, *a, **kw):
-        return self.actor.message(*a, **kw)
-
-    def send_with_options(self, *a, **kw):
-        return self.actor.send_with_options(*a, **kw)
-
-
-def list_managed_actors(broker, queues):
-    queues = set(queues)
-    all_actors = broker.actors.values()
-    if not queues:
-        return all_actors
-    else:
-        return [a for a in all_actors if a.queue_name in queues]
-
-
 @click.command("cron-scheduler")
 def cron_scheduler():  # pragma: no cover
     # Configure our broker that we will schedule registered tasks for
@@ -464,6 +333,9 @@ def pypeline_worker(
     # Wraps dramatiq worker CLI in a Flask command. This is private API of
     # dramatiq.
 
+    def format_actor(actor):
+        return "%s@%s" % (actor.actor_name, actor.queue_name)
+
     parser = dramatiq_argument_parser()
 
     # Set worker broker globally.
```
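
The new branch in `register_actors_for_workers` is the behavioral core of this release: schemaVersion 1 task definitions carry a single `handler` string, while schemaVersion 2 definitions carry a `handlers` list (the actor-registration helpers themselves now live in `pypeline.utils.dramatiq_utils`). A standalone restatement of that lookup, with illustrative config dicts:

```python
# Restates the handler-collection logic added above; the config dicts are
# invented for illustration.
def collect_pipeline_tasks(pipeline: dict) -> list:
    config = pipeline["config"]
    if pipeline["schemaVersion"] == 1:
        # v1: exactly one handler per task definition
        return [t["handler"] for t in config["taskDefinitions"].values()]
    elif pipeline["schemaVersion"] == 2:
        # v2: each task definition carries a list of candidate handlers
        return [
            handler
            for key in config["taskDefinitions"]
            for handler in config["taskDefinitions"][key].get("handlers", [])
        ]
    return []


v2_pipeline = {
    "schemaVersion": 2,
    "config": {
        "taskDefinitions": {
            "a": {"handlers": ["client.workers.task_a_v1", "client.workers.task_a_v2"]},
            "b": {"handlers": ["client.workers.task_b"]},
        }
    },
}
assert collect_pipeline_tasks(v2_pipeline) == [
    "client.workers.task_a_v1",
    "client.workers.task_a_v2",
    "client.workers.task_b",
]
```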

pypeline/flask/api/pipelines.py
CHANGED
```diff
@@ -3,6 +3,7 @@
 
 import importlib.metadata
 import logging
+from http import HTTPStatus
 
 from flask import jsonify
 from flask.views import MethodView
@@ -11,12 +12,17 @@ from marshmallow import Schema, fields
 from marshmallow.exceptions import ValidationError
 from webargs.flaskparser import abort
 from packaging import version
-from pypeline.composition import PipelineResult
+from pypeline.pipelines.composition.parallel_pipeline_composition import PipelineResult
 from pypeline.constants import API_DOC_RESPONSES, API_DOC_PARAMS, API_PATH_V1
 from pypeline.flask.decorators import require_accesskey
 from pypeline.pipeline_config_schema import BasePipelineSchema, PipelineSchemaV1
+from pypeline.pipeline_settings_schema import (
+    MissingSettingsException,
+    PipelineScenarioSchema,
+)
+from pypeline.pipelines.factory import dag_generator
 from pypeline.utils.config_utils import retrieve_latest_pipeline_config
-from pypeline.utils.
+from pypeline.utils.schema_utils import get_clean_validation_messages
 
 logger = logging.getLogger(__name__)
 bp = Blueprint("pipelines", __name__, url_prefix=API_PATH_V1 + "/pipelines")
@@ -50,6 +56,31 @@ class InvokePipelineSchema(Schema):
         example={"document_id": "123", "send_alert": True},
         required=False,
     )
+    settings = fields.Raw(
+        description="Payload contains settings for a given pipeline",
+        example={
+            "param1": "Dataset",
+            "param2": 1,
+            "param3": 2,
+        },
+        required=False,
+    )
+
+    task_replacements = fields.Raw(
+        description="A dictionary of task definitions as the key and the value of the index for which handler"
+        " should be executed. If none provided it will default to the first handler in the list at index position 0.",
+        example={
+            "a": 1,
+            "b": 3,
+        },
+        required=False,
+    )
+
+    scenarios = fields.List(
+        fields.Nested(PipelineScenarioSchema),
+        metadata={"description": "List of scenarios to run for a given pipeline"},
+        required=False,
+    )
 
 
 class InvokePipelineResponseSchema(Schema):
@@ -166,12 +197,37 @@ class PipelineInvoke(MethodView):
 
         retval = {"pipeline_id": pipeline_id, "status": "starting"}
         try:
-
-
+            chain_payload = payload.get("chain_payload", {})
+            settings = payload.get("settings", None)
+            task_replacements = payload.get("task_replacements", {})
+            scenarios = payload.get("scenarios", [])
+            if pipeline_config["schemaVersion"] == 1:
+                pipeline = dag_generator(
+                    pipeline_id=pipeline_id,
+                    event=chain_payload,
+                )
+            elif pipeline_config["schemaVersion"] == 2 and task_replacements:
+                pipeline = dag_generator(
+                    pipeline_id=pipeline_id,
+                    task_replacements=task_replacements,
+                    scenarios=scenarios,
+                    settings=settings,
+                )
+                retval["scenarios"] = pipeline.scenarios
             pipeline.run()
             pipeline_result = PipelineResult(pipeline.execution_id)
             pipeline_result.create_result_entry(pipeline.to_json())
             retval["execution_id"] = pipeline.execution_id
+        except MissingSettingsException:
+            abort(
+                HTTPStatus.BAD_REQUEST,
+                message="Missing required settings in the request.",
+            )
+        except ValidationError as ve:
+            abort(
+                HTTPStatus.BAD_REQUEST,
+                message=get_clean_validation_messages(ve),
+            )
         except Exception as e:
             msg = "Failed to invoke pipeline ... {}".format(pipeline_id)
             logger.error(msg)
```
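
Note that in the v2 path above, `dag_generator` is only called when `task_replacements` is truthy, so a v2 invocation needs at least one replacement entry; bad settings now surface as HTTP 400 via `MissingSettingsException` or a marshmallow `ValidationError` instead of falling through to the generic failure path. An illustrative request body built from the field names and examples in `InvokePipelineSchema` (the values and the exact route are made up):

```python
# Illustrative body for the pipeline-invoke endpoint; keys come from
# InvokePipelineSchema above, values are invented for the example.
invoke_payload = {
    "chain_payload": {"document_id": "123", "send_alert": True},
    "settings": {"param1": "Dataset", "param2": 1, "param3": 2},
    # Use handler index 1 for task "a" and index 3 for task "b"; any task
    # not listed falls back to the handler at index 0.
    "task_replacements": {"a": 1, "b": 3},
    # Each entry must validate against PipelineScenarioSchema (defined in
    # pypeline/pipeline_settings_schema.py, not shown in this diff).
    "scenarios": [],
}
```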

pypeline/flask/api/schedules.py
CHANGED
```diff
@@ -1,16 +1,14 @@
 """ API Endpoints for Scheduled Tasks
 """
-
+
 import logging
 from flask import jsonify, request
 from flask.views import MethodView
-from marshmallow import Schema, fields
 from flask_smorest import Blueprint
 from flask import abort
 from marshmallow.exceptions import ValidationError
 from pypeline.constants import API_DOC_RESPONSES, API_DOC_PARAMS, API_PATH_V1
 from pypeline.utils.config_utils import retrieve_latest_schedule_config
-from pypeline.schedule_config_schema import BaseScheduleSchema
 from pypeline.flask.decorators import require_accesskey
 
 logger = logging.getLogger(__name__)
```

pypeline/pipeline_config_schema.py
CHANGED
```diff
@@ -1,9 +1,12 @@
 """ Schemas for Pipelines
 """
+
 import yaml
 from marshmallow import Schema, fields, EXCLUDE, validates_schema
 from marshmallow.exceptions import ValidationError
 
+from pypeline.pipeline_settings_schema import PipelineSettingsSchema
+
 
 class ExcludeUnknownSchema(Schema):
     """Remove unknown keys from loaded dictionary"""
@@ -81,8 +84,17 @@ class MetadataSchema(Schema):
         example=600,
     )
 
+    groupName = fields.String(
+        required=False,
+        metadata={
+            "description": "If two pipelines logically belong to a group the user can identify that two. "
+            "Imagine pipeline_a and pipeline_b both process data for images. "
+            'Logically we could give them a mutual group name of "Image Processing Pipelines"'
+        },
+    )
+
 
-class TaskDefinitionsSchema(ExcludeUnknownSchema):
+class TaskDefinitionsSchemaV1(ExcludeUnknownSchema):
     """Schema for a single task's configuration"""
 
     handler = fields.String(
@@ -105,7 +117,31 @@ class TaskDefinitionsSchema(ExcludeUnknownSchema):
     )
 
 
-class PipelineConfigSchemaV1(Schema):
+class TaskDefinitionsSchemaV2(ExcludeUnknownSchema):
+    """Schema for a single task's configuration"""
+
+    handlers = fields.List(
+        fields.String(
+            required=True,
+            description="Path to the worker task definition",
+            example="client.workers.my_task",
+        )
+    )
+    maxTtl = fields.Integer(
+        required=False,
+        description="Max TTL for a task in seconds.",
+        default=60,
+        example=60,
+    )
+
+    queue = fields.String(
+        required=False,
+        description="Non-default queue for this task.",
+        example="custom-queue-name",
+    )
+
+
+class PipelineConfigSchemaBase(Schema):
     """Overall pipeline configuration schema"""
 
     metadata = fields.Nested(
@@ -128,6 +164,31 @@ class PipelineConfigSchemaV1(Schema):
         required=True,
         description="The DAG Adjacency definition.",
     )
+
+
+class PipelineConfigSchemaV1(PipelineConfigSchemaBase):
+    """Overall pipeline configuration schema"""
+
+    taskDefinitions = fields.Dict(
+        keys=fields.String(
+            required=True,
+            description="Task's node name. *Must* match related key in dagAdjacency.",
+            example="node_a",
+        ),
+        values=fields.Nested(
+            TaskDefinitionsSchemaV1,
+            required=True,
+            description="Definition of each task in the pipeline.",
+            example={"handler": "abc.task", "maxRetry": 1},
+        ),
+        required=True,
+        description="Configuration for each node defined in DAG.",
+    )
+
+
+class PipelineConfigSchemaV2(PipelineConfigSchemaBase):
+    """Overall pipeline configuration schema"""
+
     taskDefinitions = fields.Dict(
         keys=fields.String(
             required=True,
@@ -135,7 +196,7 @@ class PipelineConfigSchemaV1(Schema):
             example="node_a",
         ),
         values=fields.Nested(
-            TaskDefinitionsSchema,
+            TaskDefinitionsSchemaV2,
             required=True,
             description="Definition of each task in the pipeline.",
             example={"handler": "abc.task", "maxRetry": 1},
@@ -144,6 +205,14 @@ class PipelineConfigSchemaV1(Schema):
         description="Configuration for each node defined in DAG.",
     )
 
+    settings = fields.Nested(
+        PipelineSettingsSchema,
+        required=False,
+        metadata={
+            "description": "Settings schema to validate the actual settings being passed through to the pipelines."
+        },
+    )
+
 
 class BasePipelineSchema(ExcludeUnknownSchema):
     __schema_version__ = None
@@ -185,6 +254,25 @@ class BasePipelineSchema(ExcludeUnknownSchema):
         schema.load(data)
 
 
+class PipelineSchemaV2(BasePipelineSchema):
+    __schema_version__ = 2
+
+    class Meta:
+        unknown = EXCLUDE
+
+    config = fields.Nested(
+        PipelineConfigSchemaV2,
+        required=True,
+        description="Metadata and configuration information for this pipeline.",
+    )
+
+    def validate_pipeline(self, data, **kwargs):
+        # We need to add this function to avoid infinite recursion since
+        # the BasePipelineSchema class above uses the same method for
+        # validation
+        pass
+
+
 class PipelineSchemaV1(BasePipelineSchema):
     __schema_version__ = 1
 
```
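
Putting the new schema pieces together: a schemaVersion 2 pipeline pairs each task with a list of interchangeable handlers and may embed a settings schema and a `groupName`. A sketch of a definition shaped to satisfy `PipelineSchemaV2` / `PipelineConfigSchemaV2`; the `dagAdjacency` layout and any metadata keys beyond `groupName` are assumptions carried over from the v1 schema referenced in this diff:

```python
# Hypothetical schemaVersion 2 pipeline definition; field names come from the
# schemas above, concrete values are invented.
pipeline_v2 = {
    "schemaVersion": 2,
    "config": {
        "metadata": {
            "groupName": "Image Processing Pipelines",  # new optional field
        },
        "dagAdjacency": {"a": ["b"], "b": []},  # assumed v1-style adjacency map
        "taskDefinitions": {
            # v2: a "handlers" list per task instead of a single "handler"
            # string; task_replacements picks the index at invoke time.
            "a": {
                "handlers": [
                    "client.workers.task_a_default",
                    "client.workers.task_a_alternate",
                ],
                "maxTtl": 60,
            },
            "b": {
                "handlers": ["client.workers.task_b"],
                "queue": "custom-queue-name",
            },
        },
        # Optional; validated against PipelineSettingsSchema, whose shape is
        # defined in pypeline/pipeline_settings_schema.py (not shown here).
        "settings": {},
    },
}
```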