scalable-pypeline 2.1.31__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pypeline/__init__.py +1 -0
- pypeline/barrier.py +63 -0
- pypeline/constants.py +94 -0
- pypeline/dramatiq.py +455 -0
- pypeline/executable_job_config_schema.py +35 -0
- pypeline/extensions.py +17 -0
- pypeline/flask/__init__.py +16 -0
- pypeline/flask/api/__init__.py +0 -0
- pypeline/flask/api/pipelines.py +275 -0
- pypeline/flask/api/schedules.py +40 -0
- pypeline/flask/decorators.py +41 -0
- pypeline/flask/flask_pypeline.py +156 -0
- pypeline/job_runner.py +205 -0
- pypeline/pipeline_config_schema.py +352 -0
- pypeline/pipeline_settings_schema.py +561 -0
- pypeline/pipelines/__init__.py +0 -0
- pypeline/pipelines/composition/__init__.py +0 -0
- pypeline/pipelines/composition/parallel_pipeline_composition.py +375 -0
- pypeline/pipelines/composition/pypeline_composition.py +215 -0
- pypeline/pipelines/factory.py +86 -0
- pypeline/pipelines/middleware/__init__.py +0 -0
- pypeline/pipelines/middleware/get_active_worker_id_middleware.py +22 -0
- pypeline/pipelines/middleware/graceful_shutdown_middleware.py +50 -0
- pypeline/pipelines/middleware/parallel_pipeline_middleware.py +60 -0
- pypeline/pipelines/middleware/pypeline_middleware.py +202 -0
- pypeline/pypeline_yaml.py +468 -0
- pypeline/schedule_config_schema.py +125 -0
- pypeline/utils/__init__.py +0 -0
- pypeline/utils/config_utils.py +81 -0
- pypeline/utils/dramatiq_utils.py +134 -0
- pypeline/utils/executable_job_util.py +35 -0
- pypeline/utils/graceful_shutdown_util.py +39 -0
- pypeline/utils/module_utils.py +108 -0
- pypeline/utils/pipeline_utils.py +144 -0
- pypeline/utils/schema_utils.py +24 -0
- scalable_pypeline-2.1.31.dist-info/LICENSE +177 -0
- scalable_pypeline-2.1.31.dist-info/METADATA +212 -0
- scalable_pypeline-2.1.31.dist-info/RECORD +42 -0
- scalable_pypeline-2.1.31.dist-info/WHEEL +6 -0
- scalable_pypeline-2.1.31.dist-info/entry_points.txt +6 -0
- scalable_pypeline-2.1.31.dist-info/top_level.txt +2 -0
- tests/fixtures/__init__.py +0 -0
|
@@ -0,0 +1,375 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import copy
|
|
4
|
+
import json
|
|
5
|
+
import time
|
|
6
|
+
import typing
|
|
7
|
+
from uuid import uuid4
|
|
8
|
+
from urllib.parse import urlparse
|
|
9
|
+
|
|
10
|
+
from dramatiq.broker import get_broker
|
|
11
|
+
from dramatiq.results import ResultMissing
|
|
12
|
+
from db_medley.redis_conf import RedisConnector
|
|
13
|
+
from redis.exceptions import RedisError
|
|
14
|
+
from redis.sentinel import Sentinel
|
|
15
|
+
from pypeline.constants import (
|
|
16
|
+
REDIS_URL,
|
|
17
|
+
REDIS_SENTINEL_MASTER_NAME,
|
|
18
|
+
DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
|
|
19
|
+
DEFAULT_REDIS_SOCKET_TIMEOUT,
|
|
20
|
+
DEFAULT_REDIS_RETRY_ON_TIMEOUT,
|
|
21
|
+
DEFAULT_REDIS_SOCKET_KEEPALIVE,
|
|
22
|
+
DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
|
|
23
|
+
)
|
|
24
|
+
from pypeline.barrier import LockingParallelBarrier
|
|
25
|
+
from pypeline.constants import DEFAULT_RESULT_TTL
|
|
26
|
+
from pypeline.dramatiq import REDIS_URL
|
|
27
|
+
|
|
28
|
+
from dramatiq.message import Message
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class parallel_pipeline:
    """Run groups of messages, one group after another.

    ``messages`` is a list of groups; each group is a list of messages.
    ``run`` only enqueues the first group.  The serialized execution graph
    is attached to the options of the first group's messages; presumably
    worker-side middleware uses it to schedule the following groups
    (that code is not part of this module -- TODO confirm).

    Parameters:
      messages(typing.List[typing.List[Message]]): The groups of messages,
        in execution order.
      broker(Broker): The broker to run the pipeline on.  Defaults to
        the current global broker.
    """

    messages: typing.List[typing.List[Message]]

    def __init__(self, messages: typing.List[typing.List[Message]], broker=None):
        self.broker = broker or get_broker()
        self.messages = messages
        self.execution_id = str(uuid4())

        execution_graph = []
        for message_group in self.messages:
            # Every message of a group shares one completion id.
            group_completion_uuid = str(uuid4())
            serialized_group = []
            for m in message_group:
                m.kwargs["event"]["execution_id"] = self.execution_id
                m.options["group_completion_uuid"] = group_completion_uuid
                # Snapshot the message now, before the graph itself is
                # attached to the first group's options below.
                serialized_group.append(copy.deepcopy(m.asdict()))
            execution_graph.append(serialized_group)

        self.execution_graph = execution_graph

        # An empty pipeline has no first group to carry the graph.
        if self.messages:
            for m in self.messages[0]:
                m.options["execution_graph"] = execution_graph

    def __len__(self):
        """Returns the total number of messages across all groups."""
        return sum(len(message_group) for message_group in self.messages)

    def __str__(self):  # pragma: no cover
        """Return a string representation of the parallel_pipeline.

        This representation shows the order of execution for each group of messages.
        """
        parts = []
        for i, message_group in enumerate(self.messages):
            parts.append(f"Group {i + 1}: [\n")
            parts.extend(
                f" Message {j + 1}: {message.actor_name}\n"
                for j, message in enumerate(message_group)
            )
            parts.append("]\n")
        return "".join(parts)

    @property
    def completed(self):
        """True when every message in the pipeline has a stored result."""
        return self.completed_count == len(self)

    @property
    def completed_count(self):
        """Number of messages whose result is currently available.

        Messages whose result lookup raises ``ResultMissing`` are not
        counted; any other exception propagates.
        """
        # Use this pipeline's broker, like get_result/get_results do.
        backend = self.broker.get_results_backend()
        count = 0
        for message_group in self.messages:
            for message in message_group:
                try:
                    message.get_result(backend=backend)
                except ResultMissing:
                    continue
                count += 1
        return count

    def run(self, *, delay=None):
        """Run this parallel_pipeline.

        Sets the barrier's task count for the first group to the size of
        that group and enqueues each of its messages.  Running an empty
        pipeline is a no-op.

        Parameters:
          delay(int): Passed as ``delay`` to ``broker.enqueue`` for every
            message of the first group.

        Returns:
          parallel_pipeline: Itself.
        """
        if not self.messages:
            return self

        starting_group = self.messages[0]
        completion_uuid = starting_group[0].options["group_completion_uuid"]
        locking_parallel_barrier = LockingParallelBarrier(
            REDIS_URL, task_key=completion_uuid, lock_key=f"{completion_uuid}-lock"
        )
        locking_parallel_barrier.set_task_count(len(starting_group))

        for m in starting_group:
            self.broker.enqueue(m, delay=delay)

        return self

    def get_result(self, *, block=False, timeout=None):
        """Get the result of this pipeline.

        Pipeline results are represented by the result of the last
        message of the last group.

        Parameters:
          block(bool): Whether or not to block until a result is set.
          timeout(int): The maximum amount of time, in ms, to wait for
            a result when block is True.  Passed through unchanged to
            ``Message.get_result``.

        Raises:
          ResultMissing: When block is False and the result isn't set.
          ResultTimeout: When waiting for a result times out.
          IndexError: When the pipeline has no messages.

        Returns:
          object: The result.
        """
        last_message = self.messages[-1][-1]
        backend = self.broker.get_results_backend()
        return last_message.get_result(backend=backend, block=block, timeout=timeout)

    def get_results(self, *, block=False, timeout=None):
        """Get the results of each job in the pipeline.

        Parameters:
          block(bool): Whether or not to block until a result is set.
          timeout(int): The maximum amount of time, in ms, to wait for
            all results when block is True.  The remaining time is
            recomputed before each message is queried.

        Raises:
          ResultMissing: When block is False and the result isn't set.
          ResultTimeout: When waiting for a result times out.

        Returns:
          A generator of single-entry dicts ``{actor_name: result}``, in
          group order.
        """
        deadline = None
        if timeout:
            deadline = time.monotonic() + timeout / 1000

        # The backend does not change between messages; look it up once.
        backend = self.broker.get_results_backend()

        for message_group in self.messages:
            for message in message_group:
                if deadline:
                    timeout = max(0, int((deadline - time.monotonic()) * 1000))

                yield {
                    message.actor_name: message.get_result(
                        backend=backend, block=block, timeout=timeout
                    )
                }

    def to_json(self) -> str:
        """Convert the execution graph to a JSON string representation.

        The execution graph is the list of groups built in ``__init__``,
        each message serialized with ``Message.asdict()``.

        :return: A JSON string representing the execution graph.
        :rtype: str
        """
        return json.dumps(self.execution_graph)

    @classmethod
    def from_json(cls, json_data: str) -> parallel_pipeline:
        """Create a parallel_pipeline from the JSON produced by ``to_json``.

        Every serialized message is turned back into a
        ``dramatiq.message.Message`` and the groups are passed to the
        constructor.  Because the constructor runs again, the returned
        pipeline gets a new ``execution_id`` and new group completion ids.

        :param json_data: A JSON string containing the serialized execution graph.
        :type json_data: str
        :return: An instance of `parallel_pipeline` reconstructed from the JSON data.
        :rtype: parallel_pipeline
        """
        execution_graph = json.loads(json_data)
        messages = [
            [Message(**message) for message in message_group]
            for message_group in execution_graph
        ]
        return cls(messages)
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
class PipelineResult:
    """
    A class to manage and retrieve the results of a parallel pipeline execution.

    The `PipelineResult` class provides methods for creating a result entry in a Redis database,
    loading pipeline data from Redis, and retrieving the status and results of the pipeline execution.

    Attributes:
        pipeline (parallel_pipeline): The pipeline rebuilt by `load`, or None until then.
        execution_id (str): A unique identifier for the execution of the pipeline.
        redis_key (str): The key used to store and retrieve pipeline data from Redis
            (``"<execution_id>-results-key"``).
        redis_conn: A Redis connection object used to interact with the Redis database.
        result_ttl (int): Time-to-live (TTL) for the result entry in Redis, in seconds.
    """

    def __init__(self, execution_id: str, result_ttl: int = DEFAULT_RESULT_TTL):
        """
        Initialize a PipelineResult object with an execution ID and optional result TTL.

        When ``REDIS_SENTINEL_MASTER_NAME`` is set, the connection is obtained from a
        Redis Sentinel built from ``REDIS_URL``; otherwise from ``RedisConnector``.

        :param execution_id: A unique identifier for the pipeline execution.
        :type execution_id: str
        :param result_ttl: The time-to-live (TTL) for the result entry in Redis. Defaults to DEFAULT_RESULT_TTL.
        :type result_ttl: int
        """
        self.pipeline: typing.Optional[parallel_pipeline] = None
        self.execution_id = execution_id
        self.redis_key = f"{execution_id}-results-key"
        self.result_ttl = result_ttl

        if REDIS_SENTINEL_MASTER_NAME is not None:
            parsed_redis_url = urlparse(REDIS_URL)
            redis_sentinel = Sentinel(
                sentinels=[(parsed_redis_url.hostname, parsed_redis_url.port)],
            )
            self.redis_conn = redis_sentinel.master_for(
                REDIS_SENTINEL_MASTER_NAME,
                db=self._redis_db_from_path(parsed_redis_url.path),
                password=parsed_redis_url.password,
                socket_connect_timeout=DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
                socket_timeout=DEFAULT_REDIS_SOCKET_TIMEOUT,
                retry_on_timeout=DEFAULT_REDIS_RETRY_ON_TIMEOUT,
                socket_keepalive=DEFAULT_REDIS_SOCKET_KEEPALIVE,
                health_check_interval=DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
            )
        else:
            self.redis_conn = RedisConnector().get_connection()

    @staticmethod
    def _redis_db_from_path(path: str) -> int:
        """Return the database number encoded in a Redis URL path.

        ``"/12"`` gives 12; an empty path or a bare ``"/"`` gives 0.
        Reading only ``path[1]`` would truncate multi-digit databases and
        fail on ``"/"``, so the whole first path segment is parsed.
        """
        first_segment = path.lstrip("/").split("/", 1)[0]
        return int(first_segment) if first_segment else 0

    def create_result_entry(self, pipeline_json_str: str):
        """
        Store the serialized pipeline data in Redis with a specified TTL.

        The JSON string is stored under `redis_key` with a time-to-live of `result_ttl`.

        :param pipeline_json_str: A JSON string representing the pipeline execution graph.
        :type pipeline_json_str: str
        :raises ValueError: If the provided pipeline data is None or an empty string.
        :raises RuntimeError: If Redis raises a RedisError while setting the value.
        """
        if not pipeline_json_str:
            raise ValueError("No pipeline data passed to create result store")

        try:
            self.redis_conn.setex(self.redis_key, self.result_ttl, pipeline_json_str)
        except RedisError as e:
            # Keep the Redis error as the cause so the traceback stays useful.
            raise RuntimeError(f"Failed to store pipeline data in Redis: {e}") from e

    def load(self):
        """
        Load the pipeline data from Redis and reconstruct the pipeline object.

        Sets `pipeline` to the `parallel_pipeline` rebuilt from the stored JSON,
        or to None when nothing is stored under `redis_key`.

        :raises RuntimeError: If Redis raises a RedisError while retrieving the data.
        """
        try:
            pipeline_data = self.redis_conn.get(self.redis_key)
            if pipeline_data:
                self.pipeline = parallel_pipeline.from_json(pipeline_data)
            else:
                self.pipeline = None
        except RedisError as e:
            raise RuntimeError(f"Failed to load pipeline data from Redis: {e}") from e

    @property
    def status(self) -> str:
        """
        Get the current status of the pipeline execution.

        :return: "unavailable" when no pipeline is loaded, otherwise "complete" or
            "pending" depending on the pipeline's `completed` property.
        :rtype: str
        """
        if not self.pipeline:
            return "unavailable"
        return "complete" if self.pipeline.completed else "pending"

    def get_results(self) -> dict:
        """
        Retrieve all results from the pipeline execution with unique actor identifiers.

        Results are fetched with the pipeline's default (non-blocking) `get_results`.
        When an actor name occurs more than once, later occurrences get a numeric
        suffix ("name-0", "name-1", ...).

        :return: A dictionary of results keyed by unique actor identifiers; empty when
            no pipeline is loaded.
        :rtype: dict
        """
        if not self.pipeline:
            return {}

        results = {}
        for result in self.pipeline.get_results():
            for actor, res in result.items():
                unique_actor = self._get_unique_actor_name(actor, results)
                results[unique_actor] = res
        return results

    def get_result(self):
        """
        Retrieve the pipeline's final result.

        Delegates to the loaded pipeline's `get_result`.

        :return: The pipeline's result, or None if no pipeline is loaded.
        """
        if self.pipeline:
            return self.pipeline.get_result()
        return None

    def _get_unique_actor_name(self, actor: str, results: dict) -> str:
        """
        Generate a unique actor name by appending a numeric suffix if necessary.

        :param actor: The base name of the actor.
        :type actor: str
        :param results: The current dictionary of results to check for uniqueness.
        :type results: dict
        :return: `actor` itself when unused, else the first free "actor-N" with N from 0.
        :rtype: str
        """
        if actor not in results:
            return actor

        suffix = 0
        new_actor = f"{actor}-{suffix}"
        while new_actor in results:
            suffix += 1
            new_actor = f"{actor}-{suffix}"
        return new_actor
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import typing
|
|
3
|
+
from copy import copy
|
|
4
|
+
from uuid import uuid4
|
|
5
|
+
|
|
6
|
+
import networkx as nx
|
|
7
|
+
from dramatiq import get_broker
|
|
8
|
+
|
|
9
|
+
from pypeline.barrier import LockingParallelBarrier
|
|
10
|
+
from pypeline.constants import REDIS_URL, PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
|
|
11
|
+
from pypeline.utils.dramatiq_utils import register_lazy_actor
|
|
12
|
+
from pypeline.utils.module_utils import get_callable
|
|
13
|
+
from pypeline.utils.pipeline_utils import get_execution_graph
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Pypeline:
    """A pipeline DAG run for one base-case scenario plus optional variants.

    The first scenario is the base case and runs every task of the graph.
    Each further scenario runs its ``taskReruns``, the tasks named in its
    ``taskReplacements`` and all of their descendants in the graph.  One
    starting message is built for the base case, and one for every further
    scenario whose task set contains the first task of the DAG.
    """

    def __init__(
        self,
        pipeline: dict,
        scenarios: typing.Optional[typing.List[dict]] = None,
        broker=None,
    ):
        """Prepare the scenarios and build the starting messages.

        The scenario dicts are updated in place with ``execution_id``,
        ``base_case_execution_id`` and ``tasksToRunInScenario``.

        :param pipeline: Pipeline definition; its ``"config"`` entry must hold
            ``dagAdjacency``, ``taskDefinitions`` and ``metadata``.
        :param scenarios: Non-empty list of scenario dicts; the first is the base case.
        :param broker: Broker to enqueue on. Defaults to the global broker.
        :raises ValueError: If no scenario is supplied.
        """
        # The base case is mandatory; fail early with a clear message.
        if not scenarios:
            raise ValueError(
                "Pypeline requires at least one scenario; the first one is the base case"
            )

        # Construct initial properties
        self.pipeline = pipeline
        self.broker = broker or get_broker()
        self._starting_messages = []
        self.scenarios = scenarios

        # Get pipeline dag graph and find first task
        pipeline_config = pipeline["config"]
        self.graph = get_execution_graph(pipeline_config)
        self.number_of_tasks = len(self.graph.nodes)
        first_task = list(pipeline_config["dagAdjacency"].keys())[0]

        # The first scenario is the base case and always runs every task.
        base_scenario = self.scenarios[0]
        base_case_execution_id = base_scenario.get("execution_id", None) or str(uuid4())
        base_scenario["execution_id"] = base_case_execution_id
        base_scenario["base_case_execution_id"] = base_case_execution_id
        base_scenario["tasksToRunInScenario"] = list(self.graph.nodes)

        # Identify the remaining scenarios by position, not by equality.
        for scenario in self.scenarios[1:]:
            seed_tasks = set(scenario["taskReruns"]) | set(scenario["taskReplacements"])

            # A changed task forces everything downstream of it to run again.
            tasks_to_run = set(seed_tasks)
            for seed_task in seed_tasks:
                tasks_to_run |= nx.descendants(self.graph, seed_task)
            tasks_to_run = list(tasks_to_run)

            self.number_of_tasks += len(tasks_to_run)
            scenario["tasksToRunInScenario"] = tasks_to_run
            scenario["base_case_execution_id"] = base_case_execution_id
            scenario["execution_id"] = scenario.get("execution_id", None) or str(uuid4())

            # Scenarios that include the first task must be kicked off now.
            if first_task in tasks_to_run:
                self._starting_messages.append(
                    self._build_starting_message(
                        first_task,
                        execution_id=scenario["execution_id"],
                        base_case_execution_id=base_case_execution_id,
                        task_replacements=copy(scenario["taskReplacements"]),
                        settings=scenario.get("settings", None),
                    )
                )

        # Run the first task of the first scenario no matter what
        self._starting_messages.append(
            self._build_starting_message(
                first_task,
                execution_id=base_case_execution_id,
                base_case_execution_id=base_case_execution_id,
                task_replacements=base_scenario["taskReplacements"],
                settings=base_scenario.get("settings", None),
            )
        )

    def _build_starting_message(
        self,
        task_name,
        *,
        execution_id,
        base_case_execution_id,
        task_replacements,
        settings,
    ):
        """Register the actor for ``task_name`` and build its starting message."""
        pipeline_config = self.pipeline["config"]
        task_definition = pipeline_config["taskDefinitions"][task_name]

        # A replacement selects a handler by index; index 0 is the default.
        handler = task_definition["handlers"][task_replacements.get(task_name, 0)]
        server_type = task_definition.get("serverType", None)
        lazy_actor = register_lazy_actor(
            self.broker,
            get_callable(handler),
            pipeline_config["metadata"],
            server_type,
        )

        message = lazy_actor.message()
        message.options["pipeline"] = self.pipeline
        max_retry = pipeline_config["metadata"].get("maxRetry", None)
        if max_retry is not None:
            message.options["max_retries"] = max_retry
        message.options["task_replacements"] = task_replacements
        message.options["execution_id"] = execution_id
        message.options["task_name"] = task_name
        message.options["scenarios"] = self.scenarios
        message.options["base_case_execution_id"] = base_case_execution_id

        if settings:
            # Shallow copy so the ids are not written into the scenario's settings.
            message.kwargs["settings"] = copy(settings)
            message.kwargs["settings"]["execution_id"] = execution_id
            message.kwargs["settings"]["base_case_execution_id"] = base_case_execution_id

        return message

    def run(self, *, delay=None):
        """Enqueue every starting message.

        Before each message is enqueued, the barrier for
        ``"<execution_id>-<task_name>"`` gets a task count of 1.

        :param delay: Passed as ``delay`` to ``broker.enqueue``.
        :return: Itself.
        """
        for message in self._starting_messages:
            task_key = (
                f"{message.options['execution_id']}-{message.options['task_name']}"
            )
            locking_parallel_barrier = LockingParallelBarrier(
                REDIS_URL,
                task_key=task_key,
                lock_key=f"{message.options['base_case_execution_id']}-lock",
            )
            locking_parallel_barrier.set_task_count(1)
            self.broker.enqueue(message, delay=delay)

        return self

    def __len__(self):
        """Number of graph nodes plus the tasks of every non-base scenario."""
        return self.number_of_tasks

    def completed(self):
        """Return True when every task of every scenario is finished.

        A task counts as finished when its barrier key exists and its task
        count is below 1.  A missing key or a count of 1 or more makes the
        whole pipeline incomplete; checking stops at the first such task.
        """
        for scenario in self.scenarios:
            lock_key = f"{scenario['base_case_execution_id']}-lock"
            for task in scenario["tasksToRunInScenario"]:
                locking_parallel_barrier = LockingParallelBarrier(
                    REDIS_URL,
                    task_key=f"{scenario['execution_id']}-{task}",
                    lock_key=lock_key,
                )
                try:
                    locking_parallel_barrier.acquire_lock(
                        timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
                    )
                    task_complete = (
                        locking_parallel_barrier.task_exists()
                        and locking_parallel_barrier.get_task_count() < 1
                    )
                finally:
                    locking_parallel_barrier.release_lock()
                if not task_complete:
                    return False

        return True

    def to_json(self) -> str:
        """Serialize the pipeline definition and the scenarios to JSON."""
        return json.dumps(
            {
                "pipeline": self.pipeline,
                "scenarios": self.scenarios,
            }
        )

    @classmethod
    def from_json(cls, json_data: str) -> "Pypeline":
        """Rebuild a Pypeline from the JSON produced by ``to_json``.

        The constructor runs again, so actors are registered and starting
        messages are rebuilt on the global broker.
        """
        data = json.loads(json_data)

        return cls(
            data["pipeline"],
            scenarios=data["scenarios"],
        )
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
from dramatiq import get_broker, Message
|
|
3
|
+
from pypeline.pipelines.composition.parallel_pipeline_composition import (
|
|
4
|
+
parallel_pipeline,
|
|
5
|
+
)
|
|
6
|
+
from pypeline.dramatiq import LazyActor
|
|
7
|
+
from pypeline.utils.dramatiq_utils import register_lazy_actor
|
|
8
|
+
from pypeline.pipeline_settings_schema import (
|
|
9
|
+
MissingSettingsException,
|
|
10
|
+
create_pipeline_settings_schema,
|
|
11
|
+
PipelineScenarioSchema,
|
|
12
|
+
)
|
|
13
|
+
from pypeline.pipelines.composition.pypeline_composition import Pypeline
|
|
14
|
+
from pypeline.utils.config_utils import retrieve_latest_pipeline_config
|
|
15
|
+
from pypeline.utils.module_utils import get_callable
|
|
16
|
+
from pypeline.utils.pipeline_utils import (
|
|
17
|
+
get_execution_graph,
|
|
18
|
+
topological_sort_with_parallelism,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def dag_generator(
    pipeline_id: str,
    scenarios: typing.Optional[typing.List[typing.Dict]] = None,
    *args,
    **kwargs
) -> typing.Union[parallel_pipeline, Pypeline]:
    """Generates a pipeline dag from a pre-defined pipeline yaml

    All actors registered on the global broker are cleared first.  For a
    pipeline with ``schemaVersion`` 2 the scenarios are validated and a
    ``Pypeline`` is returned.  Otherwise one message per task is created,
    grouped by ``topological_sort_with_parallelism``, and a
    ``parallel_pipeline`` is returned.

    :param pipeline_id: Id of the pipeline to generate
    :param scenarios: Scenario dicts, only used for schemaVersion 2 pipelines.
        Defaults to an empty list.
    :param args: Positional arguments passed to every task message
        (non-schemaVersion-2 pipelines only).
    :param kwargs: Keyword arguments passed to every task message
        (non-schemaVersion-2 pipelines only).
    :return: Returns a Pypeline or parallel_pipeline object which can be run
    """
    # A fresh list per call; a shared default list would leak between calls.
    if scenarios is None:
        scenarios = []

    pipeline = retrieve_latest_pipeline_config(pipeline_id=pipeline_id)

    pipeline_config = pipeline["config"]
    broker = get_broker()
    broker.actors.clear()

    if pipeline["schemaVersion"] == 2:
        supplied_pipeline_settings_schema = create_pipeline_settings_schema(
            pipeline_config["settings"]
        )

        # Validate scenarios settings to make sure they look okay
        validated_scenarios = PipelineScenarioSchema(many=True).load(scenarios)

        for scenario in validated_scenarios:
            supplied_pipeline_settings_schema.load(scenario["settings"])

        # NOTE(review): the raw scenarios, not validated_scenarios, are passed
        # on -- confirm this is intended.
        return Pypeline(pipeline, scenarios=scenarios, broker=broker)

    graph = get_execution_graph(pipeline_config)
    optimal_execution_graph = topological_sort_with_parallelism(graph.copy())

    messages: typing.List[typing.List[Message]] = []

    task_definitions = pipeline_config["taskDefinitions"]
    for task_group in optimal_execution_graph:
        message_group = []
        for task in task_group:
            module_path = task_definitions[task]["handler"]
            server_type = task_definitions[task].get("serverType", None)
            lazy_actor = register_lazy_actor(
                broker,
                get_callable(module_path),
                pipeline_config["metadata"],
                server_type,
            )
            # Empty args/kwargs unpack to nothing, so one call covers every case.
            msg = lazy_actor.message(*args, **kwargs)
            msg.options["task_ttl"] = pipeline_config["metadata"]["maxTtl"]
            message_group.append(msg)

        messages.append(message_group)

    return parallel_pipeline(messages)
|
|
File without changes
|