scalable-pypeline 2.1.31__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. pypeline/__init__.py +1 -0
  2. pypeline/barrier.py +63 -0
  3. pypeline/constants.py +94 -0
  4. pypeline/dramatiq.py +455 -0
  5. pypeline/executable_job_config_schema.py +35 -0
  6. pypeline/extensions.py +17 -0
  7. pypeline/flask/__init__.py +16 -0
  8. pypeline/flask/api/__init__.py +0 -0
  9. pypeline/flask/api/pipelines.py +275 -0
  10. pypeline/flask/api/schedules.py +40 -0
  11. pypeline/flask/decorators.py +41 -0
  12. pypeline/flask/flask_pypeline.py +156 -0
  13. pypeline/job_runner.py +205 -0
  14. pypeline/pipeline_config_schema.py +352 -0
  15. pypeline/pipeline_settings_schema.py +561 -0
  16. pypeline/pipelines/__init__.py +0 -0
  17. pypeline/pipelines/composition/__init__.py +0 -0
  18. pypeline/pipelines/composition/parallel_pipeline_composition.py +375 -0
  19. pypeline/pipelines/composition/pypeline_composition.py +215 -0
  20. pypeline/pipelines/factory.py +86 -0
  21. pypeline/pipelines/middleware/__init__.py +0 -0
  22. pypeline/pipelines/middleware/get_active_worker_id_middleware.py +22 -0
  23. pypeline/pipelines/middleware/graceful_shutdown_middleware.py +50 -0
  24. pypeline/pipelines/middleware/parallel_pipeline_middleware.py +60 -0
  25. pypeline/pipelines/middleware/pypeline_middleware.py +202 -0
  26. pypeline/pypeline_yaml.py +468 -0
  27. pypeline/schedule_config_schema.py +125 -0
  28. pypeline/utils/__init__.py +0 -0
  29. pypeline/utils/config_utils.py +81 -0
  30. pypeline/utils/dramatiq_utils.py +134 -0
  31. pypeline/utils/executable_job_util.py +35 -0
  32. pypeline/utils/graceful_shutdown_util.py +39 -0
  33. pypeline/utils/module_utils.py +108 -0
  34. pypeline/utils/pipeline_utils.py +144 -0
  35. pypeline/utils/schema_utils.py +24 -0
  36. scalable_pypeline-2.1.31.dist-info/LICENSE +177 -0
  37. scalable_pypeline-2.1.31.dist-info/METADATA +212 -0
  38. scalable_pypeline-2.1.31.dist-info/RECORD +42 -0
  39. scalable_pypeline-2.1.31.dist-info/WHEEL +6 -0
  40. scalable_pypeline-2.1.31.dist-info/entry_points.txt +6 -0
  41. scalable_pypeline-2.1.31.dist-info/top_level.txt +2 -0
  42. tests/fixtures/__init__.py +0 -0
pypeline/pipelines/composition/parallel_pipeline_composition.py
@@ -0,0 +1,375 @@
+ from __future__ import annotations
+
+ import copy
+ import json
+ import time
+ import typing
+ from uuid import uuid4
+ from urllib.parse import urlparse
+
+ from dramatiq.broker import get_broker
+ from dramatiq.results import ResultMissing
+ from db_medley.redis_conf import RedisConnector
+ from redis.exceptions import RedisError
+ from redis.sentinel import Sentinel
+ from pypeline.constants import (
+     REDIS_URL,
+     REDIS_SENTINEL_MASTER_NAME,
+     DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+     DEFAULT_REDIS_SOCKET_TIMEOUT,
+     DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+     DEFAULT_REDIS_SOCKET_KEEPALIVE,
+     DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+ )
+ from pypeline.barrier import LockingParallelBarrier
+ from pypeline.constants import DEFAULT_RESULT_TTL
+ from pypeline.dramatiq import REDIS_URL
+
+ from dramatiq.message import Message
+
+
+ class parallel_pipeline:
+     """Chain actors together, passing the result of one actor to the
+     next one in line.
+
+     Parameters:
+       children(typing.List[typing.List[Message]]): A sequence of messages or
+         pipelines. Child pipelines are flattened into the resulting
+         pipeline.
+       broker(Broker): The broker to run the pipeline on. Defaults to
+         the current global broker.
+     """
+
+     messages: list[Message]
+
+     def __init__(self, messages: typing.List[typing.List[Message]], broker=None):
+         self.broker = broker or get_broker()
+         self.messages = messages
+         self.execution_id = str(uuid4())
+         execution_graph = []
+
+         for message_group in self.messages:
+             sub_execution_group = []
+             group_completion_uuid = str(uuid4())
+             for m in message_group:
+                 m.kwargs["event"]["execution_id"] = self.execution_id
+                 m.options["group_completion_uuid"] = group_completion_uuid
+                 message_dict = copy.deepcopy(m.asdict())
+                 sub_execution_group.append(message_dict)
+             # Last item in the group is the id of the group to be executed
+             execution_graph.append(sub_execution_group)
+
+         self.execution_graph = execution_graph
+
+         for m in self.messages[0]:
+             m.options["execution_graph"] = execution_graph
+
+     def __len__(self):
+         """Returns the length of the parallel_pipeline."""
+         count = 0
+         for message_group in self.messages:
+             count = count + len(message_group)
+
+         return count
+
+     def __str__(self):  # pragma: no cover
+         """Return a string representation of the parallel_pipeline.
+
+         This representation shows the order of execution for each group of messages.
+         """
+         result = []
+
+         for i, message_group in enumerate(self.messages):
+             group_str = f"Group {i + 1}: [\n"
+             for j, message in enumerate(message_group):
+                 message_str = f" Message {j + 1}: {message.actor_name}\n"
+                 group_str += message_str
+             group_str += "]\n"
+             result.append(group_str)
+
+         return "".join(result)
+
+     @property
+     def completed(self):
+         return self.completed_count == len(self)
+
+     @property
+     def completed_count(self):
+         count = 0
+
+         for message_group in self.messages:
+             for message in message_group:
+                 try:
+                     message.get_result()
+                     count = count + 1
+                 except ResultMissing:
+                     pass
+         return count
+
+     def run(self, *, delay=None):
+         """Run this parallel_pipeline.
+
+         Parameters:
+           delay(int): The minimum amount of time, in milliseconds, the
+             parallel_pipeline should be delayed by. If both parallel_pipeline's delay and
+             first message's delay are provided, the bigger value will be
+             used.
+
+         Returns:
+           parallel_pipeline: Itself.
+         """
+         starting_group = self.messages[0]
+
+         completion_uuid = starting_group[0].options["group_completion_uuid"]
+         locking_parallel_barrier = LockingParallelBarrier(
+             REDIS_URL, task_key=completion_uuid, lock_key=f"{completion_uuid}-lock"
+         )
+         locking_parallel_barrier.set_task_count(len(starting_group))
+
+         for m in starting_group:
+             self.broker.enqueue(m, delay=delay)
+
+         return self
+
+     def get_result(self, *, block=False, timeout=None):
+         """Get the result of this pipeline.
+
+         Pipeline results are represented by the result of the last
+         message in the chain.
+
+         Parameters:
+           block(bool): Whether or not to block until a result is set.
+           timeout(int): The maximum amount of time, in ms, to wait for
+             a result when block is True. Defaults to 10 seconds.
+
+         Raises:
+           ResultMissing: When block is False and the result isn't set.
+           ResultTimeout: When waiting for a result times out.
+
+         Returns:
+           object: The result.
+         """
+         last_message = self.messages[-1][-1]
+
+         backend = self.broker.get_results_backend()
+         return last_message.get_result(backend=backend, block=block, timeout=timeout)
+
+     def get_results(self, *, block=False, timeout=None):
+         """Get the results of each job in the pipeline.
+
+         Parameters:
+           block(bool): Whether or not to block until a result is set.
+           timeout(int): The maximum amount of time, in ms, to wait for
+             a result when block is True. Defaults to 10 seconds.
+
+         Raises:
+           ResultMissing: When block is False and the result isn't set.
+           ResultTimeout: When waiting for a result times out.
+
+         Returns:
+           A result generator.
+         """
+         deadline = None
+         if timeout:
+             deadline = time.monotonic() + timeout / 1000
+
+         for message_group in self.messages:
+             for message in message_group:
+                 if deadline:
+                     timeout = max(0, int((deadline - time.monotonic()) * 1000))
+
+                 backend = self.broker.get_results_backend()
+                 yield {
+                     message.actor_name: message.get_result(
+                         backend=backend, block=block, timeout=timeout
+                     )
+                 }
+
+     def to_json(self) -> str:
+         """Convert the execution graph to a JSON string representation.
+
+         This method serializes the execution graph of the pipeline into a JSON string.
+         This serialized form can be used to save the pipeline state or share it across different systems,
+         enabling the retrieval of a pipeline "run" for obtaining its results at a later time.
+
+         :return: A JSON string representing the execution graph.
+         :rtype: str
+         """
+         return json.dumps(self.execution_graph)
+
+     @classmethod
+     def from_json(cls, json_data: str) -> parallel_pipeline:
+         """Create a ParallelPipeline object from a JSON string representation of the execution graph.
+
+         This class method deserializes a JSON string into a list of messages, each representing
+         a task or operation in the pipeline. The method reconstructs the execution graph using
+         the `dramatiq.message.Message` objects and returns an instance of the `parallel_pipeline` class.
+
+         :param json_data: A JSON string containing the serialized execution graph.
+         :type json_data: str
+         :return: An instance of `parallel_pipeline` reconstructed from the JSON data.
+         :rtype: parallel_pipeline
+         """
+         execution_graph = json.loads(json_data)
+
+         messages = []
+
+         for message_group in execution_graph:
+             temp_group = []
+             for message in message_group:
+                 temp_group.append(Message(**message))
+             messages.append(temp_group)
+
+         return cls(messages)
+
+
+ class PipelineResult:
+     """
+     A class to manage and retrieve the results of a parallel pipeline execution.
+
+     The `PipelineResult` class provides methods for creating a result entry in a Redis database,
+     loading pipeline data from Redis, and retrieving the status and results of the pipeline execution.
+
+     Attributes:
+         pipeline (parallel_pipeline): The pipeline object representing the execution graph.
+         execution_id (str): A unique identifier for the execution of the pipeline.
+         redis_key (str): The key used to store and retrieve pipeline data from Redis.
+         redis_conn: A Redis connection object used to interact with the Redis database.
+         result_ttl (int): Time-to-live (TTL) for the result entry in Redis, in seconds.
+     """
+
+     def __init__(self, execution_id: str, result_ttl: int = DEFAULT_RESULT_TTL):
+         """
+         Initialize a PipelineResult object with an execution ID and optional result TTL.
+
+         :param execution_id: A unique identifier for the pipeline execution.
+         :type execution_id: str
+         :param result_ttl: The time-to-live (TTL) for the result entry in Redis. Defaults to DEFAULT_RESULT_TTL.
+         :type result_ttl: int
+         """
+         self.pipeline: parallel_pipeline = None
+         self.execution_id = execution_id
+         self.redis_key = f"{execution_id}-results-key"
+         self.result_ttl = result_ttl
+
+         if REDIS_SENTINEL_MASTER_NAME is not None:
+             parsed_redis_url = urlparse(REDIS_URL)
+             redis_sentinel = Sentinel(
+                 sentinels=[(parsed_redis_url.hostname, parsed_redis_url.port)],
+             )
+             self.redis_conn = redis_sentinel.master_for(
+                 REDIS_SENTINEL_MASTER_NAME,
+                 db=int(parsed_redis_url.path[1]) if parsed_redis_url.path else 0,
+                 password=parsed_redis_url.password,
+                 socket_connect_timeout=DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+                 socket_timeout=DEFAULT_REDIS_SOCKET_TIMEOUT,
+                 retry_on_timeout=DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+                 socket_keepalive=DEFAULT_REDIS_SOCKET_KEEPALIVE,
+                 health_check_interval=DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+             )
+         else:
+             self.redis_conn = RedisConnector().get_connection()
+
+     def create_result_entry(self, pipeline_json_str: str):
+         """
+         Store the serialized pipeline data in Redis with a specified TTL.
+
+         This method saves the JSON string representation of the pipeline in the Redis database
+         using the execution ID as the key. The entry is stored with a time-to-live (TTL) defined by `result_ttl`.
+
+         :param pipeline_json_str: A JSON string representing the pipeline execution graph.
+         :type pipeline_json_str: str
+         :raises ValueError: If the provided pipeline data is None or an empty string.
+         :raises RedisError: If there is an issue connecting to Redis or setting the value.
+         """
+         if not pipeline_json_str:
+             raise ValueError("No pipeline data passed to create result store")
+
+         try:
+             self.redis_conn.setex(self.redis_key, self.result_ttl, pipeline_json_str)
+         except RedisError as e:
+             raise RuntimeError(f"Failed to store pipeline data in Redis: {e}")
+
+     def load(self):
+         """
+         Load the pipeline data from Redis and reconstruct the pipeline object.
+
+         This method retrieves the JSON string stored in Redis and deserializes it
+         into a `parallel_pipeline` object, enabling access to the pipeline's execution details.
+
+         :raises RedisError: If there is an issue connecting to Redis or retrieving the data.
+         """
+         try:
+             pipeline_data = self.redis_conn.get(self.redis_key)
+             if pipeline_data:
+                 self.pipeline = parallel_pipeline.from_json(pipeline_data)
+             else:
+                 self.pipeline = None
+         except RedisError as e:
+             raise RuntimeError(f"Failed to load pipeline data from Redis: {e}")
+
+     @property
+     def status(self) -> str:
+         """
+         Get the current status of the pipeline execution.
+
+         This property checks the completion status of the pipeline and returns its current state.
+
+         :return: The status of the pipeline execution, which can be "complete", "pending", or "unavailable".
+         :rtype: str
+         """
+         if not self.pipeline:
+             return "unavailable"
+         return "complete" if self.pipeline.completed else "pending"
+
+     def get_results(self) -> dict:
+         """
+         Retrieve all results from the pipeline execution with unique actor identifiers.
+
+         This method aggregates results from the pipeline and ensures that each actor's result
+         has a unique identifier by appending a numeric suffix to duplicate actor names.
+
+         :return: A dictionary containing all results from the pipeline execution, keyed by unique actor identifiers.
+         :rtype: dict
+         """
+         if not self.pipeline:
+             return {}
+
+         results = {}
+         for result in self.pipeline.get_results():
+             for actor, res in result.items():
+                 unique_actor = self._get_unique_actor_name(actor, results)
+                 results[unique_actor] = res
+         return results
+
+     def get_result(self):
+         """
+         Retrieve a single result from the pipeline execution.
+
+         This method returns the result of a single execution step from the pipeline, if available.
+
+         :return: The result of a single execution step from the pipeline, or None if no pipeline is loaded.
+         """
+         if self.pipeline:
+             return self.pipeline.get_result()
+
+     def _get_unique_actor_name(self, actor: str, results: dict) -> str:
+         """
+         Generate a unique actor name by appending a numeric suffix if necessary.
+
+         :param actor: The base name of the actor.
+         :type actor: str
+         :param results: The current dictionary of results to check for uniqueness.
+         :type results: dict
+         :return: A unique actor name.
+         :rtype: str
+         """
+         if actor not in results:
+             return actor
+
+         suffix = 0
+         new_actor = f"{actor}-{suffix}"
+         while new_actor in results:
+             suffix += 1
+             new_actor = f"{actor}-{suffix}"
+         return new_actor
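
The two classes above appear intended to be used together: `parallel_pipeline` fans message groups out to the broker, while `PipelineResult` persists the serialized execution graph in Redis so a run can be looked up again by its execution id. The following sketch is editorial (not part of the package diff) and assumes a configured Dramatiq broker with a results backend, plus a `messages` list of message groups built elsewhere, each message carrying an `event` dict kwarg that the constructor writes into:

    # Editorial sketch; `messages` and the broker/worker setup are assumed, not shown in this diff.
    pipeline = parallel_pipeline(messages)   # messages: typing.List[typing.List[Message]]
    pipeline.run()                           # enqueues only the first message group
    store = PipelineResult(pipeline.execution_id)
    store.create_result_entry(pipeline.to_json())

    # Later, possibly from another process, look the run back up by its execution id:
    store = PipelineResult(pipeline.execution_id)
    store.load()
    if store.status == "complete":
        all_results = store.get_results()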
pypeline/pipelines/composition/pypeline_composition.py
@@ -0,0 +1,215 @@
+ import json
+ import typing
+ from copy import copy
+ from uuid import uuid4
+
+ import networkx as nx
+ from dramatiq import get_broker
+
+ from pypeline.barrier import LockingParallelBarrier
+ from pypeline.constants import REDIS_URL, PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+ from pypeline.utils.dramatiq_utils import register_lazy_actor
+ from pypeline.utils.module_utils import get_callable
+ from pypeline.utils.pipeline_utils import get_execution_graph
+
+
+ class Pypeline:
+     def __init__(
+         self,
+         pipeline: dict,
+         scenarios: dict = {},
+         broker=None,
+     ):
+         # Construct initial properties
+         self.pipeline = pipeline
+         self.broker = broker or get_broker()
+         self._starting_messages = []
+         self.scenarios = scenarios
+
+         # Get pipeline dag graph and find first task
+         pipeline_config = pipeline["config"]
+         self.graph = get_execution_graph(pipeline_config)
+         self.number_of_tasks = len(self.graph.nodes)
+         task_definitions = pipeline_config["taskDefinitions"]
+         first_task = list(pipeline_config["dagAdjacency"].keys())[0]
+
+         base_case_execution_id = None
+
+         # Process the scenarios one by one
+         for scenario in self.scenarios:
+             # The first scenario is the base case and always runs
+             if self.scenarios.index(scenario) == 0:
+                 base_case_execution_id = scenario.get("execution_id", None) or str(
+                     uuid4()
+                 )
+                 scenario["execution_id"] = base_case_execution_id
+                 scenario["base_case_execution_id"] = base_case_execution_id
+                 scenario["tasksToRunInScenario"] = list(self.graph.nodes)
+                 continue
+             tasks_in_reruns = scenario["taskReruns"]
+
+             # Find any tasks that have replacements for this scenario
+             tasks_in_replacements = list(scenario["taskReplacements"].keys())
+
+             distinct_scenario_tasks = list(set(tasks_in_reruns + tasks_in_replacements))
+             tasks_to_be_rerun_in_scenario = distinct_scenario_tasks
+
+             tasks_to_be_rerun_in_scenario = list(
+                 set(
+                     task
+                     for task in distinct_scenario_tasks
+                     for task in nx.descendants(self.graph, task)
+                 )
+                 | set(tasks_to_be_rerun_in_scenario)
+             )
+
+             self.number_of_tasks = self.number_of_tasks + len(
+                 tasks_to_be_rerun_in_scenario
+             )
+             scenario["tasksToRunInScenario"] = tasks_to_be_rerun_in_scenario
+             scenario["base_case_execution_id"] = base_case_execution_id
+             scenario["execution_id"] = scenario.get("execution_id", None) or str(
+                 uuid4()
+             )
+
+             # Check if any of the scenarios need to be kicked off now
+             if first_task in tasks_to_be_rerun_in_scenario:
+                 handler = task_definitions[first_task]["handlers"][
+                     scenario["taskReplacements"].get(first_task, 0)
+                 ]
+                 server_type = task_definitions[first_task].get("serverType", None)
+                 lazy_actor = register_lazy_actor(
+                     self.broker,
+                     get_callable(handler),
+                     pipeline_config["metadata"],
+                     server_type,
+                 )
+                 message = lazy_actor.message()
+                 message.options["pipeline"] = pipeline
+                 if pipeline_config["metadata"].get("maxRetry", None) is not None:
+                     message.options["max_retries"] = pipeline_config["metadata"][
+                         "maxRetry"
+                     ]
+                 message.options["task_replacements"] = copy(
+                     scenario["taskReplacements"]
+                 )
+                 message.options["execution_id"] = scenario["execution_id"]
+                 message.options["task_name"] = first_task
+                 message.options["base_case_execution_id"] = base_case_execution_id
+                 if scenario["settings"]:
+                     message.kwargs["settings"] = copy(scenario["settings"])
+                     message.kwargs["settings"]["execution_id"] = scenario[
+                         "execution_id"
+                     ]
+                     message.kwargs["settings"][
+                         "base_case_execution_id"
+                     ] = base_case_execution_id
+                 self._starting_messages.append(message)
+
+         for m in self._starting_messages:
+             m.options["scenarios"] = self.scenarios
+
+         # Run the first task of the first scenario no matter what
+         first_scenario_task_replacements = scenarios[0]["taskReplacements"]
+         first_scenario_settings = scenarios[0].get("settings", None)
+
+         handler = task_definitions[first_task]["handlers"][
+             first_scenario_task_replacements.get(first_task, 0)
+         ]
+         server_type = task_definitions[first_task].get("serverType", None)
+         lazy_actor = register_lazy_actor(
+             self.broker,
+             get_callable(handler),
+             pipeline_config["metadata"],
+             server_type,
+         )
+         message = lazy_actor.message()
+         message.options["pipeline"] = pipeline
+         if pipeline_config["metadata"].get("maxRetry", None) is not None:
+             message.options["max_retries"] = pipeline_config["metadata"]["maxRetry"]
+         message.options["task_replacements"] = first_scenario_task_replacements
+         message.options["execution_id"] = base_case_execution_id
+         message.options["task_name"] = first_task
+         message.options["scenarios"] = self.scenarios
+         message.options["base_case_execution_id"] = base_case_execution_id
+
+         if first_scenario_settings:
+             message.kwargs["settings"] = copy(first_scenario_settings)
+             message.kwargs["settings"]["execution_id"] = base_case_execution_id
+             message.kwargs["settings"][
+                 "base_case_execution_id"
+             ] = base_case_execution_id
+
+         self._starting_messages.append(message)
+
+     def run(self, *, delay=None):
+         for message in self._starting_messages:
+             task_key = (
+                 f"{message.options['execution_id']}-{message.options['task_name']}"
+             )
+             locking_parallel_barrier = LockingParallelBarrier(
+                 REDIS_URL,
+                 task_key=task_key,
+                 lock_key=f"{message.options['base_case_execution_id']}-lock",
+             )
+             locking_parallel_barrier.set_task_count(1)
+             self.broker.enqueue(message, delay=delay)
+
+         return self
+
+     def __len__(self):
+         return self.number_of_tasks
+
+     def completed(self):
+         locks = []
+
+         for scenario in self.scenarios:
+             locks.append(
+                 {
+                     "scenario_task_keys": [
+                         f"{scenario['execution_id']}-{task}"
+                         for task in scenario["tasksToRunInScenario"]
+                     ],
+                     "redis_lock_key": f"{scenario['base_case_execution_id']}-lock",
+                 }
+             )
+
+         for lock in locks:
+             for task_key in lock["scenario_task_keys"]:
+                 locking_parallel_barrier = LockingParallelBarrier(
+                     REDIS_URL, task_key=task_key, lock_key=lock["redis_lock_key"]
+                 )
+                 try:
+                     locking_parallel_barrier.acquire_lock(
+                         timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+                     )
+                     task_complete = True
+                     if locking_parallel_barrier.task_exists():
+                         remaining_tasks = locking_parallel_barrier.get_task_count()
+                         if remaining_tasks >= 1:
+                             task_complete = False
+                     else:
+                         task_complete = False
+                 finally:
+                     locking_parallel_barrier.release_lock()
+                 if not task_complete:
+                     return task_complete
+
+         return True
+
+     def to_json(self) -> str:
+         return json.dumps(
+             {
+                 "pipeline": self.pipeline,
+                 "scenarios": self.scenarios,
+             }
+         )
+
+     @classmethod
+     def from_json(cls, json_data: str) -> typing.Type["Pypeline"]:
+         data = json.loads(json_data)
+
+         return cls(
+             data["pipeline"],
+             scenarios=data["scenarios"],
+         )
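
The shape of the scenario dictionaries consumed by `Pypeline.__init__` is not documented in this file. The sketch below is inferred from the keys the constructor reads (`execution_id`, `taskReplacements`, `taskReruns`, `settings`); the task names and settings values are placeholders, and the first entry acts as the base case that runs every task in the DAG:

    # Editorial sketch of a scenarios payload; keys inferred from Pypeline.__init__, values are placeholders.
    scenarios = [
        {
            "taskReplacements": {},        # task name -> handler index; empty for the base case
            "taskReruns": [],
            "settings": {"region": "base"},
        },
        {
            # Later scenarios re-run only the replaced/rerun tasks plus their DAG descendants.
            "taskReplacements": {"b": 1},  # use the second handler registered for task "b"
            "taskReruns": ["c"],
            "settings": {"region": "variant"},
        },
    ]
    Pypeline(pipeline, scenarios=scenarios).run()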
pypeline/pipelines/factory.py
@@ -0,0 +1,86 @@
+ import typing
+ from dramatiq import get_broker, Message
+ from pypeline.pipelines.composition.parallel_pipeline_composition import (
+     parallel_pipeline,
+ )
+ from pypeline.dramatiq import LazyActor
+ from pypeline.utils.dramatiq_utils import register_lazy_actor
+ from pypeline.pipeline_settings_schema import (
+     MissingSettingsException,
+     create_pipeline_settings_schema,
+     PipelineScenarioSchema,
+ )
+ from pypeline.pipelines.composition.pypeline_composition import Pypeline
+ from pypeline.utils.config_utils import retrieve_latest_pipeline_config
+ from pypeline.utils.module_utils import get_callable
+ from pypeline.utils.pipeline_utils import (
+     get_execution_graph,
+     topological_sort_with_parallelism,
+ )
+
+
+ def dag_generator(
+     pipeline_id: str, scenarios: typing.List[typing.Dict] = [], *args, **kwargs
+ ) -> typing.Union[parallel_pipeline, Pypeline]:
+     """Generates a pipeline dag from a pre-defined pipeline yaml
+
+     :param pipeline_id: Id of the pipeline to generate
+     :param task_replacements: A dictionary of task names and handler index to run. E.g. {"a": 1} would run the handler
+         in the second index position.
+     :param scenarios:
+     :param args:
+     :param kwargs:
+     :return: Returns a parallel_pipeline object which can be run
+     """
+     pipeline = retrieve_latest_pipeline_config(pipeline_id=pipeline_id)
+
+     pipeline_config = pipeline["config"]
+     broker = get_broker()
+     broker.actors.clear()
+
+     if pipeline["schemaVersion"] == 2:
+         supplied_pipeline_settings_schema = create_pipeline_settings_schema(
+             pipeline_config["settings"]
+         )
+
+         # Validate scenarios settings to make sure they look okay
+         validated_scenarios = PipelineScenarioSchema(many=True).load(scenarios)
+
+         for scenario in validated_scenarios:
+             supplied_pipeline_settings_schema.load(scenario["settings"])
+
+         p = Pypeline(pipeline, scenarios=scenarios, broker=broker)
+         return p
+
+     graph = get_execution_graph(pipeline_config)
+     optimal_execution_graph = topological_sort_with_parallelism(graph.copy())
+     registered_actors: typing.Dict[str, LazyActor] = {}
+
+     messages: typing.List[typing.List[Message]] = []
+
+     task_definitions = pipeline_config["taskDefinitions"]
+     for task_group in optimal_execution_graph:
+         message_group = []
+         for task in task_group:
+             module_path = task_definitions[task]["handler"]
+             server_type = task_definitions[task].get("serverType", None)
+             tmp_handler = get_callable(module_path)
+             lazy_actor = register_lazy_actor(
+                 broker, tmp_handler, pipeline_config["metadata"], server_type
+             )
+             registered_actors[task] = lazy_actor
+             if args and not kwargs:
+                 msg = registered_actors[task].message(*args)
+             elif kwargs and not args:
+                 msg = registered_actors[task].message(**kwargs)
+             elif args and kwargs:
+                 msg = registered_actors[task].message(*args, **kwargs)
+             else:
+                 msg = registered_actors[task].message()
+             msg.options["task_ttl"] = pipeline_config["metadata"]["maxTtl"]
+             message_group.append(msg)
+
+         messages.append(message_group)
+     p = parallel_pipeline(messages)
+
+     return p
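
A hedged sketch of calling `dag_generator`: the pipeline id is a placeholder, and it assumes the pipeline YAML is registered so `retrieve_latest_pipeline_config` can find it, a broker is configured, and the task handlers accept an `event` dict, since `parallel_pipeline` writes the execution id into `m.kwargs["event"]`.

    # Editorial sketch; "demo-pipeline" and the event payload are assumptions, not package defaults.
    p = dag_generator("demo-pipeline", event={})   # non-version-2 schemas return a parallel_pipeline
    p.run()
    print(p.get_result(block=True))

    # schemaVersion 2 configs take scenarios and return a Pypeline instead:
    # p = dag_generator("demo-pipeline", scenarios=[{"taskReplacements": {}, "taskReruns": [], "settings": {}}])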
File without changes