scalable-pypeline 2.0.10__py2.py3-none-any.whl → 2.1.1__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pypeline/__init__.py +1 -1
- pypeline/barrier.py +3 -0
- pypeline/dramatiq.py +26 -154
- pypeline/flask/api/pipelines.py +60 -4
- pypeline/flask/api/schedules.py +1 -3
- pypeline/pipeline_config_schema.py +91 -3
- pypeline/pipeline_settings_schema.py +541 -0
- pypeline/pipelines/__init__.py +0 -0
- pypeline/pipelines/composition/__init__.py +0 -0
- pypeline/pipelines/composition/pypeline_composition.py +188 -0
- pypeline/pipelines/factory.py +107 -0
- pypeline/pipelines/middleware/__init__.py +0 -0
- pypeline/pipelines/middleware/pypeline_middleware.py +188 -0
- pypeline/utils/dramatiq_utils.py +126 -0
- pypeline/utils/module_utils.py +27 -2
- pypeline/utils/pipeline_utils.py +22 -37
- pypeline/utils/schema_utils.py +24 -0
- {scalable_pypeline-2.0.10.dist-info → scalable_pypeline-2.1.1.dist-info}/METADATA +1 -1
- scalable_pypeline-2.1.1.dist-info/RECORD +36 -0
- scalable_pypeline-2.0.10.dist-info/RECORD +0 -27
- /pypeline/{composition.py → pipelines/composition/parallel_pipeline_composition.py} +0 -0
- /pypeline/{middleware.py → pipelines/middleware/parallel_pipeline_middleware.py} +0 -0
- {scalable_pypeline-2.0.10.dist-info → scalable_pypeline-2.1.1.dist-info}/LICENSE +0 -0
- {scalable_pypeline-2.0.10.dist-info → scalable_pypeline-2.1.1.dist-info}/WHEEL +0 -0
- {scalable_pypeline-2.0.10.dist-info → scalable_pypeline-2.1.1.dist-info}/entry_points.txt +0 -0
- {scalable_pypeline-2.0.10.dist-info → scalable_pypeline-2.1.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,188 @@
|
|
1
|
+
import json
|
2
|
+
import typing
|
3
|
+
from copy import copy
|
4
|
+
from uuid import uuid4
|
5
|
+
|
6
|
+
import networkx as nx
|
7
|
+
from dramatiq import get_broker
|
8
|
+
|
9
|
+
from pypeline.barrier import LockingParallelBarrier
|
10
|
+
from pypeline.constants import REDIS_URL, PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
|
11
|
+
from pypeline.utils.dramatiq_utils import register_lazy_actor
|
12
|
+
from pypeline.utils.module_utils import get_callable
|
13
|
+
from pypeline.utils.pipeline_utils import get_execution_graph
|
14
|
+
|
15
|
+
|
16
|
+
class Pypeline:
    """Build and start the first message(s) of a DAG pipeline execution.

    The constructor reads ``pipeline["config"]`` (``dagAdjacency``,
    ``taskDefinitions`` and ``metadata``), prepares one starting message for
    the first task of the root execution and one for every scenario whose
    task set contains that first task. ``run`` enqueues those messages.
    """

    def __init__(
        self,
        pipeline: dict,
        pipeline_settings: dict = None,
        task_replacements: dict = None,
        scenarios: typing.Optional[typing.List[dict]] = None,
        broker=None,
        execution_id=None,
    ):
        """Prepare the starting messages for the pipeline.

        :param pipeline: Pipeline definition; must contain a ``"config"`` key.
        :param pipeline_settings: Optional settings passed to the first task
            as the ``settings`` keyword argument (shallow-copied per message).
        :param task_replacements: Maps a task name to the index of the handler
            to use for it; a missing task uses handler index 0.
        :param scenarios: Scenario dicts, each with ``taskReruns`` and
            ``taskReplacements``. NOTE: every scenario dict is updated in
            place with ``tasksToRunInScenario`` and ``execution_id``.
        :param broker: Broker to register actors on; defaults to
            ``get_broker()``.
        :param execution_id: Id of the root execution; a uuid4 string is
            generated when omitted.
        """
        # Construct initial properties
        self.pipeline = pipeline
        self.broker = broker or get_broker()
        self.execution_id = execution_id or str(uuid4())
        self._starting_messages = []
        # Fresh containers per instance: a literal ``{}`` default in the
        # signature would be shared by every Pypeline created without the
        # argument.
        self.scenarios = [] if scenarios is None else scenarios
        self.pipeline_settings = pipeline_settings
        self.task_replacements = (
            {} if task_replacements is None else task_replacements
        )

        # Get pipeline dag graph and find first task
        pipeline_config = pipeline["config"]
        self.graph = get_execution_graph(pipeline_config)
        self.number_of_tasks = len(self.graph.nodes)
        first_task = list(pipeline_config["dagAdjacency"].keys())[0]

        # Process the scenarios one by one
        for scenario in self.scenarios:
            tasks_in_reruns = scenario["taskReruns"]

            # Find any tasks that have replacements for this scenario
            tasks_in_replacements = list(scenario["taskReplacements"].keys())

            distinct_scenario_tasks = list(
                set(tasks_in_reruns + tasks_in_replacements)
            )

            # A scenario runs its named tasks plus everything downstream of
            # them in the graph.
            tasks_to_be_rerun_in_scenario = list(
                {
                    descendant
                    for scenario_task in distinct_scenario_tasks
                    for descendant in nx.descendants(self.graph, scenario_task)
                }
                | set(distinct_scenario_tasks)
            )

            self.number_of_tasks += len(tasks_to_be_rerun_in_scenario)
            scenario["tasksToRunInScenario"] = tasks_to_be_rerun_in_scenario
            scenario["execution_id"] = scenario.get("execution_id", None) or str(
                uuid4()
            )

            # Check if any of the scenarios need to be kicked off now
            if first_task in tasks_to_be_rerun_in_scenario:
                self._starting_messages.append(
                    self._build_starting_message(
                        first_task,
                        scenario["taskReplacements"].get(first_task, 0),
                        scenario["execution_id"],
                    )
                )

        # The root execution always starts at the first task.
        self._starting_messages.append(
            self._build_starting_message(
                first_task,
                self.task_replacements.get(first_task, 0),
                self.execution_id,
            )
        )

    def _build_starting_message(self, task_name, handler_index, execution_id):
        """Register the task's handler as an actor and build its message."""
        pipeline_config = self.pipeline["config"]
        handler = pipeline_config["taskDefinitions"][task_name]["handlers"][
            handler_index
        ]
        lazy_actor = register_lazy_actor(
            self.broker,
            get_callable(handler),
            pipeline_config["metadata"],
        )
        message = lazy_actor.message()
        message.options["pipeline"] = self.pipeline
        message.options["task_replacements"] = self.task_replacements
        message.options["execution_id"] = execution_id
        message.options["task_name"] = task_name
        message.options["scenarios"] = self.scenarios
        message.options["root_execution_id"] = self.execution_id

        if self.pipeline_settings:
            # Shallow copy so each message carries its own execution_id.
            message.kwargs["settings"] = copy(self.pipeline_settings)
            message.kwargs["settings"]["execution_id"] = execution_id

        return message

    def run(self, *, delay=None):
        """Set each starting task's barrier count to 1 and enqueue its message.

        :param delay: Forwarded to ``broker.enqueue``.
        :return: ``self``.
        """
        for message in self._starting_messages:
            task_key = (
                f"{message.options['execution_id']}-{message.options['task_name']}"
            )
            locking_parallel_barrier = LockingParallelBarrier(
                REDIS_URL, task_key=task_key, lock_key=f"{self.execution_id}-lock"
            )
            locking_parallel_barrier.set_task_count(1)
            self.broker.enqueue(message, delay=delay)

        return self

    def __len__(self):
        """Return the graph's node count plus the task count of every scenario."""
        return self.number_of_tasks

    def completed(self):
        """Return True when every root and scenario task barrier is finished.

        A task counts as finished only when its barrier key exists and its
        task count is below 1; a missing key means the task is not complete.
        """
        redis_task_keys = [
            f"{self.execution_id}-{node}" for node in list(self.graph.nodes)
        ]
        redis_lock_key = f"{self.execution_id}-lock"
        for scenario in self.scenarios:
            redis_task_keys = redis_task_keys + [
                f"{scenario['execution_id']}-{task}"
                for task in scenario["tasksToRunInScenario"]
            ]

        for task_key in redis_task_keys:
            locking_parallel_barrier = LockingParallelBarrier(
                REDIS_URL, task_key=task_key, lock_key=redis_lock_key
            )
            try:
                locking_parallel_barrier.acquire_lock(
                    timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
                )
                task_complete = True
                if locking_parallel_barrier.task_exists():
                    remaining_tasks = locking_parallel_barrier.get_task_count()
                    if remaining_tasks >= 1:
                        task_complete = False
                else:
                    task_complete = False
            finally:
                locking_parallel_barrier.release_lock()
            if not task_complete:
                return task_complete

        return True

    def to_json(self) -> str:
        """Serialize the constructor inputs (not the broker) to a JSON string."""
        return json.dumps(
            {
                "pipeline": self.pipeline,
                "pipeline_settings": self.pipeline_settings,
                "task_replacements": self.task_replacements,
                "scenarios": self.scenarios,
                "execution_id": self.execution_id,
            }
        )

    @classmethod
    def from_json(cls, json_data: str) -> "Pypeline":
        """Rebuild an instance from the string produced by ``to_json``."""
        data = json.loads(json_data)

        return cls(
            data["pipeline"],
            pipeline_settings=data["pipeline_settings"],
            task_replacements=data["task_replacements"],
            scenarios=data["scenarios"],
            execution_id=data["execution_id"],
        )
|
@@ -0,0 +1,107 @@
|
|
1
|
+
import typing
|
2
|
+
from dramatiq import get_broker, Message
|
3
|
+
from pypeline.pipelines.composition.parallel_pipeline_composition import (
|
4
|
+
parallel_pipeline,
|
5
|
+
)
|
6
|
+
from pypeline.dramatiq import LazyActor
|
7
|
+
from pypeline.utils.dramatiq_utils import register_lazy_actor
|
8
|
+
from pypeline.pipeline_settings_schema import (
|
9
|
+
MissingSettingsException,
|
10
|
+
create_pipeline_settings_schema,
|
11
|
+
PipelineScenarioSchema,
|
12
|
+
)
|
13
|
+
from pypeline.pipelines.composition.pypeline_composition import Pypeline
|
14
|
+
from pypeline.utils.config_utils import retrieve_latest_pipeline_config
|
15
|
+
from pypeline.utils.module_utils import get_callable
|
16
|
+
from pypeline.utils.pipeline_utils import (
|
17
|
+
get_execution_graph,
|
18
|
+
topological_sort_with_parallelism,
|
19
|
+
)
|
20
|
+
|
21
|
+
|
22
|
+
def dag_generator(
    pipeline_id: str,
    task_replacements: dict = None,
    scenarios: typing.List[typing.Dict] = None,
    *args,
    **kwargs
) -> typing.Union[parallel_pipeline, Pypeline]:
    """Generates a pipeline dag from a pre-defined pipeline yaml

    :param pipeline_id: Id of the pipeline to generate
    :param task_replacements: A dictionary of task names and handler index to run. E.g. {"a": 1} would run the handler
        in the second index position.
    :param scenarios: Scenario dicts; validated with ``PipelineScenarioSchema``
        when the pipeline config declares settings.
    :param args: Positional arguments given to every task message of a
        pipeline whose schema version is not 2.
    :param kwargs: Keyword arguments given to every task message of a pipeline
        whose schema version is not 2. For schema version 2 the ``settings``
        entry is popped and validated.
    :return: A ``Pypeline`` for schema version 2, otherwise a
        ``parallel_pipeline``; both can be run.
    :raises MissingSettingsException: The schema version 2 config lists
        required settings and no ``settings`` keyword argument was given.
    """
    # Fresh containers per call instead of shared mutable defaults.
    task_replacements = {} if task_replacements is None else task_replacements
    scenarios = [] if scenarios is None else scenarios

    pipeline = retrieve_latest_pipeline_config(pipeline_id=pipeline_id)

    pipeline_config = pipeline["config"]
    broker = get_broker()
    broker.actors.clear()

    if pipeline["schemaVersion"] == 2:
        # If the pipeline_config expects settings ensure we have them
        if (
            "settings" in pipeline_config
            and len(pipeline_config["settings"]["required"]) > 0
            and "settings" not in kwargs
        ):
            raise MissingSettingsException()

        # If we're here we expect to have settings. Pop them out of kwargs to validate
        inputted_settings = kwargs.pop("settings", {})
        if "settings" not in pipeline_config:
            # NOTE(review): scenarios are not forwarded on this path, matching
            # the previous behavior - confirm that is intended.
            return Pypeline(
                pipeline, task_replacements=task_replacements, broker=broker
            )

        supplied_pipeline_settings_schema = create_pipeline_settings_schema(
            pipeline_config["settings"]
        )

        # Validate scenarios settings to make sure they look okay
        validated_scenarios = PipelineScenarioSchema(many=True).load(scenarios)

        for scenario in validated_scenarios:
            supplied_pipeline_settings_schema.load(scenario["settings"])

        validated_settings = supplied_pipeline_settings_schema.load(
            inputted_settings
        )
        # The scenarios as supplied by the caller (not the loaded result) are
        # handed to the Pypeline, as before.
        return Pypeline(
            pipeline,
            pipeline_settings=validated_settings,
            task_replacements=task_replacements,
            scenarios=scenarios,
            broker=broker,
        )

    graph = get_execution_graph(pipeline_config)
    optimal_execution_graph = topological_sort_with_parallelism(graph.copy())

    messages: typing.List[typing.List[Message]] = []

    task_definitions = pipeline_config["taskDefinitions"]
    for task_group in optimal_execution_graph:
        message_group = []
        for task in task_group:
            tmp_handler = get_callable(task_definitions[task]["handler"])
            lazy_actor = register_lazy_actor(
                broker, tmp_handler, pipeline_config["metadata"]
            )
            # Empty args/kwargs unpack to nothing, so one call covers every
            # combination of supplied arguments.
            message_group.append(lazy_actor.message(*args, **kwargs))
        messages.append(message_group)

    return parallel_pipeline(messages)
|
File without changes
|
@@ -0,0 +1,188 @@
|
|
1
|
+
from copy import copy
|
2
|
+
|
3
|
+
import networkx as nx
|
4
|
+
from dramatiq import Middleware
|
5
|
+
|
6
|
+
from pypeline.barrier import LockingParallelBarrier
|
7
|
+
from pypeline.constants import PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
|
8
|
+
from pypeline.utils.module_utils import get_callable
|
9
|
+
from pypeline.utils.pipeline_utils import get_execution_graph
|
10
|
+
from pypeline.utils.dramatiq_utils import register_lazy_actor
|
11
|
+
|
12
|
+
|
13
|
+
class PypelineMiddleware(Middleware):
    """Advance a pipeline after each of its task messages is processed.

    For every successfully processed message that carries a ``pipeline``
    option, the finished task's barrier count is decremented and a message is
    enqueued for each child task whose ancestors have all finished.
    """

    def __init__(self, redis_url):
        """:param redis_url: Passed to every ``LockingParallelBarrier``."""
        self.redis_url = redis_url

    def after_process_message(self, broker, message, *, result=None, exception=None):
        """Enqueue the child tasks that became runnable once ``message`` finished.

        Does nothing when the task raised or when the message has no
        ``pipeline`` option.

        :raises Exception: An ancestor has no live count under the current
            execution and no barrier key under the root execution either.
        """
        if exception is not None:
            return

        if "pipeline" not in message.options:
            return

        pipeline_config = message.options["pipeline"]["config"]
        execution_id = message.options["execution_id"]
        root_execution_id = message.options["root_execution_id"]
        task_name = message.options["task_name"]
        # Every barrier of one pipeline run shares the root execution's lock.
        lock_key = f"{root_execution_id}-lock"

        # Signal to other jobs that current job is finished
        locking_parallel_barrier = LockingParallelBarrier(
            self.redis_url,
            task_key=f"{execution_id}-{task_name}",
            lock_key=lock_key,
        )
        try:
            locking_parallel_barrier.acquire_lock(
                timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
            )
            locking_parallel_barrier.decrement_task_count()
        finally:
            locking_parallel_barrier.release_lock()

        graph = get_execution_graph(pipeline_config)
        children_tasks = pipeline_config["dagAdjacency"].get(task_name, [])

        messages = []
        for child in children_tasks:
            # If the child's ancestor tasks aren't complete move onto the next
            # child to check. This must be ``continue``: stopping the loop
            # here would also skip later children that are ready to run.
            if not self._ancestors_complete(
                graph, child, execution_id, root_execution_id, lock_key
            ):
                continue

            if root_execution_id == execution_id:
                # Only the root execution fans out into scenarios, and only
                # where a scenario starts at this child (the finished parent
                # itself is not part of the scenario).
                child_predecessors = list(graph.predecessors(child))
                for scenario in message.options["scenarios"]:
                    if (
                        child in scenario["tasksToRunInScenario"]
                        and task_name in child_predecessors
                        and task_name not in scenario["tasksToRunInScenario"]
                    ):
                        # NOTE(review): the handler index comes from the
                        # message-level task_replacements, not from
                        # scenario["taskReplacements"] - confirm intended.
                        messages.append(
                            self._build_task_message(
                                broker,
                                message,
                                child,
                                scenario["execution_id"],
                                lock_key,
                            )
                        )

            messages.append(
                self._build_task_message(
                    broker, message, child, execution_id, lock_key
                )
            )

        for new_message in messages:
            broker.enqueue(new_message)

    def _ancestors_complete(
        self, graph, child, execution_id, root_execution_id, lock_key
    ):
        """Return False as soon as one ancestor of ``child`` still has tasks left."""
        for ancestor in sorted(nx.ancestors(graph, child)):
            remaining_tasks = self._remaining_tasks(
                f"{execution_id}-{ancestor}", lock_key
            )

            if not remaining_tasks:
                # No live count under this execution: fall back to the root
                # execution's barrier, which is required to exist.
                remaining_tasks = self._remaining_tasks(
                    f"{root_execution_id}-{ancestor}",
                    lock_key,
                    missing_error=f"Parent task {ancestor} barrier lock not found",
                )

            if remaining_tasks >= 1:
                return False

        return True

    def _remaining_tasks(self, task_key, lock_key, missing_error=None):
        """Read a barrier's task count under the lock; None when the key is absent.

        When ``missing_error`` is given, an absent key raises ``Exception``
        with that message instead of returning None.
        """
        locking_parallel_barrier = LockingParallelBarrier(
            self.redis_url,
            task_key=task_key,
            lock_key=lock_key,
        )
        try:
            locking_parallel_barrier.acquire_lock(
                timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
            )

            if locking_parallel_barrier.task_exists():
                return locking_parallel_barrier.get_task_count()
            if missing_error is not None:
                raise Exception(missing_error)
            return None
        finally:
            locking_parallel_barrier.release_lock()

    def _build_task_message(self, broker, message, task_name, execution_id, lock_key):
        """Set the task's barrier count to 1 and build its message for ``execution_id``."""
        pipeline = message.options["pipeline"]
        pipeline_config = pipeline["config"]
        task_replacements = message.options["task_replacements"]

        locking_parallel_barrier = LockingParallelBarrier(
            self.redis_url,
            task_key=f"{execution_id}-{task_name}",
            lock_key=lock_key,
        )
        locking_parallel_barrier.set_task_count(1)

        handler = pipeline_config["taskDefinitions"][task_name]["handlers"][
            task_replacements.get(task_name, 0)
        ]
        lazy_actor = register_lazy_actor(
            broker,
            get_callable(handler),
            pipeline_config["metadata"],
        )

        new_message = lazy_actor.message()
        new_message.options["pipeline"] = pipeline
        new_message.options["task_replacements"] = task_replacements
        new_message.options["execution_id"] = execution_id
        new_message.options["task_name"] = task_name
        new_message.options["root_execution_id"] = message.options[
            "root_execution_id"
        ]
        new_message.options["scenarios"] = message.options["scenarios"]
        if "settings" in message.kwargs:
            # Always copy: writing execution_id into the parent's own settings
            # dict would alias it across messages.
            new_message.kwargs["settings"] = copy(message.kwargs["settings"])
            new_message.kwargs["settings"]["execution_id"] = execution_id

        return new_message
|
@@ -0,0 +1,126 @@
|
|
1
|
+
import os.path
|
2
|
+
import sys
|
3
|
+
import typing
|
4
|
+
from typing import Optional, Callable, Union, Awaitable
|
5
|
+
from functools import wraps
|
6
|
+
from typing import TYPE_CHECKING, TypeVar
|
7
|
+
from dramatiq import Broker, actor as register_actor
|
8
|
+
|
9
|
+
from pypeline.constants import (
|
10
|
+
DEFAULT_TASK_MAX_RETRY,
|
11
|
+
DEFAULT_TASK_MIN_BACKOFF,
|
12
|
+
MS_IN_SECONDS,
|
13
|
+
DEFAULT_TASK_MAX_BACKOFF,
|
14
|
+
DEFAULT_TASK_TTL,
|
15
|
+
DEFAULT_RESULT_TTL,
|
16
|
+
)
|
17
|
+
|
18
|
+
# ParamSpec is imported from typing_extensions only while type checking; at
# runtime a plain TypeVar named "P" is used in its place.
if TYPE_CHECKING:
    from typing_extensions import ParamSpec

    P = ParamSpec("P")
else:
    P = TypeVar("P")

# Type variable used for the wrapped callable's return value.
R = TypeVar("R")
|
26
|
+
|
27
|
+
|
28
|
+
def guess_code_directory(broker):
    """Return the directory of the top-level package of the broker's first actor.

    The first actor in ``broker.actors`` is inspected; the leading component
    of its function's ``__module__`` is looked up in ``sys.modules`` and the
    directory of that module's ``__file__`` is returned.
    """
    first_actor = next(iter(broker.actors.values()))
    top_level_name = first_actor.fn.__module__.partition(".")[0]
    return os.path.dirname(sys.modules[top_level_name].__file__)
|
33
|
+
|
34
|
+
|
35
|
+
def list_managed_actors(broker, queues):
    """Return the broker's actors, restricted to ``queues`` when any are given.

    With no queues the ``broker.actors.values()`` view itself is returned;
    otherwise a list of the actors whose ``queue_name`` is in ``queues``.
    """
    wanted_queues = set(queues)
    registered = broker.actors.values()
    if wanted_queues:
        return [actor for actor in registered if actor.queue_name in wanted_queues]
    return registered
|
42
|
+
|
43
|
+
|
44
|
+
def register_lazy_actor(
    broker: Broker,
    fn: Optional[Callable[P, Union[Awaitable[R], R]]] = None,
    pipeline_meta: Optional[typing.Dict] = None,
    **kwargs,
) -> "LazyActor":
    """Wrap ``fn`` in a ``LazyActor`` configured from pipeline metadata and register it.

    :param broker: Broker the actor is registered on.
    :param fn: Callable to run as the actor.
    :param pipeline_meta: Pipeline metadata. Recognized keys: ``queue``,
        ``maxRetry``, ``retryBackoff``, ``retryBackoffMax``, ``maxTtl``,
        ``store_results`` and ``result_ttl``; absent keys fall back to the
        module's defaults.
    :param kwargs: Extra actor options. Options derived from
        ``pipeline_meta`` overwrite same-named entries.
    :return: The registered ``LazyActor``.
    """
    # Avoid a shared mutable default in the signature.
    pipeline_meta = {} if pipeline_meta is None else pipeline_meta

    kwargs["queue_name"] = pipeline_meta.get("queue", "default")
    kwargs["max_retries"] = pipeline_meta.get("maxRetry", DEFAULT_TASK_MAX_RETRY)
    # Convert from seconds to milliseconds
    kwargs["min_backoff"] = (
        pipeline_meta.get("retryBackoff", DEFAULT_TASK_MIN_BACKOFF) * MS_IN_SECONDS
    )
    kwargs["max_backoff"] = (
        pipeline_meta.get("retryBackoffMax", DEFAULT_TASK_MAX_BACKOFF) * MS_IN_SECONDS
    )
    kwargs["time_limit"] = pipeline_meta.get("maxTtl", DEFAULT_TASK_TTL) * MS_IN_SECONDS
    # Results are stored unless the metadata sets store_results to a falsy value
    kwargs["store_results"] = pipeline_meta.get("store_results", True)
    if kwargs["store_results"]:
        kwargs["result_ttl"] = (
            pipeline_meta.get("result_ttl", DEFAULT_RESULT_TTL) * MS_IN_SECONDS
        )
    lazy_actor: LazyActor = LazyActor(fn, kwargs)
    lazy_actor.register(broker)
    return lazy_actor
|
69
|
+
|
70
|
+
|
71
|
+
def ensure_return_value(default_value=None):
    """Build a decorator that substitutes ``default_value`` for a ``None`` result.

    The decorated function is called unchanged; only when it returns ``None``
    is ``default_value`` returned in its place.
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            outcome = func(*args, **kwargs)
            return default_value if outcome is None else outcome

        return wrapper

    return decorator
|
86
|
+
|
87
|
+
|
88
|
+
class LazyActor(object):
    """Intermediate object that registers an actor on a broker and proxies to it.

    Until ``register`` is called the object only wraps ``fn``; afterwards
    unknown attributes are forwarded to the registered actor.
    """

    def __init__(self, fn, kw):
        """:param fn: Callable to run. :param kw: Options used at registration."""
        self.fn = fn
        self.kw = kw
        self.actor = None

    def __call__(self, *a, **kw):
        """Call the wrapped function directly, bypassing the broker."""
        return self.fn(*a, **kw)

    def __repr__(self):
        return "<%s %s.%s>" % (
            self.__class__.__name__,
            self.fn.__module__,
            self.fn.__name__,
        )

    def __getattr__(self, name):
        """Forward unknown attributes to the registered actor.

        :raises AttributeError: No actor has been registered yet.
        """
        # Read from __dict__ directly: ``self.actor`` would re-enter
        # __getattr__ without bound on an instance whose __init__ never ran
        # (for example one created by copy or unpickling).
        actor = self.__dict__.get("actor")
        if not actor:
            raise AttributeError(name)
        return getattr(actor, name)

    def register(self, broker):
        """Register ``fn`` on ``broker`` under the name ``<module>.<function>``.

        The function is wrapped so that a ``None`` result becomes ``True``.
        """
        self.actor = register_actor(
            actor_name=f"{self.fn.__module__}.{self.fn.__name__}",
            broker=broker,
            **self.kw,
        )(ensure_return_value(default_value=True)(self.fn))

    # Next is regular actor API.
    def send(self, *a, **kw):
        return self.actor.send(*a, **kw)

    def message(self, *a, **kw):
        return self.actor.message(*a, **kw)

    def send_with_options(self, *a, **kw):
        return self.actor.send_with_options(*a, **kw)
|
pypeline/utils/module_utils.py
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
""" Utilities for loading modules/callables based on strings.
|
2
2
|
"""
|
3
|
-
|
3
|
+
|
4
4
|
import re
|
5
5
|
import logging
|
6
6
|
import importlib
|
7
7
|
from typing import Callable
|
8
|
-
from pypeline.constants import API_ACCESS_KEY, PYPELINE_CLIENT_PKG_NAME
|
9
8
|
|
10
9
|
logger = logging.getLogger(__name__)
|
11
10
|
|
@@ -81,3 +80,29 @@ def match_prefix_suffix(string: str, prefix_p: str, suffix_p: str) -> bool:
|
|
81
80
|
if match_prefix(string, prefix_p) and match_suffix(string, suffix_p):
|
82
81
|
return True
|
83
82
|
return False
|
83
|
+
|
84
|
+
|
85
|
+
def get_module(resource_dot_path: str):
    """Import and return the module part of a 'resource dot path'.

    Everything before the final dot is treated as the module path,
    e.g. package.subdir.feature_file.MyCallable imports
    package.subdir.feature_file.
    """
    module_path, _, _ = resource_dot_path.rpartition(".")
    return importlib.import_module(module_path)
|
92
|
+
|
93
|
+
|
94
|
+
def get_callable_name(resource_dot_path: str) -> str:
    """Return the last dot-separated component of a config string.

    e.g. package.subdir.feature_file.MyCallable -> MyCallable
    """
    return resource_dot_path.rpartition(".")[2]
|
100
|
+
|
101
|
+
|
102
|
+
def get_callable(resource_dot_path: str) -> Callable:
    """Resolve a config string to the attribute it names.

    The module part is imported and the final component is looked up on it,
    e.g. package.subdir.feature_file.MyCallable
    """
    return getattr(
        get_module(resource_dot_path), get_callable_name(resource_dot_path)
    )
|