scalable-pypeline 2.1.3__py2.py3-none-any.whl → 2.1.5__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pypeline/__init__.py +1 -1
- pypeline/constants.py +1 -0
- pypeline/dramatiq.py +28 -23
- pypeline/flask/api/pipelines.py +1 -25
- pypeline/pipelines/composition/pypeline_composition.py +67 -49
- pypeline/pipelines/factory.py +10 -35
- pypeline/pipelines/middleware/pypeline_middleware.py +15 -12
- {scalable_pypeline-2.1.3.dist-info → scalable_pypeline-2.1.5.dist-info}/METADATA +1 -1
- {scalable_pypeline-2.1.3.dist-info → scalable_pypeline-2.1.5.dist-info}/RECORD +13 -14
- pypeline/pipelines/middleware/deduplication_middleware.py +0 -94
- {scalable_pypeline-2.1.3.dist-info → scalable_pypeline-2.1.5.dist-info}/LICENSE +0 -0
- {scalable_pypeline-2.1.3.dist-info → scalable_pypeline-2.1.5.dist-info}/WHEEL +0 -0
- {scalable_pypeline-2.1.3.dist-info → scalable_pypeline-2.1.5.dist-info}/entry_points.txt +0 -0
- {scalable_pypeline-2.1.3.dist-info → scalable_pypeline-2.1.5.dist-info}/top_level.txt +0 -0
pypeline/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "2.1.3"
|
1
|
+
__version__ = "2.1.5"
|
pypeline/constants.py
CHANGED
@@ -36,6 +36,7 @@ DEFAULT_BROKER_CONNECTION_ATTEMPTS = int(
|
|
36
36
|
DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT = int(
|
37
37
|
os.getenv("DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT", 30)
|
38
38
|
)
|
39
|
+
MESSAGE_BROKER = os.getenv("MESSAGE_BROKER", "RABBITMQ")
|
39
40
|
|
40
41
|
MS_IN_SECONDS = 1000
|
41
42
|
API_PATH_V1 = "/api/v1"
|
pypeline/dramatiq.py
CHANGED
@@ -5,6 +5,8 @@ import logging
|
|
5
5
|
import click
|
6
6
|
from urllib.parse import urlparse
|
7
7
|
|
8
|
+
from dramatiq.brokers.redis import RedisBroker
|
9
|
+
|
8
10
|
from pypeline.extensions import pypeline_config
|
9
11
|
from warnings import warn
|
10
12
|
from apscheduler.schedulers.blocking import BlockingScheduler
|
@@ -31,12 +33,10 @@ from pypeline.constants import (
|
|
31
33
|
DEFAULT_BROKER_CONNECTION_HEARTBEAT,
|
32
34
|
DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
|
33
35
|
DEFAULT_BROKER_CONNECTION_ATTEMPTS,
|
36
|
+
MESSAGE_BROKER,
|
34
37
|
)
|
35
38
|
from pypeline.pipelines.middleware.parallel_pipeline_middleware import ParallelPipeline
|
36
39
|
from pypeline.pipelines.middleware.pypeline_middleware import PypelineMiddleware
|
37
|
-
from pypeline.pipelines.middleware.deduplication_middleware import (
|
38
|
-
DeduplicationMiddleware,
|
39
|
-
)
|
40
40
|
from pypeline.utils.config_utils import (
|
41
41
|
retrieve_latest_schedule_config,
|
42
42
|
get_service_config_for_worker,
|
@@ -56,27 +56,32 @@ logger = logging.getLogger(__name__)
|
|
56
56
|
|
57
57
|
def configure_default_broker(broker: Broker = None):
|
58
58
|
redis_backend = RedisBackend(url=REDIS_URL)
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
59
|
+
|
60
|
+
if MESSAGE_BROKER == "RABBITMQ":
|
61
|
+
parsed_url = urlparse(RABBIT_URL)
|
62
|
+
credentials = pika.PlainCredentials(parsed_url.username, parsed_url.password)
|
63
|
+
broker = (
|
64
|
+
broker
|
65
|
+
if broker is not None
|
66
|
+
else RabbitmqBroker(
|
67
|
+
host=parsed_url.hostname,
|
68
|
+
port=parsed_url.port,
|
69
|
+
credentials=credentials,
|
70
|
+
heartbeat=DEFAULT_BROKER_CONNECTION_HEARTBEAT,
|
71
|
+
connection_attempts=DEFAULT_BROKER_CONNECTION_ATTEMPTS,
|
72
|
+
blocked_connection_timeout=DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
|
73
|
+
)
|
71
74
|
)
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
75
|
+
|
76
|
+
elif MESSAGE_BROKER == "REDIS":
|
77
|
+
broker = broker if broker is not None else RedisBroker(url=REDIS_URL)
|
78
|
+
|
79
|
+
broker.add_middleware(Results(backend=redis_backend))
|
80
|
+
broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))
|
81
|
+
broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
|
82
|
+
broker.add_middleware(CurrentMessage())
|
83
|
+
register_actors_for_workers(broker)
|
84
|
+
set_broker(broker)
|
80
85
|
|
81
86
|
|
82
87
|
def register_actors_for_workers(broker: Broker):
|
pypeline/flask/api/pipelines.py
CHANGED
@@ -56,26 +56,6 @@ class InvokePipelineSchema(Schema):
|
|
56
56
|
example={"document_id": "123", "send_alert": True},
|
57
57
|
required=False,
|
58
58
|
)
|
59
|
-
settings = fields.Raw(
|
60
|
-
description="Payload contains settings for a given pipeline",
|
61
|
-
example={
|
62
|
-
"param1": "Dataset",
|
63
|
-
"param2": 1,
|
64
|
-
"param3": 2,
|
65
|
-
},
|
66
|
-
required=False,
|
67
|
-
)
|
68
|
-
|
69
|
-
task_replacements = fields.Raw(
|
70
|
-
description="A dictionary of task definitions as the key and the value of the index for which handler"
|
71
|
-
" should be executed. If none provided it will default to the first handler in the list at index position 0.",
|
72
|
-
example={
|
73
|
-
"a": 1,
|
74
|
-
"b": 3,
|
75
|
-
},
|
76
|
-
required=False,
|
77
|
-
)
|
78
|
-
|
79
59
|
scenarios = fields.List(
|
80
60
|
fields.Nested(PipelineScenarioSchema),
|
81
61
|
metadata={"description": "List of scenarios to run for a given pipeline"},
|
@@ -198,20 +178,16 @@ class PipelineInvoke(MethodView):
|
|
198
178
|
retval = {"pipeline_id": pipeline_id, "status": "starting"}
|
199
179
|
try:
|
200
180
|
chain_payload = payload.get("chain_payload", {})
|
201
|
-
settings = payload.get("settings", None)
|
202
|
-
task_replacements = payload.get("task_replacements", {})
|
203
181
|
scenarios = payload.get("scenarios", [])
|
204
182
|
if pipeline_config["schemaVersion"] == 1:
|
205
183
|
pipeline = dag_generator(
|
206
184
|
pipeline_id=pipeline_id,
|
207
185
|
event=chain_payload,
|
208
186
|
)
|
209
|
-
elif pipeline_config["schemaVersion"] == 2
|
187
|
+
elif pipeline_config["schemaVersion"] == 2:
|
210
188
|
pipeline = dag_generator(
|
211
189
|
pipeline_id=pipeline_id,
|
212
|
-
task_replacements=task_replacements,
|
213
190
|
scenarios=scenarios,
|
214
|
-
settings=settings,
|
215
191
|
)
|
216
192
|
retval["scenarios"] = pipeline.scenarios
|
217
193
|
pipeline.run()
|
@@ -17,8 +17,6 @@ class Pypeline:
|
|
17
17
|
def __init__(
|
18
18
|
self,
|
19
19
|
pipeline: dict,
|
20
|
-
pipeline_settings: dict = None,
|
21
|
-
task_replacements: dict = {},
|
22
20
|
scenarios: dict = {},
|
23
21
|
broker=None,
|
24
22
|
execution_id=None,
|
@@ -26,11 +24,8 @@ class Pypeline:
|
|
26
24
|
# Construct initial properties
|
27
25
|
self.pipeline = pipeline
|
28
26
|
self.broker = broker or get_broker()
|
29
|
-
self.execution_id = execution_id or str(uuid4())
|
30
27
|
self._starting_messages = []
|
31
28
|
self.scenarios = scenarios
|
32
|
-
self.pipeline_settings = pipeline_settings
|
33
|
-
self.task_replacements = task_replacements
|
34
29
|
|
35
30
|
# Get pipeline dag graph and find first task
|
36
31
|
pipeline_config = pipeline["config"]
|
@@ -39,8 +34,19 @@ class Pypeline:
|
|
39
34
|
task_definitions = pipeline_config["taskDefinitions"]
|
40
35
|
first_task = list(pipeline_config["dagAdjacency"].keys())[0]
|
41
36
|
|
37
|
+
base_case_execution_id = None
|
38
|
+
|
42
39
|
# Process the scenarios one by one
|
43
40
|
for scenario in self.scenarios:
|
41
|
+
# The first scenario is the base case and always runs
|
42
|
+
if self.scenarios.index(scenario) == 0:
|
43
|
+
base_case_execution_id = scenario.get("execution_id", None) or str(
|
44
|
+
uuid4()
|
45
|
+
)
|
46
|
+
scenario["execution_id"] = base_case_execution_id
|
47
|
+
scenario["base_case_execution_id"] = base_case_execution_id
|
48
|
+
scenario["tasksToRunInScenario"] = list(self.graph.nodes)
|
49
|
+
continue
|
44
50
|
tasks_in_reruns = scenario["taskReruns"]
|
45
51
|
|
46
52
|
# Find any tasks that have replacements for this scenario
|
@@ -62,6 +68,7 @@ class Pypeline:
|
|
62
68
|
tasks_to_be_rerun_in_scenario
|
63
69
|
)
|
64
70
|
scenario["tasksToRunInScenario"] = tasks_to_be_rerun_in_scenario
|
71
|
+
scenario["base_case_execution_id"] = base_case_execution_id
|
65
72
|
scenario["execution_id"] = scenario.get("execution_id", None) or str(
|
66
73
|
uuid4()
|
67
74
|
)
|
@@ -78,22 +85,31 @@ class Pypeline:
|
|
78
85
|
)
|
79
86
|
message = lazy_actor.message()
|
80
87
|
message.options["pipeline"] = pipeline
|
81
|
-
message.options["task_replacements"] =
|
88
|
+
message.options["task_replacements"] = copy(
|
89
|
+
scenario["taskReplacements"]
|
90
|
+
)
|
82
91
|
message.options["execution_id"] = scenario["execution_id"]
|
83
92
|
message.options["task_name"] = first_task
|
84
|
-
message.options["
|
85
|
-
if
|
86
|
-
message.kwargs["settings"] = copy(
|
93
|
+
message.options["base_case_execution_id"] = base_case_execution_id
|
94
|
+
if scenario["settings"]:
|
95
|
+
message.kwargs["settings"] = copy(scenario["settings"])
|
87
96
|
message.kwargs["settings"]["execution_id"] = scenario[
|
88
97
|
"execution_id"
|
89
98
|
]
|
99
|
+
message.kwargs["settings"][
|
100
|
+
"base_case_execution_id"
|
101
|
+
] = base_case_execution_id
|
90
102
|
self._starting_messages.append(message)
|
91
103
|
|
92
104
|
for m in self._starting_messages:
|
93
105
|
m.options["scenarios"] = self.scenarios
|
94
106
|
|
107
|
+
# Run the first task of the first scenario no matter what
|
108
|
+
first_scenario_task_replacements = scenarios[0]["taskReplacements"]
|
109
|
+
first_scenario_settings = scenarios[0].get("settings", None)
|
110
|
+
|
95
111
|
handler = task_definitions[first_task]["handlers"][
|
96
|
-
|
112
|
+
first_scenario_task_replacements.get(first_task, 0)
|
97
113
|
]
|
98
114
|
lazy_actor = register_lazy_actor(
|
99
115
|
self.broker,
|
@@ -102,15 +118,18 @@ class Pypeline:
|
|
102
118
|
)
|
103
119
|
message = lazy_actor.message()
|
104
120
|
message.options["pipeline"] = pipeline
|
105
|
-
message.options["task_replacements"] =
|
106
|
-
message.options["execution_id"] =
|
121
|
+
message.options["task_replacements"] = first_scenario_task_replacements
|
122
|
+
message.options["execution_id"] = base_case_execution_id
|
107
123
|
message.options["task_name"] = first_task
|
108
124
|
message.options["scenarios"] = self.scenarios
|
109
|
-
message.options["
|
125
|
+
message.options["base_case_execution_id"] = base_case_execution_id
|
110
126
|
|
111
|
-
if
|
112
|
-
message.kwargs["settings"] = copy(
|
113
|
-
message.kwargs["settings"]["execution_id"] =
|
127
|
+
if first_scenario_settings:
|
128
|
+
message.kwargs["settings"] = copy(first_scenario_settings)
|
129
|
+
message.kwargs["settings"]["execution_id"] = base_case_execution_id
|
130
|
+
message.kwargs["settings"][
|
131
|
+
"base_case_execution_id"
|
132
|
+
] = base_case_execution_id
|
114
133
|
|
115
134
|
self._starting_messages.append(message)
|
116
135
|
|
@@ -120,7 +139,9 @@ class Pypeline:
|
|
120
139
|
f"{message.options['execution_id']}-{message.options['task_name']}"
|
121
140
|
)
|
122
141
|
locking_parallel_barrier = LockingParallelBarrier(
|
123
|
-
REDIS_URL,
|
142
|
+
REDIS_URL,
|
143
|
+
task_key=task_key,
|
144
|
+
lock_key=f"{message.options['base_case_execution_id']}-lock",
|
124
145
|
)
|
125
146
|
locking_parallel_barrier.set_task_count(1)
|
126
147
|
self.broker.enqueue(message, delay=delay)
|
@@ -131,36 +152,39 @@ class Pypeline:
|
|
131
152
|
return self.number_of_tasks
|
132
153
|
|
133
154
|
def completed(self):
|
134
|
-
|
135
|
-
f"{self.execution_id}-{node}" for node in list(self.graph.nodes)
|
136
|
-
]
|
137
|
-
redis_lock_key = f"{self.execution_id}-lock"
|
138
|
-
for scenario in self.scenarios:
|
139
|
-
scenario_task_keys = [
|
140
|
-
f"{scenario['execution_id']}-{task}"
|
141
|
-
for task in scenario["tasksToRunInScenario"]
|
142
|
-
]
|
143
|
-
redis_task_keys = redis_task_keys + scenario_task_keys
|
155
|
+
locks = []
|
144
156
|
|
145
|
-
for
|
146
|
-
|
147
|
-
|
157
|
+
for scenario in self.scenarios:
|
158
|
+
locks.append(
|
159
|
+
{
|
160
|
+
"scenario_task_keys": [
|
161
|
+
f"{scenario['execution_id']}-{task}"
|
162
|
+
for task in scenario["tasksToRunInScenario"]
|
163
|
+
],
|
164
|
+
"redis_lock_key": f"{scenario['base_case_execution_id']}-lock",
|
165
|
+
}
|
148
166
|
)
|
149
|
-
|
150
|
-
|
151
|
-
|
167
|
+
|
168
|
+
for lock in locks:
|
169
|
+
for task_key in lock["scenario_task_keys"]:
|
170
|
+
locking_parallel_barrier = LockingParallelBarrier(
|
171
|
+
REDIS_URL, task_key=task_key, lock_key=lock["redis_lock_key"]
|
152
172
|
)
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
173
|
+
try:
|
174
|
+
locking_parallel_barrier.acquire_lock(
|
175
|
+
timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
|
176
|
+
)
|
177
|
+
task_complete = True
|
178
|
+
if locking_parallel_barrier.task_exists():
|
179
|
+
remaining_tasks = locking_parallel_barrier.get_task_count()
|
180
|
+
if remaining_tasks >= 1:
|
181
|
+
task_complete = False
|
182
|
+
else:
|
157
183
|
task_complete = False
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
if not task_complete:
|
163
|
-
return task_complete
|
184
|
+
finally:
|
185
|
+
locking_parallel_barrier.release_lock()
|
186
|
+
if not task_complete:
|
187
|
+
return task_complete
|
164
188
|
|
165
189
|
return True
|
166
190
|
|
@@ -168,10 +192,7 @@ class Pypeline:
|
|
168
192
|
return json.dumps(
|
169
193
|
{
|
170
194
|
"pipeline": self.pipeline,
|
171
|
-
"pipeline_settings": self.pipeline_settings,
|
172
|
-
"task_replacements": self.task_replacements,
|
173
195
|
"scenarios": self.scenarios,
|
174
|
-
"execution_id": self.execution_id,
|
175
196
|
}
|
176
197
|
)
|
177
198
|
|
@@ -181,8 +202,5 @@ class Pypeline:
|
|
181
202
|
|
182
203
|
return cls(
|
183
204
|
data["pipeline"],
|
184
|
-
pipeline_settings=data["pipeline_settings"],
|
185
|
-
task_replacements=data["task_replacements"],
|
186
205
|
scenarios=data["scenarios"],
|
187
|
-
execution_id=data["execution_id"],
|
188
206
|
)
|
pypeline/pipelines/factory.py
CHANGED
@@ -20,11 +20,7 @@ from pypeline.utils.pipeline_utils import (
|
|
20
20
|
|
21
21
|
|
22
22
|
def dag_generator(
|
23
|
-
pipeline_id: str,
|
24
|
-
task_replacements: dict = {},
|
25
|
-
scenarios: typing.List[typing.Dict] = [],
|
26
|
-
*args,
|
27
|
-
**kwargs
|
23
|
+
pipeline_id: str, scenarios: typing.List[typing.Dict] = [], *args, **kwargs
|
28
24
|
) -> typing.Union[parallel_pipeline, Pypeline]:
|
29
25
|
"""Generates a pipeline dag from a pre-defined pipeline yaml
|
30
26
|
|
@@ -43,40 +39,19 @@ def dag_generator(
|
|
43
39
|
broker.actors.clear()
|
44
40
|
|
45
41
|
if pipeline["schemaVersion"] == 2:
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
and len(pipeline_config["settings"]["required"]) > 0
|
50
|
-
and "settings" not in kwargs
|
51
|
-
):
|
52
|
-
raise MissingSettingsException()
|
42
|
+
supplied_pipeline_settings_schema = create_pipeline_settings_schema(
|
43
|
+
pipeline_config["settings"]
|
44
|
+
)
|
53
45
|
|
54
|
-
#
|
55
|
-
|
56
|
-
if "settings" in pipeline_config:
|
57
|
-
supplied_pipeline_settings_schema = create_pipeline_settings_schema(
|
58
|
-
pipeline_config["settings"]
|
59
|
-
)
|
60
|
-
|
61
|
-
# Validate scenarios settings to make sure they look okay
|
62
|
-
validated_scenarios = PipelineScenarioSchema(many=True).load(scenarios)
|
46
|
+
# Validate scenarios settings to make sure they look okay
|
47
|
+
validated_scenarios = PipelineScenarioSchema(many=True).load(scenarios)
|
63
48
|
|
64
|
-
|
65
|
-
|
49
|
+
for scenario in validated_scenarios:
|
50
|
+
supplied_pipeline_settings_schema.load(scenario["settings"])
|
66
51
|
|
67
|
-
|
68
|
-
inputted_settings
|
69
|
-
)
|
70
|
-
p = Pypeline(
|
71
|
-
pipeline,
|
72
|
-
pipeline_settings=validated_settings,
|
73
|
-
task_replacements=task_replacements,
|
74
|
-
scenarios=scenarios,
|
75
|
-
broker=broker,
|
76
|
-
)
|
77
|
-
else:
|
78
|
-
p = Pypeline(pipeline, task_replacements=task_replacements, broker=broker)
|
52
|
+
p = Pypeline(pipeline, scenarios=scenarios, broker=broker)
|
79
53
|
return p
|
54
|
+
|
80
55
|
graph = get_execution_graph(pipeline_config)
|
81
56
|
optimal_execution_graph = topological_sort_with_parallelism(graph.copy())
|
82
57
|
registered_actors: typing.Dict[str, LazyActor] = {}
|
@@ -35,7 +35,7 @@ class PypelineMiddleware(Middleware):
|
|
35
35
|
locking_parallel_barrier = LockingParallelBarrier(
|
36
36
|
self.redis_url,
|
37
37
|
task_key=task_key,
|
38
|
-
lock_key=f"{message.options['
|
38
|
+
lock_key=f"{message.options['base_case_execution_id']}-lock",
|
39
39
|
)
|
40
40
|
try:
|
41
41
|
locking_parallel_barrier.acquire_lock(
|
@@ -60,7 +60,7 @@ class PypelineMiddleware(Middleware):
|
|
60
60
|
locking_parallel_barrier = LockingParallelBarrier(
|
61
61
|
self.redis_url,
|
62
62
|
task_key=task_key,
|
63
|
-
lock_key=f"{message.options['
|
63
|
+
lock_key=f"{message.options['base_case_execution_id']}-lock",
|
64
64
|
)
|
65
65
|
try:
|
66
66
|
locking_parallel_barrier.acquire_lock(
|
@@ -75,12 +75,12 @@ class PypelineMiddleware(Middleware):
|
|
75
75
|
locking_parallel_barrier.release_lock()
|
76
76
|
|
77
77
|
if not remaining_tasks:
|
78
|
-
task_key = f"{message.options['
|
78
|
+
task_key = f"{message.options['base_case_execution_id']}-{ancestor}"
|
79
79
|
|
80
80
|
locking_parallel_barrier = LockingParallelBarrier(
|
81
81
|
self.redis_url,
|
82
82
|
task_key=task_key,
|
83
|
-
lock_key=f"{message.options['
|
83
|
+
lock_key=f"{message.options['base_case_execution_id']}-lock",
|
84
84
|
)
|
85
85
|
try:
|
86
86
|
locking_parallel_barrier.acquire_lock(
|
@@ -103,7 +103,10 @@ class PypelineMiddleware(Middleware):
|
|
103
103
|
if not ancestor_tasks_complete:
|
104
104
|
break
|
105
105
|
|
106
|
-
if
|
106
|
+
if (
|
107
|
+
message.options["base_case_execution_id"]
|
108
|
+
== message.options["execution_id"]
|
109
|
+
):
|
107
110
|
for scenario in message.options["scenarios"]:
|
108
111
|
child_predecessors = list(graph.predecessors(child))
|
109
112
|
if (
|
@@ -115,7 +118,7 @@ class PypelineMiddleware(Middleware):
|
|
115
118
|
locking_parallel_barrier = LockingParallelBarrier(
|
116
119
|
self.redis_url,
|
117
120
|
task_key=task_key,
|
118
|
-
lock_key=f"{message.options['
|
121
|
+
lock_key=f"{message.options['base_case_execution_id']}-lock",
|
119
122
|
)
|
120
123
|
locking_parallel_barrier.set_task_count(1)
|
121
124
|
handler = task_definitions[child]["handlers"][
|
@@ -137,9 +140,9 @@ class PypelineMiddleware(Middleware):
|
|
137
140
|
]
|
138
141
|
|
139
142
|
scenario_message.options["task_name"] = child
|
140
|
-
scenario_message.options["
|
141
|
-
"
|
142
|
-
|
143
|
+
scenario_message.options["base_case_execution_id"] = (
|
144
|
+
message.options["base_case_execution_id"]
|
145
|
+
)
|
143
146
|
scenario_message.options["scenarios"] = message.options[
|
144
147
|
"scenarios"
|
145
148
|
]
|
@@ -155,7 +158,7 @@ class PypelineMiddleware(Middleware):
|
|
155
158
|
locking_parallel_barrier = LockingParallelBarrier(
|
156
159
|
self.redis_url,
|
157
160
|
task_key=task_key,
|
158
|
-
lock_key=f"{message.options['
|
161
|
+
lock_key=f"{message.options['base_case_execution_id']}-lock",
|
159
162
|
)
|
160
163
|
locking_parallel_barrier.set_task_count(1)
|
161
164
|
handler = task_definitions[child]["handlers"][
|
@@ -172,8 +175,8 @@ class PypelineMiddleware(Middleware):
|
|
172
175
|
child_message.options["task_replacements"] = task_replacements
|
173
176
|
child_message.options["execution_id"] = execution_id
|
174
177
|
child_message.options["task_name"] = child
|
175
|
-
child_message.options["
|
176
|
-
"
|
178
|
+
child_message.options["base_case_execution_id"] = message.options[
|
179
|
+
"base_case_execution_id"
|
177
180
|
]
|
178
181
|
child_message.options["scenarios"] = message.options["scenarios"]
|
179
182
|
if "settings" in message.kwargs:
|
@@ -1,7 +1,7 @@
|
|
1
|
-
pypeline/__init__.py,sha256
|
1
|
+
pypeline/__init__.py,sha256=Ol8KeLnnX1kAXAFgJsZj4d_cZMypHQtaiyICJpuzp64,22
|
2
2
|
pypeline/barrier.py,sha256=oO964l9qOCOibweOHyNivmAvufdXOke9nz2tdgclouo,1172
|
3
|
-
pypeline/constants.py,sha256=
|
4
|
-
pypeline/dramatiq.py,sha256=
|
3
|
+
pypeline/constants.py,sha256=415-5fTJQXPO4by14T4BBC6hOn11m96XFiAHSh9Sfxo,2949
|
4
|
+
pypeline/dramatiq.py,sha256=NyNwAw4iibWnS5GhTVQWxAOfBj3VXkfgSliilMa4ajg,12501
|
5
5
|
pypeline/extensions.py,sha256=BzOTnXhNxap3N7uIUUh_hO6dDwx08Vc_RJDE93_K0Lo,610
|
6
6
|
pypeline/pipeline_config_schema.py,sha256=hK2_egtg-YFx_XJDs_NyrOTGKkel7W83X-G0sic52sM,10592
|
7
7
|
pypeline/pipeline_settings_schema.py,sha256=84AuNFYsOUpoADsjEo_n9T6Ica-c21oK_V9s15I4lCg,20212
|
@@ -11,17 +11,16 @@ pypeline/flask/__init__.py,sha256=AdljRh0lMiS8ExgDmgzObwVs8jW7hqQuf83Ml8kn8GQ,49
|
|
11
11
|
pypeline/flask/decorators.py,sha256=ki6jkjZwbDbCWuj7ET7N-ncZwrASp4Fy7257WIYiAAQ,1102
|
12
12
|
pypeline/flask/flask_pypeline.py,sha256=Uqyu3PnSP3DoVZUJPqV9chjT4xdRgvcL3OMXxkbdTEg,5490
|
13
13
|
pypeline/flask/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
14
|
-
pypeline/flask/api/pipelines.py,sha256=
|
14
|
+
pypeline/flask/api/pipelines.py,sha256=lw1ggsjp_Iha5MhyQGHtVW0akpVJnxIk0hn6NkC3c8s,9314
|
15
15
|
pypeline/flask/api/schedules.py,sha256=8PKCMdPucaer8opchNlI5aDssK2UqT79hHpeg5BMtTA,1210
|
16
16
|
pypeline/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
|
-
pypeline/pipelines/factory.py,sha256=
|
17
|
+
pypeline/pipelines/factory.py,sha256=356v1S0WPvDkd9f0fKk0H9aXVbOqQYSWt47aOl66EKk,3172
|
18
18
|
pypeline/pipelines/composition/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
19
|
pypeline/pipelines/composition/parallel_pipeline_composition.py,sha256=pTw9Xb9h4JnV4siFc3JStm5lB-i9djUADo3Kh5K3s7g,12976
|
20
|
-
pypeline/pipelines/composition/pypeline_composition.py,sha256=
|
20
|
+
pypeline/pipelines/composition/pypeline_composition.py,sha256=UBuDKEfRoIbL-9c-HH2ZTVbzfkwFSlNoFH-AVNqt0QE,7965
|
21
21
|
pypeline/pipelines/middleware/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
pypeline/pipelines/middleware/deduplication_middleware.py,sha256=AxlvvTRbXfMtZXW5z7P72HfF3zVM_vKd5VvZeKToZcM,3655
|
23
22
|
pypeline/pipelines/middleware/parallel_pipeline_middleware.py,sha256=kTp6niYoe2nXIiN6EGRfdpxrJyioo0GPxDkfefbGlEk,2821
|
24
|
-
pypeline/pipelines/middleware/pypeline_middleware.py,sha256=
|
23
|
+
pypeline/pipelines/middleware/pypeline_middleware.py,sha256=IXVqzcOlSJ43lsn-i298RkaeygB-PTJjsvdTDtpgfwg,8141
|
25
24
|
pypeline/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
26
25
|
pypeline/utils/config_utils.py,sha256=rAIATyoW7kGETZ_Z2DqiXtGd7bJp5uPfcLtfNPOYsNs,2167
|
27
26
|
pypeline/utils/dramatiq_utils.py,sha256=5GDcOvKY-8S8r---wb6Q8QAywhbKVJ-qILjcYNHei8Y,3658
|
@@ -29,9 +28,9 @@ pypeline/utils/module_utils.py,sha256=-yEJIukDCoXnmlZVXB6Dww25tH6GdPE5SoFqv6pfdV
|
|
29
28
|
pypeline/utils/pipeline_utils.py,sha256=kGP1QwCJikGC5QNRtzRXCDVewyRMpWIqERTNnxGLlSY,4795
|
30
29
|
pypeline/utils/schema_utils.py,sha256=Fgl0y9Cuo_TZeEx_S3gaSVnLjn6467LTkjb2ek7Ms98,851
|
31
30
|
tests/fixtures/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
|
-
scalable_pypeline-2.1.
|
33
|
-
scalable_pypeline-2.1.
|
34
|
-
scalable_pypeline-2.1.
|
35
|
-
scalable_pypeline-2.1.
|
36
|
-
scalable_pypeline-2.1.
|
37
|
-
scalable_pypeline-2.1.
|
31
|
+
scalable_pypeline-2.1.5.dist-info/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
|
32
|
+
scalable_pypeline-2.1.5.dist-info/METADATA,sha256=Fsm58uF_UU13xtUbVLq1bGtxPkwIZRnOvwbpB766URM,5926
|
33
|
+
scalable_pypeline-2.1.5.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
|
34
|
+
scalable_pypeline-2.1.5.dist-info/entry_points.txt,sha256=uWs10ODfHSBKo2Cx_QaUjPHQTpZ3e77j9VlAdRRmMyg,119
|
35
|
+
scalable_pypeline-2.1.5.dist-info/top_level.txt,sha256=C7dpkEOc_-nnsAQb28BfQknjD6XHRyS9ZrvVeoIbV7s,15
|
36
|
+
scalable_pypeline-2.1.5.dist-info/RECORD,,
|
@@ -1,94 +0,0 @@
|
|
1
|
-
import dramatiq
|
2
|
-
import signal
|
3
|
-
from dramatiq.middleware import Middleware
|
4
|
-
from pypeline.barrier import LockingParallelBarrier
|
5
|
-
from pypeline.constants import DEFAULT_TASK_TTL
|
6
|
-
import logging
|
7
|
-
|
8
|
-
logging.basicConfig(level=logging.INFO)
|
9
|
-
logger = logging.getLogger(__name__)
|
10
|
-
|
11
|
-
|
12
|
-
class DeduplicationMiddleware(Middleware):
|
13
|
-
def __init__(self, redis_url="redis://localhost:6379/0"):
|
14
|
-
self.redis_url = redis_url
|
15
|
-
self.active_locks = {}
|
16
|
-
|
17
|
-
def before_process_message(self, broker, message):
|
18
|
-
task_id = message.message_id
|
19
|
-
task_key = f"dramatiq:task_counter:{task_id}"
|
20
|
-
lock_key = f"dramatiq:lock:{task_id}"
|
21
|
-
try:
|
22
|
-
# Try to acquire a lock for the task
|
23
|
-
locking_parallel_barrier = LockingParallelBarrier(
|
24
|
-
self.redis_url,
|
25
|
-
task_key=task_key,
|
26
|
-
lock_key=lock_key,
|
27
|
-
)
|
28
|
-
if (
|
29
|
-
locking_parallel_barrier.get_task_count() > 0
|
30
|
-
or not locking_parallel_barrier.acquire_lock(timeout=DEFAULT_TASK_TTL)
|
31
|
-
):
|
32
|
-
logger.info(f"Found duplicate task {task_id}. Skipping...")
|
33
|
-
raise dramatiq.middleware.SkipMessage(
|
34
|
-
f"Task {task_id} is already being processed."
|
35
|
-
)
|
36
|
-
|
37
|
-
locking_parallel_barrier.set_task_count(1)
|
38
|
-
# Store the lock reference in the message and track it globally
|
39
|
-
message.options["dedupe_task_key"] = task_key
|
40
|
-
message.options["dedupe_lock_key"] = lock_key
|
41
|
-
self.active_locks[lock_key] = locking_parallel_barrier
|
42
|
-
except dramatiq.middleware.SkipMessage:
|
43
|
-
raise dramatiq.middleware.SkipMessage(
|
44
|
-
f"Task {task_id} is already being processed."
|
45
|
-
)
|
46
|
-
except Exception as e:
|
47
|
-
logger.exception(e)
|
48
|
-
raise e
|
49
|
-
|
50
|
-
def after_process_message(self, broker, message, *, result=None, exception=None):
|
51
|
-
"""Releases lock for the message that just finished."""
|
52
|
-
dedupe_task_key = message.options.get("dedupe_task_key", None)
|
53
|
-
dedupe_lock_key = message.options.get("dedupe_lock_key", None)
|
54
|
-
if not dedupe_lock_key or not dedupe_task_key:
|
55
|
-
logger.warning(
|
56
|
-
"unexpected in after_process_message: dedupe task or lock key not in message"
|
57
|
-
)
|
58
|
-
return
|
59
|
-
if dedupe_lock_key in self.active_locks:
|
60
|
-
try:
|
61
|
-
lock = self.active_locks[dedupe_lock_key]
|
62
|
-
lock.decrement_task_count()
|
63
|
-
lock.release_lock()
|
64
|
-
del self.active_locks[dedupe_lock_key]
|
65
|
-
except Exception as e:
|
66
|
-
logger.info(
|
67
|
-
f"Exception while trying to release lock {dedupe_lock_key}: {e}"
|
68
|
-
)
|
69
|
-
raise e
|
70
|
-
else:
|
71
|
-
lock = LockingParallelBarrier(
|
72
|
-
self.redis_url,
|
73
|
-
task_key=dedupe_task_key,
|
74
|
-
lock_key=dedupe_lock_key,
|
75
|
-
)
|
76
|
-
lock.decrement_task_count()
|
77
|
-
lock.release_lock()
|
78
|
-
|
79
|
-
def before_worker_shutdown(self, *args):
|
80
|
-
self.release_all_locks()
|
81
|
-
|
82
|
-
def before_worker_thread_shutdown(self, *args):
|
83
|
-
self.release_all_locks()
|
84
|
-
|
85
|
-
def release_all_locks(self, *args):
|
86
|
-
"""Release all locks when the worker shuts down."""
|
87
|
-
for lock_key, lock in self.active_locks.items():
|
88
|
-
try:
|
89
|
-
lock.decrement_task_count()
|
90
|
-
lock.release_lock()
|
91
|
-
except Exception as e:
|
92
|
-
logger.info(f"Exception while trying to release lock {lock_key}: {e}")
|
93
|
-
raise e
|
94
|
-
self.active_locks.clear()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|