scalable-pypeline 2.1.3__tar.gz → 2.1.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {scalable-pypeline-2.1.3/scalable_pypeline.egg-info → scalable-pypeline-2.1.5}/PKG-INFO +1 -1
  2. scalable-pypeline-2.1.5/pypeline/__init__.py +1 -0
  3. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/constants.py +1 -0
  4. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/dramatiq.py +28 -23
  5. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/flask/api/pipelines.py +1 -25
  6. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/pipelines/composition/pypeline_composition.py +67 -49
  7. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/pipelines/factory.py +10 -35
  8. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/pipelines/middleware/pypeline_middleware.py +15 -12
  9. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5/scalable_pypeline.egg-info}/PKG-INFO +1 -1
  10. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/scalable_pypeline.egg-info/SOURCES.txt +0 -1
  11. scalable-pypeline-2.1.3/pypeline/__init__.py +0 -1
  12. scalable-pypeline-2.1.3/pypeline/pipelines/middleware/deduplication_middleware.py +0 -94
  13. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/LICENSE +0 -0
  14. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/MANIFEST.in +0 -0
  15. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/README.md +0 -0
  16. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/barrier.py +0 -0
  17. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/extensions.py +0 -0
  18. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/flask/__init__.py +0 -0
  19. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/flask/api/__init__.py +0 -0
  20. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/flask/api/schedules.py +0 -0
  21. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/flask/decorators.py +0 -0
  22. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/flask/flask_pypeline.py +0 -0
  23. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/pipeline_config_schema.py +0 -0
  24. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/pipeline_settings_schema.py +0 -0
  25. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/pipelines/__init__.py +0 -0
  26. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/pipelines/composition/__init__.py +0 -0
  27. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/pipelines/composition/parallel_pipeline_composition.py +0 -0
  28. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/pipelines/middleware/__init__.py +0 -0
  29. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/pipelines/middleware/parallel_pipeline_middleware.py +0 -0
  30. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/pypeline_yaml.py +0 -0
  31. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/schedule_config_schema.py +0 -0
  32. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/utils/__init__.py +0 -0
  33. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/utils/config_utils.py +0 -0
  34. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/utils/dramatiq_utils.py +0 -0
  35. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/utils/module_utils.py +0 -0
  36. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/utils/pipeline_utils.py +0 -0
  37. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/pypeline/utils/schema_utils.py +0 -0
  38. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/requirements.txt +0 -0
  39. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/scalable_pypeline.egg-info/dependency_links.txt +0 -0
  40. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/scalable_pypeline.egg-info/entry_points.txt +0 -0
  41. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/scalable_pypeline.egg-info/requires.txt +0 -0
  42. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/scalable_pypeline.egg-info/top_level.txt +0 -0
  43. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/setup.cfg +0 -0
  44. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/setup.py +0 -0
  45. {scalable-pypeline-2.1.3 → scalable-pypeline-2.1.5}/tests/fixtures/__init__.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scalable-pypeline
-Version: 2.1.3
+Version: 2.1.5
 Summary: PypeLine - Python pipelines for the Real World
 Home-page: https://gitlab.com/bravos2/pypeline
 Author: Bravos Power Corporation
scalable-pypeline-2.1.5/pypeline/__init__.py
@@ -0,0 +1 @@
+__version__ = "2.1.5"
pypeline/constants.py
@@ -36,6 +36,7 @@ DEFAULT_BROKER_CONNECTION_ATTEMPTS = int(
 DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT = int(
     os.getenv("DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT", 30)
 )
+MESSAGE_BROKER = os.getenv("MESSAGE_BROKER", "RABBITMQ")
 
 MS_IN_SECONDS = 1000
 API_PATH_V1 = "/api/v1"
pypeline/dramatiq.py
@@ -5,6 +5,8 @@ import logging
 import click
 from urllib.parse import urlparse
 
+from dramatiq.brokers.redis import RedisBroker
+
 from pypeline.extensions import pypeline_config
 from warnings import warn
 from apscheduler.schedulers.blocking import BlockingScheduler
@@ -31,12 +33,10 @@ from pypeline.constants import (
     DEFAULT_BROKER_CONNECTION_HEARTBEAT,
     DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
     DEFAULT_BROKER_CONNECTION_ATTEMPTS,
+    MESSAGE_BROKER,
 )
 from pypeline.pipelines.middleware.parallel_pipeline_middleware import ParallelPipeline
 from pypeline.pipelines.middleware.pypeline_middleware import PypelineMiddleware
-from pypeline.pipelines.middleware.deduplication_middleware import (
-    DeduplicationMiddleware,
-)
 from pypeline.utils.config_utils import (
     retrieve_latest_schedule_config,
     get_service_config_for_worker,
@@ -56,27 +56,32 @@ logger = logging.getLogger(__name__)
 
 def configure_default_broker(broker: Broker = None):
     redis_backend = RedisBackend(url=REDIS_URL)
-    parsed_url = urlparse(RABBIT_URL)
-    credentials = pika.PlainCredentials(parsed_url.username, parsed_url.password)
-    rabbit_broker = (
-        broker
-        if broker is not None
-        else RabbitmqBroker(
-            host=parsed_url.hostname,
-            port=parsed_url.port,
-            credentials=credentials,
-            heartbeat=DEFAULT_BROKER_CONNECTION_HEARTBEAT,
-            connection_attempts=DEFAULT_BROKER_CONNECTION_ATTEMPTS,
-            blocked_connection_timeout=DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
+
+    if MESSAGE_BROKER == "RABBITMQ":
+        parsed_url = urlparse(RABBIT_URL)
+        credentials = pika.PlainCredentials(parsed_url.username, parsed_url.password)
+        broker = (
+            broker
+            if broker is not None
+            else RabbitmqBroker(
+                host=parsed_url.hostname,
+                port=parsed_url.port,
+                credentials=credentials,
+                heartbeat=DEFAULT_BROKER_CONNECTION_HEARTBEAT,
+                connection_attempts=DEFAULT_BROKER_CONNECTION_ATTEMPTS,
+                blocked_connection_timeout=DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
+            )
         )
-    )
-    rabbit_broker.add_middleware(Results(backend=redis_backend))
-    rabbit_broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))
-    rabbit_broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
-    rabbit_broker.add_middleware(CurrentMessage())
-    register_actors_for_workers(rabbit_broker)
-    rabbit_broker.add_middleware(DeduplicationMiddleware(redis_url=REDIS_URL))
-    set_broker(rabbit_broker)
+
+    elif MESSAGE_BROKER == "REDIS":
+        broker = broker if broker is not None else RedisBroker(url=REDIS_URL)
+
+    broker.add_middleware(Results(backend=redis_backend))
+    broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))
+    broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
+    broker.add_middleware(CurrentMessage())
+    register_actors_for_workers(broker)
+    set_broker(broker)
 
 
 def register_actors_for_workers(broker: Broker):
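Broker selection is now driven entirely by the MESSAGE_BROKER environment variable introduced in pypeline/constants.py above. A minimal sketch of opting into the new Redis broker; the variable names and configure_default_broker come from this diff, while the URLs and surrounding setup are illustrative only:

    import os

    # Constants are read at import time, so set the environment before any
    # pypeline module is imported. REDIS_URL continues to double as the
    # results backend either way.
    os.environ["MESSAGE_BROKER"] = "REDIS"   # default is "RABBITMQ"
    os.environ["REDIS_URL"] = "redis://localhost:6379/0"

    from pypeline.dramatiq import configure_default_broker

    # With MESSAGE_BROKER == "REDIS" this builds a RedisBroker instead of a
    # RabbitmqBroker, attaches the same middleware stack (minus the removed
    # DeduplicationMiddleware), and registers it as the default broker.
    configure_default_broker()

Note that if MESSAGE_BROKER is set to anything other than "RABBITMQ" or "REDIS" and no broker is passed in, the function falls through with broker still None and the add_middleware calls will fail; the diff adds no fallback branch.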
pypeline/flask/api/pipelines.py
@@ -56,26 +56,6 @@ class InvokePipelineSchema(Schema):
         example={"document_id": "123", "send_alert": True},
         required=False,
     )
-    settings = fields.Raw(
-        description="Payload contains settings for a given pipeline",
-        example={
-            "param1": "Dataset",
-            "param2": 1,
-            "param3": 2,
-        },
-        required=False,
-    )
-
-    task_replacements = fields.Raw(
-        description="A dictionary of task definitions as the key and the value of the index for which handler"
-        " should be executed. If none provided it will default to the first handler in the list at index position 0.",
-        example={
-            "a": 1,
-            "b": 3,
-        },
-        required=False,
-    )
-
     scenarios = fields.List(
         fields.Nested(PipelineScenarioSchema),
         metadata={"description": "List of scenarios to run for a given pipeline"},
@@ -198,20 +178,16 @@ class PipelineInvoke(MethodView):
         retval = {"pipeline_id": pipeline_id, "status": "starting"}
         try:
             chain_payload = payload.get("chain_payload", {})
-            settings = payload.get("settings", None)
-            task_replacements = payload.get("task_replacements", {})
             scenarios = payload.get("scenarios", [])
             if pipeline_config["schemaVersion"] == 1:
                 pipeline = dag_generator(
                     pipeline_id=pipeline_id,
                     event=chain_payload,
                 )
-            elif pipeline_config["schemaVersion"] == 2 and task_replacements:
+            elif pipeline_config["schemaVersion"] == 2:
                 pipeline = dag_generator(
                     pipeline_id=pipeline_id,
-                    task_replacements=task_replacements,
                     scenarios=scenarios,
-                    settings=settings,
                 )
             retval["scenarios"] = pipeline.scenarios
             pipeline.run()
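With settings and task_replacements gone from the top level, everything the invoke endpoint needs now rides inside scenarios. A hypothetical request body under the new schema; the field names (chain_payload, scenarios, settings, taskReplacements, taskReruns) come from this diff, while the values and the assumption that taskReruns is a list of task names are illustrative:

    payload = {
        "chain_payload": {"document_id": "123", "send_alert": True},
        "scenarios": [
            {
                # The first scenario is treated as the base case and runs every task.
                "settings": {"param1": "Dataset", "param2": 1},
                "taskReplacements": {},  # handler index 0 everywhere
                "taskReruns": [],
            },
            {
                # Later scenarios only rerun the tasks derived from their
                # taskReruns / taskReplacements entries.
                "settings": {"param1": "Dataset", "param2": 2},
                "taskReplacements": {"b": 1},  # run task "b" with its second handler
                "taskReruns": ["b"],
            },
        ],
    }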
pypeline/pipelines/composition/pypeline_composition.py
@@ -17,8 +17,6 @@ class Pypeline:
     def __init__(
         self,
         pipeline: dict,
-        pipeline_settings: dict = None,
-        task_replacements: dict = {},
         scenarios: dict = {},
         broker=None,
         execution_id=None,
@@ -26,11 +24,8 @@ class Pypeline:
         # Construct initial properties
         self.pipeline = pipeline
         self.broker = broker or get_broker()
-        self.execution_id = execution_id or str(uuid4())
         self._starting_messages = []
         self.scenarios = scenarios
-        self.pipeline_settings = pipeline_settings
-        self.task_replacements = task_replacements
 
         # Get pipeline dag graph and find first task
         pipeline_config = pipeline["config"]
@@ -39,8 +34,19 @@ class Pypeline:
         task_definitions = pipeline_config["taskDefinitions"]
         first_task = list(pipeline_config["dagAdjacency"].keys())[0]
 
+        base_case_execution_id = None
+
         # Process the scenarios one by one
         for scenario in self.scenarios:
+            # The first scenario is the base case and always runs
+            if self.scenarios.index(scenario) == 0:
+                base_case_execution_id = scenario.get("execution_id", None) or str(
+                    uuid4()
+                )
+                scenario["execution_id"] = base_case_execution_id
+                scenario["base_case_execution_id"] = base_case_execution_id
+                scenario["tasksToRunInScenario"] = list(self.graph.nodes)
+                continue
             tasks_in_reruns = scenario["taskReruns"]
 
             # Find any tasks that have replacements for this scenario
@@ -62,6 +68,7 @@ class Pypeline:
                 tasks_to_be_rerun_in_scenario
             )
             scenario["tasksToRunInScenario"] = tasks_to_be_rerun_in_scenario
+            scenario["base_case_execution_id"] = base_case_execution_id
             scenario["execution_id"] = scenario.get("execution_id", None) or str(
                 uuid4()
             )
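Taken together, the hunks above leave each scenario dict carrying its own execution_id plus a shared base_case_execution_id, with the base case (index 0) always covering the full DAG. A rough sketch of the state after __init__ has processed two scenarios (ids and task names invented):

    scenarios = [
        {
            # Base case: its execution_id doubles as the shared base_case_execution_id.
            "execution_id": "1111",
            "base_case_execution_id": "1111",
            "tasksToRunInScenario": ["a", "b", "c"],  # every node in the graph
        },
        {
            # Variant scenario: separate execution_id, same lock namespace.
            "execution_id": "2222",
            "base_case_execution_id": "1111",
            "tasksToRunInScenario": ["b", "c"],  # derived from taskReruns/taskReplacements
        },
    ]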
@@ -78,22 +85,31 @@ class Pypeline:
             )
             message = lazy_actor.message()
             message.options["pipeline"] = pipeline
-            message.options["task_replacements"] = self.task_replacements
+            message.options["task_replacements"] = copy(
+                scenario["taskReplacements"]
+            )
             message.options["execution_id"] = scenario["execution_id"]
             message.options["task_name"] = first_task
-            message.options["root_execution_id"] = self.execution_id
-            if self.pipeline_settings:
-                message.kwargs["settings"] = copy(self.pipeline_settings)
+            message.options["base_case_execution_id"] = base_case_execution_id
+            if scenario["settings"]:
+                message.kwargs["settings"] = copy(scenario["settings"])
                 message.kwargs["settings"]["execution_id"] = scenario[
                     "execution_id"
                 ]
+                message.kwargs["settings"][
+                    "base_case_execution_id"
+                ] = base_case_execution_id
             self._starting_messages.append(message)
 
         for m in self._starting_messages:
             m.options["scenarios"] = self.scenarios
 
+        # Run the first task of the first scenario no matter what
+        first_scenario_task_replacements = scenarios[0]["taskReplacements"]
+        first_scenario_settings = scenarios[0].get("settings", None)
+
         handler = task_definitions[first_task]["handlers"][
-            self.task_replacements.get(first_task, 0)
+            first_scenario_task_replacements.get(first_task, 0)
         ]
         lazy_actor = register_lazy_actor(
             self.broker,
@@ -102,15 +118,18 @@ class Pypeline:
         )
         message = lazy_actor.message()
         message.options["pipeline"] = pipeline
-        message.options["task_replacements"] = self.task_replacements
-        message.options["execution_id"] = self.execution_id
+        message.options["task_replacements"] = first_scenario_task_replacements
+        message.options["execution_id"] = base_case_execution_id
         message.options["task_name"] = first_task
         message.options["scenarios"] = self.scenarios
-        message.options["root_execution_id"] = self.execution_id
+        message.options["base_case_execution_id"] = base_case_execution_id
 
-        if self.pipeline_settings:
-            message.kwargs["settings"] = copy(self.pipeline_settings)
-            message.kwargs["settings"]["execution_id"] = self.execution_id
+        if first_scenario_settings:
+            message.kwargs["settings"] = copy(first_scenario_settings)
+            message.kwargs["settings"]["execution_id"] = base_case_execution_id
+            message.kwargs["settings"][
+                "base_case_execution_id"
+            ] = base_case_execution_id
 
         self._starting_messages.append(message)
 
@@ -120,7 +139,9 @@ class Pypeline:
                 f"{message.options['execution_id']}-{message.options['task_name']}"
             )
             locking_parallel_barrier = LockingParallelBarrier(
-                REDIS_URL, task_key=task_key, lock_key=f"{self.execution_id}-lock"
+                REDIS_URL,
+                task_key=task_key,
+                lock_key=f"{message.options['base_case_execution_id']}-lock",
             )
             locking_parallel_barrier.set_task_count(1)
             self.broker.enqueue(message, delay=delay)
@@ -131,36 +152,39 @@ class Pypeline:
         return self.number_of_tasks
 
     def completed(self):
-        redis_task_keys = [
-            f"{self.execution_id}-{node}" for node in list(self.graph.nodes)
-        ]
-        redis_lock_key = f"{self.execution_id}-lock"
-        for scenario in self.scenarios:
-            scenario_task_keys = [
-                f"{scenario['execution_id']}-{task}"
-                for task in scenario["tasksToRunInScenario"]
-            ]
-            redis_task_keys = redis_task_keys + scenario_task_keys
+        locks = []
 
-        for task_key in redis_task_keys:
-            locking_parallel_barrier = LockingParallelBarrier(
-                REDIS_URL, task_key=task_key, lock_key=redis_lock_key
+        for scenario in self.scenarios:
+            locks.append(
+                {
+                    "scenario_task_keys": [
+                        f"{scenario['execution_id']}-{task}"
+                        for task in scenario["tasksToRunInScenario"]
+                    ],
+                    "redis_lock_key": f"{scenario['base_case_execution_id']}-lock",
+                }
             )
-            try:
-                locking_parallel_barrier.acquire_lock(
-                    timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+
+        for lock in locks:
+            for task_key in lock["scenario_task_keys"]:
+                locking_parallel_barrier = LockingParallelBarrier(
+                    REDIS_URL, task_key=task_key, lock_key=lock["redis_lock_key"]
                 )
-                task_complete = True
-                if locking_parallel_barrier.task_exists():
-                    remaining_tasks = locking_parallel_barrier.get_task_count()
-                    if remaining_tasks >= 1:
+                try:
+                    locking_parallel_barrier.acquire_lock(
+                        timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+                    )
+                    task_complete = True
+                    if locking_parallel_barrier.task_exists():
+                        remaining_tasks = locking_parallel_barrier.get_task_count()
+                        if remaining_tasks >= 1:
+                            task_complete = False
+                    else:
                         task_complete = False
-                else:
-                    task_complete = False
-            finally:
-                locking_parallel_barrier.release_lock()
-            if not task_complete:
-                return task_complete
+                finally:
+                    locking_parallel_barrier.release_lock()
+                if not task_complete:
+                    return task_complete
 
         return True
 
@@ -168,10 +192,7 @@ class Pypeline:
         return json.dumps(
             {
                 "pipeline": self.pipeline,
-                "pipeline_settings": self.pipeline_settings,
-                "task_replacements": self.task_replacements,
                 "scenarios": self.scenarios,
-                "execution_id": self.execution_id,
             }
         )
 
@@ -181,8 +202,5 @@ class Pypeline:
 
         return cls(
             data["pipeline"],
-            pipeline_settings=data["pipeline_settings"],
-            task_replacements=data["task_replacements"],
             scenarios=data["scenarios"],
-            execution_id=data["execution_id"],
         )
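The barrier keys follow one naming scheme throughout the rewritten run()/completed() logic: a counter per (execution_id, task) pair, all guarded by a lock named after the base case. A small illustration with invented ids:

    execution_id = "2222"            # a scenario's own id
    base_case_execution_id = "1111"  # shared by every scenario in the run
    task = "b"

    task_key = f"{execution_id}-{task}"          # "2222-b": per-task counter
    lock_key = f"{base_case_execution_id}-lock"  # "1111-lock": one lock per run

    # completed() returns False as soon as any scenario still has a counter >= 1
    # (or a missing counter) for one of its tasksToRunInScenario, and True only
    # once every task in every scenario has drained.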
pypeline/pipelines/factory.py
@@ -20,11 +20,7 @@ from pypeline.utils.pipeline_utils import (
 
 
 def dag_generator(
-    pipeline_id: str,
-    task_replacements: dict = {},
-    scenarios: typing.List[typing.Dict] = [],
-    *args,
-    **kwargs
+    pipeline_id: str, scenarios: typing.List[typing.Dict] = [], *args, **kwargs
 ) -> typing.Union[parallel_pipeline, Pypeline]:
     """Generates a pipeline dag from a pre-defined pipeline yaml
 
@@ -43,40 +39,19 @@ def dag_generator(
     broker.actors.clear()
 
     if pipeline["schemaVersion"] == 2:
-        # If the pipeline_config expects settings ensure we have them
-        if (
-            "settings" in pipeline_config
-            and len(pipeline_config["settings"]["required"]) > 0
-            and "settings" not in kwargs
-        ):
-            raise MissingSettingsException()
+        supplied_pipeline_settings_schema = create_pipeline_settings_schema(
+            pipeline_config["settings"]
+        )
 
-        # If we're here we expect to have settings. Pop them out of kwargs to validate
-        inputted_settings = kwargs.pop("settings", {})
-        if "settings" in pipeline_config:
-            supplied_pipeline_settings_schema = create_pipeline_settings_schema(
-                pipeline_config["settings"]
-            )
-
-            # Validate scenarios settings to make sure they look okay
-            validated_scenarios = PipelineScenarioSchema(many=True).load(scenarios)
+        # Validate scenarios settings to make sure they look okay
+        validated_scenarios = PipelineScenarioSchema(many=True).load(scenarios)
 
-            for scenario in validated_scenarios:
-                supplied_pipeline_settings_schema.load(scenario["settings"])
+        for scenario in validated_scenarios:
+            supplied_pipeline_settings_schema.load(scenario["settings"])
 
-            validated_settings = supplied_pipeline_settings_schema.load(
-                inputted_settings
-            )
-            p = Pypeline(
-                pipeline,
-                pipeline_settings=validated_settings,
-                task_replacements=task_replacements,
-                scenarios=scenarios,
-                broker=broker,
-            )
-        else:
-            p = Pypeline(pipeline, task_replacements=task_replacements, broker=broker)
+        p = Pypeline(pipeline, scenarios=scenarios, broker=broker)
         return p
+
     graph = get_execution_graph(pipeline_config)
     optimal_execution_graph = topological_sort_with_parallelism(graph.copy())
     registered_actors: typing.Dict[str, LazyActor] = {}
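Callers of dag_generator therefore drop task_replacements and settings and pass scenarios only; each scenario's settings block is validated against the pipeline's settings schema before the Pypeline is built. Note that the rewritten Pypeline.__init__ indexes scenarios[0], so at least one scenario (the base case) is now required. A hedged usage sketch with an invented pipeline id and scenario:

    from pypeline.pipelines.factory import dag_generator

    pipeline = dag_generator(
        pipeline_id="demo-pipeline",  # hypothetical schemaVersion 2 pipeline
        scenarios=[
            {
                "settings": {"param1": "Dataset"},
                "taskReplacements": {},
                "taskReruns": [],
            },
        ],
    )
    pipeline.run()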
pypeline/pipelines/middleware/pypeline_middleware.py
@@ -35,7 +35,7 @@ class PypelineMiddleware(Middleware):
         locking_parallel_barrier = LockingParallelBarrier(
             self.redis_url,
             task_key=task_key,
-            lock_key=f"{message.options['root_execution_id']}-lock",
+            lock_key=f"{message.options['base_case_execution_id']}-lock",
         )
         try:
             locking_parallel_barrier.acquire_lock(
@@ -60,7 +60,7 @@ class PypelineMiddleware(Middleware):
            locking_parallel_barrier = LockingParallelBarrier(
                self.redis_url,
                task_key=task_key,
-                lock_key=f"{message.options['root_execution_id']}-lock",
+                lock_key=f"{message.options['base_case_execution_id']}-lock",
            )
            try:
                locking_parallel_barrier.acquire_lock(
@@ -75,12 +75,12 @@ class PypelineMiddleware(Middleware):
                locking_parallel_barrier.release_lock()
 
            if not remaining_tasks:
-                task_key = f"{message.options['root_execution_id']}-{ancestor}"
+                task_key = f"{message.options['base_case_execution_id']}-{ancestor}"
 
                locking_parallel_barrier = LockingParallelBarrier(
                    self.redis_url,
                    task_key=task_key,
-                    lock_key=f"{message.options['root_execution_id']}-lock",
+                    lock_key=f"{message.options['base_case_execution_id']}-lock",
                )
                try:
                    locking_parallel_barrier.acquire_lock(
@@ -103,7 +103,10 @@ class PypelineMiddleware(Middleware):
            if not ancestor_tasks_complete:
                break
 
-        if message.options["root_execution_id"] == message.options["execution_id"]:
+        if (
+            message.options["base_case_execution_id"]
+            == message.options["execution_id"]
+        ):
            for scenario in message.options["scenarios"]:
                child_predecessors = list(graph.predecessors(child))
                if (
115
118
  locking_parallel_barrier = LockingParallelBarrier(
116
119
  self.redis_url,
117
120
  task_key=task_key,
118
- lock_key=f"{message.options['root_execution_id']}-lock",
121
+ lock_key=f"{message.options['base_case_execution_id']}-lock",
119
122
  )
120
123
  locking_parallel_barrier.set_task_count(1)
121
124
  handler = task_definitions[child]["handlers"][
@@ -137,9 +140,9 @@ class PypelineMiddleware(Middleware):
137
140
  ]
138
141
 
139
142
  scenario_message.options["task_name"] = child
140
- scenario_message.options["root_execution_id"] = message.options[
141
- "root_execution_id"
142
- ]
143
+ scenario_message.options["base_case_execution_id"] = (
144
+ message.options["base_case_execution_id"]
145
+ )
143
146
  scenario_message.options["scenarios"] = message.options[
144
147
  "scenarios"
145
148
  ]
@@ -155,7 +158,7 @@ class PypelineMiddleware(Middleware):
155
158
  locking_parallel_barrier = LockingParallelBarrier(
156
159
  self.redis_url,
157
160
  task_key=task_key,
158
- lock_key=f"{message.options['root_execution_id']}-lock",
161
+ lock_key=f"{message.options['base_case_execution_id']}-lock",
159
162
  )
160
163
  locking_parallel_barrier.set_task_count(1)
161
164
  handler = task_definitions[child]["handlers"][
@@ -172,8 +175,8 @@ class PypelineMiddleware(Middleware):
172
175
  child_message.options["task_replacements"] = task_replacements
173
176
  child_message.options["execution_id"] = execution_id
174
177
  child_message.options["task_name"] = child
175
- child_message.options["root_execution_id"] = message.options[
176
- "root_execution_id"
178
+ child_message.options["base_case_execution_id"] = message.options[
179
+ "base_case_execution_id"
177
180
  ]
178
181
  child_message.options["scenarios"] = message.options["scenarios"]
179
182
  if "settings" in message.kwargs:
scalable_pypeline.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scalable-pypeline
-Version: 2.1.3
+Version: 2.1.5
 Summary: PypeLine - Python pipelines for the Real World
 Home-page: https://gitlab.com/bravos2/pypeline
 Author: Bravos Power Corporation
scalable_pypeline.egg-info/SOURCES.txt
@@ -25,7 +25,6 @@ pypeline/pipelines/composition/__init__.py
 pypeline/pipelines/composition/parallel_pipeline_composition.py
 pypeline/pipelines/composition/pypeline_composition.py
 pypeline/pipelines/middleware/__init__.py
-pypeline/pipelines/middleware/deduplication_middleware.py
 pypeline/pipelines/middleware/parallel_pipeline_middleware.py
 pypeline/pipelines/middleware/pypeline_middleware.py
 pypeline/utils/__init__.py
scalable-pypeline-2.1.3/pypeline/__init__.py
@@ -1 +0,0 @@
-__version__ = "2.1.3"
scalable-pypeline-2.1.3/pypeline/pipelines/middleware/deduplication_middleware.py
@@ -1,94 +0,0 @@
-import dramatiq
-import signal
-from dramatiq.middleware import Middleware
-from pypeline.barrier import LockingParallelBarrier
-from pypeline.constants import DEFAULT_TASK_TTL
-import logging
-
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
-
-class DeduplicationMiddleware(Middleware):
-    def __init__(self, redis_url="redis://localhost:6379/0"):
-        self.redis_url = redis_url
-        self.active_locks = {}
-
-    def before_process_message(self, broker, message):
-        task_id = message.message_id
-        task_key = f"dramatiq:task_counter:{task_id}"
-        lock_key = f"dramatiq:lock:{task_id}"
-        try:
-            # Try to acquire a lock for the task
-            locking_parallel_barrier = LockingParallelBarrier(
-                self.redis_url,
-                task_key=task_key,
-                lock_key=lock_key,
-            )
-            if (
-                locking_parallel_barrier.get_task_count() > 0
-                or not locking_parallel_barrier.acquire_lock(timeout=DEFAULT_TASK_TTL)
-            ):
-                logger.info(f"Found duplicate task {task_id}. Skipping...")
-                raise dramatiq.middleware.SkipMessage(
-                    f"Task {task_id} is already being processed."
-                )
-
-            locking_parallel_barrier.set_task_count(1)
-            # Store the lock reference in the message and track it globally
-            message.options["dedupe_task_key"] = task_key
-            message.options["dedupe_lock_key"] = lock_key
-            self.active_locks[lock_key] = locking_parallel_barrier
-        except dramatiq.middleware.SkipMessage:
-            raise dramatiq.middleware.SkipMessage(
-                f"Task {task_id} is already being processed."
-            )
-        except Exception as e:
-            logger.exception(e)
-            raise e
-
-    def after_process_message(self, broker, message, *, result=None, exception=None):
-        """Releases lock for the message that just finished."""
-        dedupe_task_key = message.options.get("dedupe_task_key", None)
-        dedupe_lock_key = message.options.get("dedupe_lock_key", None)
-        if not dedupe_lock_key or not dedupe_task_key:
-            logger.warning(
-                "unexpected in after_process_message: dedupe task or lock key not in message"
-            )
-            return
-        if dedupe_lock_key in self.active_locks:
-            try:
-                lock = self.active_locks[dedupe_lock_key]
-                lock.decrement_task_count()
-                lock.release_lock()
-                del self.active_locks[dedupe_lock_key]
-            except Exception as e:
-                logger.info(
-                    f"Exception while trying to release lock {dedupe_lock_key}: {e}"
-                )
-                raise e
-        else:
-            lock = LockingParallelBarrier(
-                self.redis_url,
-                task_key=dedupe_task_key,
-                lock_key=dedupe_lock_key,
-            )
-            lock.decrement_task_count()
-            lock.release_lock()
-
-    def before_worker_shutdown(self, *args):
-        self.release_all_locks()
-
-    def before_worker_thread_shutdown(self, *args):
-        self.release_all_locks()
-
-    def release_all_locks(self, *args):
-        """Release all locks when the worker shuts down."""
-        for lock_key, lock in self.active_locks.items():
-            try:
-                lock.decrement_task_count()
-                lock.release_lock()
-            except Exception as e:
-                logger.info(f"Exception while trying to release lock {lock_key}: {e}")
-                raise e
-        self.active_locks.clear()