scalable-pypeline 2.1.31 (scalable_pypeline-2.1.31-py2.py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. pypeline/__init__.py +1 -0
  2. pypeline/barrier.py +63 -0
  3. pypeline/constants.py +94 -0
  4. pypeline/dramatiq.py +455 -0
  5. pypeline/executable_job_config_schema.py +35 -0
  6. pypeline/extensions.py +17 -0
  7. pypeline/flask/__init__.py +16 -0
  8. pypeline/flask/api/__init__.py +0 -0
  9. pypeline/flask/api/pipelines.py +275 -0
  10. pypeline/flask/api/schedules.py +40 -0
  11. pypeline/flask/decorators.py +41 -0
  12. pypeline/flask/flask_pypeline.py +156 -0
  13. pypeline/job_runner.py +205 -0
  14. pypeline/pipeline_config_schema.py +352 -0
  15. pypeline/pipeline_settings_schema.py +561 -0
  16. pypeline/pipelines/__init__.py +0 -0
  17. pypeline/pipelines/composition/__init__.py +0 -0
  18. pypeline/pipelines/composition/parallel_pipeline_composition.py +375 -0
  19. pypeline/pipelines/composition/pypeline_composition.py +215 -0
  20. pypeline/pipelines/factory.py +86 -0
  21. pypeline/pipelines/middleware/__init__.py +0 -0
  22. pypeline/pipelines/middleware/get_active_worker_id_middleware.py +22 -0
  23. pypeline/pipelines/middleware/graceful_shutdown_middleware.py +50 -0
  24. pypeline/pipelines/middleware/parallel_pipeline_middleware.py +60 -0
  25. pypeline/pipelines/middleware/pypeline_middleware.py +202 -0
  26. pypeline/pypeline_yaml.py +468 -0
  27. pypeline/schedule_config_schema.py +125 -0
  28. pypeline/utils/__init__.py +0 -0
  29. pypeline/utils/config_utils.py +81 -0
  30. pypeline/utils/dramatiq_utils.py +134 -0
  31. pypeline/utils/executable_job_util.py +35 -0
  32. pypeline/utils/graceful_shutdown_util.py +39 -0
  33. pypeline/utils/module_utils.py +108 -0
  34. pypeline/utils/pipeline_utils.py +144 -0
  35. pypeline/utils/schema_utils.py +24 -0
  36. scalable_pypeline-2.1.31.dist-info/LICENSE +177 -0
  37. scalable_pypeline-2.1.31.dist-info/METADATA +212 -0
  38. scalable_pypeline-2.1.31.dist-info/RECORD +42 -0
  39. scalable_pypeline-2.1.31.dist-info/WHEEL +6 -0
  40. scalable_pypeline-2.1.31.dist-info/entry_points.txt +6 -0
  41. scalable_pypeline-2.1.31.dist-info/top_level.txt +2 -0
  42. tests/fixtures/__init__.py +0 -0
pypeline/pipelines/middleware/get_active_worker_id_middleware.py
@@ -0,0 +1,22 @@
+ import contextvars
+ from dramatiq import Middleware
+
+
+ class GetActiveWorkerIdMiddleware(Middleware):
+     _ACTIVE_WORKER_ID: contextvars.ContextVar["Optional[Message[Any]]"] = (
+         contextvars.ContextVar("_ACTIVE_WORKER_ID", default=None)
+     )
+
+     @classmethod
+     def get_active_worker_id(cls):
+         return cls._ACTIVE_WORKER_ID.get()
+
+     def before_process_message(self, broker, message):
+         """Sets the active worker ID, verifying broker has a broker_id first."""
+         broker_id = getattr(broker, "broker_id", None)
+         if not broker_id:
+             return
+         self._ACTIVE_WORKER_ID.set(broker_id)
+
+     def after_process_message(self, broker, message, *, result=None, exception=None):
+         self._ACTIVE_WORKER_ID.set(None)
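
A minimal wiring sketch for this middleware (illustrative, not part of the package diff): it assumes the broker has been given a broker_id attribute, which pypeline's worker setup is expected to provide elsewhere, and uses a placeholder Redis URL.

import dramatiq
from dramatiq.brokers.redis import RedisBroker
from pypeline.pipelines.middleware.get_active_worker_id_middleware import (
    GetActiveWorkerIdMiddleware,
)

broker = RedisBroker(url="redis://localhost:6379/0")  # illustrative URL
broker.broker_id = "worker-abc123"  # assumption: normally set by the worker bootstrap
broker.add_middleware(GetActiveWorkerIdMiddleware())
dramatiq.set_broker(broker)

@dramatiq.actor
def report_worker():
    # Reads the broker_id captured by before_process_message for this message.
    print(GetActiveWorkerIdMiddleware.get_active_worker_id())
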
pypeline/pipelines/middleware/graceful_shutdown_middleware.py
@@ -0,0 +1,50 @@
+ import os
+ import socket
+ import logging
+ import redis
+
+ from dramatiq.middleware import Middleware
+ from tenacity import retry, stop_after_attempt, wait_exponential, after_log
+
+ logger = logging.getLogger(__name__)
+
+
+ class GraceFulShutdownMiddleware(Middleware):
+     def __init__(self, redis_url, key_prefix="busy"):
+         self.redis = redis.Redis.from_url(redis_url)
+         self.hostname = socket.gethostname()
+         self.pid = os.getpid()
+         self.key_prefix = key_prefix
+         self.key = f"{self.key_prefix}:{self.hostname}-{self.pid}"
+
+     @retry(
+         stop=stop_after_attempt(3),
+         wait=wait_exponential(multiplier=2, min=2, max=10),
+         after=after_log(logger, logging.WARNING),
+         reraise=True,
+     )
+     def _set_busy_flag(self, message_ttl):
+         self.redis.set(self.key, "1", ex=message_ttl)
+         logger.debug(f"[GracefulShutdownMiddleware] Set busy flag: {self.key}")
+
+     @retry(
+         stop=stop_after_attempt(3),
+         wait=wait_exponential(multiplier=2, min=2, max=10),
+         after=after_log(logger, logging.WARNING),
+         reraise=True,
+     )
+     def _clear_busy_flag(self):
+         self.redis.delete(self.key)
+         logger.debug(f"[GracefulShutdownMiddleware] Cleared busy flag: {self.key}")
+
+     def before_process_message(self, broker, message):
+         try:
+             self._set_busy_flag(message_ttl=message.options["task_ttl"])
+         except Exception as e:
+             logger.error(f"[GracefulShutdownMiddleware] Failed to set busy flag: {e}")
+
+     def after_process_message(self, broker, message, *, result=None, exception=None):
+         try:
+             self._clear_busy_flag()
+         except Exception as e:
+             logger.error(f"[GracefulShutdownMiddleware] Failed to clear busy flag: {e}")
pypeline/pipelines/middleware/parallel_pipeline_middleware.py
@@ -0,0 +1,60 @@
+ import copy
+ from dramatiq.middleware import Middleware
+
+ from pypeline.barrier import LockingParallelBarrier
+ from pypeline.constants import PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+
+
+ class ParallelPipeline(Middleware):
+     def __init__(self, redis_url):
+         self.redis_url = redis_url
+
+     def after_process_message(self, broker, message, *, result=None, exception=None):
+         from dramatiq.message import Message
+
+         if exception is None:
+             group_completion_uuid = message.options.get("group_completion_uuid")
+             if group_completion_uuid:
+                 locking_parallel_barrier = LockingParallelBarrier(
+                     self.redis_url,
+                     task_key=group_completion_uuid,
+                     lock_key=f"{group_completion_uuid}-lock",
+                 )
+                 try:
+                     locking_parallel_barrier.acquire_lock(
+                         timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+                     )
+                     remaining_tasks = locking_parallel_barrier.decrement_task_count()
+                 finally:
+                     locking_parallel_barrier.release_lock()
+                 if remaining_tasks <= 0:
+                     execution_graph = message.options.get("execution_graph")
+
+                     for i in range(len(execution_graph)):
+                         message_group = execution_graph[i]
+
+                         # Check if the current group matches the group_completion_uuid
+                         if (
+                             message_group[0]["options"]["group_completion_uuid"]
+                             == group_completion_uuid
+                         ):
+                             # Check if there is a next group
+                             if i + 1 < len(execution_graph):
+                                 next_group = execution_graph[i + 1]
+
+                                 completion_uuid = next_group[0]["options"][
+                                     "group_completion_uuid"
+                                 ]
+                                 locking_parallel_barrier = LockingParallelBarrier(
+                                     self.redis_url,
+                                     task_key=completion_uuid,
+                                     lock_key=f"{completion_uuid}-lock",
+                                 )
+                                 locking_parallel_barrier.set_task_count(len(next_group))
+                                 execution_graph_copy = copy.deepcopy(execution_graph)
+
+                                 for next_message in next_group:
+                                     next_message["options"][
+                                         "execution_graph"
+                                     ] = execution_graph_copy
+                                     broker.enqueue(Message(**next_message))
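
For context, a hedged sketch of how the counting barrier this middleware decrements might be seeded when a parallel group is first enqueued: every message in the group carries the same group_completion_uuid in its options, the barrier starts at the group size, and the next group in execution_graph is enqueued once the count reaches zero. The URL and group size below are placeholders.

import uuid
from pypeline.barrier import LockingParallelBarrier

REDIS_URL = "redis://localhost:6379/0"  # illustrative URL
group_completion_uuid = str(uuid.uuid4())
group_size = 3  # number of messages enqueued in this parallel group

barrier = LockingParallelBarrier(
    REDIS_URL,
    task_key=group_completion_uuid,
    lock_key=f"{group_completion_uuid}-lock",
)
barrier.set_task_count(group_size)  # decremented by after_process_message above
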
pypeline/pipelines/middleware/pypeline_middleware.py
@@ -0,0 +1,202 @@
+ from copy import copy
+
+ import networkx as nx
+ from dramatiq import Middleware
+
+ from pypeline.barrier import LockingParallelBarrier
+ from pypeline.constants import PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+ from pypeline.utils.module_utils import get_callable
+ from pypeline.utils.pipeline_utils import get_execution_graph
+ from pypeline.utils.dramatiq_utils import register_lazy_actor
+
+
+ class PypelineMiddleware(Middleware):
+
+     def __init__(self, redis_url):
+         self.redis_url = redis_url
+
+     def after_process_message(self, broker, message, *, result=None, exception=None):
+
+         if exception is not None:
+             return
+
+         if "pipeline" not in message.options:
+             return
+
+         pipeline = message.options["pipeline"]
+         max_retries = message.options.get("max_retries", None)
+         pipeline_config = pipeline["config"]
+         task_replacements = message.options["task_replacements"]
+         execution_id = message.options["execution_id"]
+         task_definitions = pipeline_config["taskDefinitions"]
+         task_name = message.options["task_name"]
+         task_key = f"{execution_id}-{task_name}"
+
+         # Signal to other jobs that current task is finished
+         locking_parallel_barrier = LockingParallelBarrier(
+             self.redis_url,
+             task_key=task_key,
+             lock_key=f"{message.options['base_case_execution_id']}-lock",
+         )
+         try:
+             locking_parallel_barrier.acquire_lock(timeout=10)
+             _ = locking_parallel_barrier.decrement_task_count()
+         finally:
+             locking_parallel_barrier.release_lock()
+
+         graph = get_execution_graph(pipeline_config)
+         children_tasks = pipeline_config["dagAdjacency"].get(task_name, [])
+         messages = []
+         for child in children_tasks:
+             child_ancestors = sorted(graph.predecessors(child))
+             child_ancestors_complete = {a: False for a in child_ancestors}
+
+             for scenario in message.options["scenarios"]:
+                 if scenario["execution_id"] == execution_id:
+                     tasks_to_run_in_scenario = scenario["tasksToRunInScenario"]
+
+             for ancestor in child_ancestors:
+                 if ancestor in tasks_to_run_in_scenario:
+                     current_scenario_ancestor_task_key = f"{execution_id}-{ancestor}"
+                     locking_parallel_barrier = LockingParallelBarrier(
+                         self.redis_url,
+                         task_key=current_scenario_ancestor_task_key,
+                         lock_key=f"{message.options['base_case_execution_id']}-lock",
+                     )
+                     try:
+                         locking_parallel_barrier.acquire_lock(
+                             timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+                         )
+                         if not locking_parallel_barrier.task_exists():
+                             child_ancestors_complete[ancestor] = False
+                         elif locking_parallel_barrier.get_task_count() <= 0:
+                             child_ancestors_complete[ancestor] = True
+                     finally:
+                         locking_parallel_barrier.release_lock()
+                 else:
+                     base_scenario_ancestor_task_key = (
+                         f"{message.options['base_case_execution_id']}-{ancestor}"
+                     )
+                     locking_parallel_barrier = LockingParallelBarrier(
+                         self.redis_url,
+                         task_key=base_scenario_ancestor_task_key,
+                         lock_key=f"{message.options['base_case_execution_id']}-lock",
+                     )
+                     try:
+                         locking_parallel_barrier.acquire_lock(
+                             timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+                         )
+                         if not locking_parallel_barrier.task_exists():
+                             child_ancestors_complete[ancestor] = False
+                         elif locking_parallel_barrier.get_task_count() <= 0:
+                             child_ancestors_complete[ancestor] = True
+                     finally:
+                         locking_parallel_barrier.release_lock()
+
+             if any(complete is False for complete in child_ancestors_complete.values()):
+                 continue
+
+             if (
+                 message.options["base_case_execution_id"]
+                 == message.options["execution_id"]
+             ):
+                 for scenario in message.options["scenarios"]:
+                     child_ancestors = list(graph.predecessors(child))
+                     child_has_other_ancestors_in_scenario = False
+
+                     for ancestor in child_ancestors:
+                         if ancestor in scenario["tasksToRunInScenario"]:
+                             child_has_other_ancestors_in_scenario = True
+                             break
+
+                     if (
+                         child in scenario["tasksToRunInScenario"]
+                         and task_name in child_ancestors
+                         and task_name not in scenario["tasksToRunInScenario"]
+                         and not child_has_other_ancestors_in_scenario
+                     ):
+                         task_key = f"{scenario['execution_id']}-{child}"
+                         locking_parallel_barrier = LockingParallelBarrier(
+                             self.redis_url,
+                             task_key=task_key,
+                             lock_key=f"{message.options['base_case_execution_id']}-lock",
+                         )
+                         locking_parallel_barrier.set_task_count(1)
+                         handler = task_definitions[child]["handlers"][
+                             task_replacements.get(child, 0)
+                         ]
+                         server_type = task_definitions[child].get("serverType", None)
+
+                         lazy_actor = register_lazy_actor(
+                             broker,
+                             get_callable(handler),
+                             pipeline_config["metadata"],
+                             server_type,
+                         )
+                         scenario_message = lazy_actor.message()
+                         scenario_message.options["pipeline"] = pipeline
+                         if max_retries is not None:
+                             scenario_message.options["max_retries"] = max_retries
+                         scenario_message.options["task_replacements"] = (
+                             task_replacements
+                         )
+                         scenario_message.options["execution_id"] = scenario[
+                             "execution_id"
+                         ]
+
+                         scenario_message.options["task_name"] = child
+                         scenario_message.options["base_case_execution_id"] = (
+                             message.options["base_case_execution_id"]
+                         )
+                         scenario_message.options["scenarios"] = message.options[
+                             "scenarios"
+                         ]
+                         if "settings" in message.kwargs:
+                             scenario_message.kwargs["settings"] = copy(
+                                 message.kwargs["settings"]
+                             )
+                             scenario_message.kwargs["settings"]["execution_id"] = (
+                                 scenario["execution_id"]
+                             )
+                         messages.append(scenario_message)
+
+             # If we've made it here all ancestors of this child are complete, and it's time to run.
+             task_key = f"{execution_id}-{child}"
+             locking_parallel_barrier = LockingParallelBarrier(
+                 self.redis_url,
+                 task_key=task_key,
+                 lock_key=f"{message.options['base_case_execution_id']}-lock",
+             )
+             locking_parallel_barrier.set_task_count(1)
+             handler = task_definitions[child]["handlers"][
+                 task_replacements.get(child, 0)
+             ]
+             server_type = task_definitions[child].get("serverType", None)
+             lazy_actor = register_lazy_actor(
+                 broker,
+                 get_callable(handler),
+                 pipeline_config["metadata"],
+                 server_type,
+             )
+
+             child_message = lazy_actor.message()
+             child_message.options["pipeline"] = pipeline
+             if max_retries is not None:
+                 child_message.options["max_retries"] = max_retries
+             child_message.options["task_replacements"] = task_replacements
+             child_message.options["execution_id"] = execution_id
+             child_message.options["task_name"] = child
+             child_message.options["base_case_execution_id"] = message.options[
+                 "base_case_execution_id"
+             ]
+             child_message.options["scenarios"] = message.options["scenarios"]
+             if "settings" in message.kwargs:
+                 child_message.kwargs["settings"] = message.kwargs["settings"]
+                 child_message.kwargs["settings"]["execution_id"] = message.options[
+                     "execution_id"
+                 ]
+
+             messages.append(child_message)
+
+         for new_message in messages:
+             broker.enqueue(new_message)
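
Finally, a minimal registration sketch (illustrative, not from the package): PypelineMiddleware is attached to the broker so that, after each task finishes, it consults the pipeline's DAG and enqueues any child tasks whose ancestors have all completed. The URL below is a placeholder.

import dramatiq
from dramatiq.brokers.redis import RedisBroker
from pypeline.pipelines.middleware.pypeline_middleware import PypelineMiddleware

REDIS_URL = "redis://localhost:6379/0"  # illustrative URL

broker = RedisBroker(url=REDIS_URL)
broker.add_middleware(PypelineMiddleware(REDIS_URL))
dramatiq.set_broker(broker)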