scalable-pypeline 2.1.27__py2.py3-none-any.whl → 2.1.29__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of scalable-pypeline has been flagged as potentially problematic; consult the registry's advisory page for more details.

pypeline/__init__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.1.27"
1
+ __version__ = "2.1.29"
pypeline/constants.py CHANGED
@@ -50,7 +50,6 @@ DEFAULT_REDIS_HEALTH_CHECK_INTERVAL = int(
50
50
  )
51
51
 
52
52
  MESSAGE_BROKER = os.getenv("MESSAGE_BROKER", "RABBITMQ")
53
-
54
53
  MS_IN_SECONDS = 1000
55
54
  API_PATH_V1 = "/api/v1"
56
55
 
pypeline/dramatiq.py CHANGED
@@ -62,13 +62,21 @@ from pypeline.utils.dramatiq_utils import (
62
62
  )
63
63
  from pypeline.utils.graceful_shutdown_util import enable_graceful_shutdown
64
64
  from pypeline.utils.module_utils import get_callable
65
-
65
+ from dramatiq.middleware import (
66
+ Retries,
67
+ Callbacks,
68
+ TimeLimit,
69
+ AgeLimit,
70
+ ShutdownNotifications,
71
+ Pipelines,
72
+ )
66
73
 
67
74
  logging.basicConfig(level=logging.INFO)
68
75
  logger = logging.getLogger(__name__)
69
76
 
70
77
 
71
78
  def configure_default_broker(broker: Broker = None):
79
+ reworked_defaults=[AgeLimit(), TimeLimit(), ShutdownNotifications(), Callbacks(), Pipelines(), Retries()]
72
80
  redis_client = None
73
81
  if REDIS_SENTINEL_MASTER_NAME is not None:
74
82
  parsed_redis_url = urlparse(REDIS_URL)
@@ -100,6 +108,7 @@ def configure_default_broker(broker: Broker = None):
100
108
  heartbeat=DEFAULT_BROKER_CONNECTION_HEARTBEAT,
101
109
  connection_attempts=DEFAULT_BROKER_CONNECTION_ATTEMPTS,
102
110
  blocked_connection_timeout=DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
111
+ middleware=reworked_defaults
103
112
  )
104
113
  )
105
114
 
@@ -111,14 +120,15 @@ def configure_default_broker(broker: Broker = None):
111
120
  client=redis_client,
112
121
  url=REDIS_URL,
113
122
  heartbeat_timeout=DEFAULT_BROKER_HEARTBEAT_TIMEOUT,
123
+ middleware=reworked_defaults
114
124
  )
115
125
  )
116
126
 
117
127
  broker.add_middleware(Results(backend=redis_backend))
118
128
  broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))
119
129
  broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
120
- broker.add_middleware(CurrentMessage())
121
130
  broker.add_middleware(GetActiveWorkerIdMiddleware())
131
+ broker.add_middleware(CurrentMessage())
122
132
  if (
123
133
  os.getenv("RESTRICT_WORKER_SHUTDOWN_WHILE_JOBS_RUNNING", "false").lower()
124
134
  == "true"
@@ -126,6 +136,7 @@ def configure_default_broker(broker: Broker = None):
126
136
  enable_graceful_shutdown(broker=broker, redis_url=REDIS_URL)
127
137
 
128
138
  register_actors_for_workers(broker)
139
+
129
140
  set_broker(broker)
130
141
 
131
142
 
pypeline/job_runner.py ADDED
@@ -0,0 +1,123 @@
1
+ import os
2
+ import sys
3
+ import signal
4
+ import logging
5
+ import argparse
6
+ import threading
7
+ import multiprocessing as mp
8
+
9
+ # Prefer spawn for user code using multiprocessing
10
+ if mp.get_start_method(allow_none=True) != "spawn":
11
+ mp.set_start_method("spawn", force=True)
12
+
13
+ # Avoid staging more than one message; must be set before Dramatiq import path runs
14
+ os.environ.setdefault("dramatiq_queue_prefetch", "1")
15
+
16
+ from dramatiq import Worker, get_broker, set_broker # noqa: E402
17
+ try:
18
+ from pypeline.dramatiq import configure_default_broker # adjust path/name if needed
19
+ broker = configure_default_broker() or get_broker()
20
+ set_broker(broker)
21
+ except Exception:
22
+ import pypeline.dramatiq # noqa: F401
23
+ broker = get_broker()
24
+
25
+ from dramatiq.middleware import Middleware # noqa: E402
26
+
27
+
28
class OneAndDone(Middleware):
    """Signal the first successful message handled in THIS process.

    The shared event is flipped exactly once, after the first message that
    completes without an exception; a controller thread waits on it to stop
    the worker.
    """

    def __init__(self, done_event: threading.Event):
        # Event shared with the controller that shuts the worker down.
        self.done = done_event

    def after_process_message(self, broker, message, *, result=None, exception=None):
        if exception is not None:
            return  # failed message: keep waiting for a clean success
        if self.done.is_set():
            return  # already signalled once; nothing more to do
        self.done.set()
36
+
37
+
38
def job_runner(queues, idle_timeout_ms: int = 0):
    """
    Start a single-thread Dramatiq worker, process exactly one successful
    message, then exit.

    Args:
        queues: list[str] of queue names to consume from.
        idle_timeout_ms: 0 or negative => wait forever; > 0 => stop the
            worker if no message completes within that many milliseconds.
    """
    logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
    log = logging.getLogger("oneshot")

    # Normalize timeout: anything falsy or <= 0 means "wait forever".
    timeout_ms = int(idle_timeout_ms) if idle_timeout_ms and int(idle_timeout_ms) > 0 else 0
    log.info("Launching worker with queues=%s, idle_timeout_ms=%s", queues, timeout_ms or "∞")

    done = threading.Event()
    broker.add_middleware(OneAndDone(done))

    worker = Worker(
        broker,
        worker_threads=1,
        queues=queues,
        worker_timeout=1000,  # ms; how often the worker checks for stop
    )

    worker.start()

    def controller():
        # NOTE(fix): the original wrapped this body in try/finally with
        # sys.exit(0) in the finally clause. SystemExit raised in a
        # non-main thread does NOT terminate the process (the threading
        # machinery swallows it), and the finally clause also masked any
        # exception raised by worker.stop()/worker.join(). Simply
        # returning lets the main thread's join() fall through, and the
        # process exits with status 0 naturally — which is what marks a
        # K8s Job as Succeeded.
        if timeout_ms > 0:
            finished = done.wait(timeout=timeout_ms / 1000.0)
            if not finished:
                log.info("Idle timeout reached (%d ms); stopping worker.", timeout_ms)
        else:
            done.wait()

        log.info("Stopping worker now.")
        worker.stop()  # halts consumers; no new message will start
        worker.join()

    t = threading.Thread(target=controller, name="oneshot-controller", daemon=False)
    t.start()

    # Block main thread until controller finishes (which joins the worker).
    t.join()
84
+
85
+
86
def main(argv=None):
    """CLI entry point: parse queue/timeout options, then run the one-shot worker."""
    parser = argparse.ArgumentParser(description="Run a one-shot Dramatiq worker.")
    parser.add_argument(
        "-q", "--queue",
        action="append",
        default=None,
        help="Queue to listen to (repeatable). You can also pass a comma-separated list."
    )
    parser.add_argument(
        "--idle-timeout-ms",
        type=int,
        default=int(os.getenv("IDLE_TIMEOUT_MS", "0")),
        help="Exit if no job arrives within this time (0 or negative = wait forever)."
    )
    args = parser.parse_args(argv)

    # Queues come from repeated -q flags, falling back to the JOB_QUEUE env
    # var; each entry may itself be a comma-separated list.
    raw_entries = args.queue if args.queue else [os.getenv("JOB_QUEUE", "pipeline-queue")]
    queues = [
        part.strip()
        for entry in raw_entries
        for part in str(entry).split(",")
        if part and part.strip()
    ]

    if not queues:
        raise SystemExit("No queues provided. Use -q ... or set JOB_QUEUE.")

    logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
    log = logging.getLogger("oneshot")

    log.info(
        "Starting one-shot worker PID=%s, Parent PID=%s, queues=%s, idle_timeout_ms=%s",
        os.getpid(),
        os.getppid(),
        queues,
        args.idle_timeout_ms if args.idle_timeout_ms > 0 else "∞",
    )

    job_runner(queues, idle_timeout_ms=args.idle_timeout_ms)


if __name__ == "__main__":
    main()
@@ -46,82 +46,74 @@ class PypelineMiddleware(Middleware):
46
46
 
47
47
  graph = get_execution_graph(pipeline_config)
48
48
  children_tasks = pipeline_config["dagAdjacency"].get(task_name, [])
49
-
50
49
  messages = []
51
50
  for child in children_tasks:
52
51
  child_ancestors = sorted(graph.predecessors(child))
52
+ child_ancestors_complete = {a: False for a in child_ancestors}
53
53
 
54
- ancestor_tasks_complete = True
54
+ for scenario in message.options["scenarios"]:
55
+ if scenario["execution_id"] == execution_id:
56
+ tasks_to_run_in_scenario = scenario["tasksToRunInScenario"]
55
57
 
56
58
  for ancestor in child_ancestors:
57
- ancestor_task_key = f"{execution_id}-{ancestor}"
58
-
59
- locking_parallel_barrier = LockingParallelBarrier(
60
- self.redis_url,
61
- task_key=ancestor_task_key,
62
- lock_key=f"{message.options['base_case_execution_id']}-lock",
63
- )
64
- try:
65
- locking_parallel_barrier.acquire_lock(
66
- timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
59
+ if ancestor in tasks_to_run_in_scenario:
60
+ current_scenario_ancestor_task_key = f"{execution_id}-{ancestor}"
61
+ locking_parallel_barrier = LockingParallelBarrier(
62
+ self.redis_url,
63
+ task_key=current_scenario_ancestor_task_key,
64
+ lock_key=f"{message.options['base_case_execution_id']}-lock",
67
65
  )
68
-
69
- if locking_parallel_barrier.task_exists():
70
- remaining_tasks = locking_parallel_barrier.get_task_count()
71
- else:
72
- remaining_tasks = None
73
- finally:
74
- locking_parallel_barrier.release_lock()
75
-
76
- # If the lock didn't exist for the current tasks execution id then it would indicate
77
- # that this is the start of a new scenario. Therefore we need to find the ancestor
78
- # that is executed in the base case execution id and make sure it has completed
79
- tasks_to_run_in_scenario = None
80
-
81
- for scenario in message.options["scenarios"]:
82
- if scenario["execution_id"] == execution_id:
83
- tasks_to_run_in_scenario = scenario["tasksToRunInScenario"]
84
-
85
- if ancestor not in tasks_to_run_in_scenario and remaining_tasks is None:
86
- ancestor_task_key = (
66
+ try:
67
+ locking_parallel_barrier.acquire_lock(
68
+ timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
69
+ )
70
+ if not locking_parallel_barrier.task_exists():
71
+ child_ancestors_complete[ancestor] = False
72
+ elif locking_parallel_barrier.get_task_count() <= 0:
73
+ child_ancestors_complete[ancestor] = True
74
+ finally:
75
+ locking_parallel_barrier.release_lock()
76
+ else:
77
+ base_scenario_ancestor_task_key = (
87
78
  f"{message.options['base_case_execution_id']}-{ancestor}"
88
79
  )
89
-
90
80
  locking_parallel_barrier = LockingParallelBarrier(
91
81
  self.redis_url,
92
- task_key=ancestor_task_key,
82
+ task_key=base_scenario_ancestor_task_key,
93
83
  lock_key=f"{message.options['base_case_execution_id']}-lock",
94
84
  )
95
85
  try:
96
86
  locking_parallel_barrier.acquire_lock(
97
87
  timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
98
88
  )
99
-
100
- if locking_parallel_barrier.task_exists():
101
- remaining_tasks = locking_parallel_barrier.get_task_count()
89
+ if not locking_parallel_barrier.task_exists():
90
+ child_ancestors_complete[ancestor] = False
91
+ elif locking_parallel_barrier.get_task_count() <= 0:
92
+ child_ancestors_complete[ancestor] = True
102
93
  finally:
103
94
  locking_parallel_barrier.release_lock()
104
95
 
105
-
106
- if remaining_tasks is None or remaining_tasks >= 1:
107
- ancestor_tasks_complete = False
108
- break
109
-
110
- # If the child's ancestor tasks aren't complete move onto the next child to check
111
- if not ancestor_tasks_complete:
96
+ if any(complete is False for complete in child_ancestors_complete.values()):
112
97
  continue
113
98
 
114
- # Handle situation where base case kicks off new scenario.
115
99
  if (
116
100
  message.options["base_case_execution_id"]
117
101
  == message.options["execution_id"]
118
102
  ):
119
103
  for scenario in message.options["scenarios"]:
120
- child_predecessors = list(graph.predecessors(child))
104
+ child_ancestors = list(graph.predecessors(child))
105
+ child_has_other_ancestors_in_scenario = False
106
+
107
+ for ancestor in child_ancestors:
108
+ if ancestor in scenario["tasksToRunInScenario"]:
109
+ child_has_other_ancestors_in_scenario = True
110
+ break
111
+
121
112
  if (
122
113
  child in scenario["tasksToRunInScenario"]
123
- and task_name in child_predecessors
114
+ and task_name in child_ancestors
124
115
  and task_name not in scenario["tasksToRunInScenario"]
116
+ and not child_has_other_ancestors_in_scenario
125
117
  ):
126
118
  task_key = f"{scenario['execution_id']}-{child}"
127
119
  locking_parallel_barrier = LockingParallelBarrier(
@@ -168,7 +160,7 @@ class PypelineMiddleware(Middleware):
168
160
  )
169
161
  messages.append(scenario_message)
170
162
 
171
- # If we've made it here all ancestors of this child are complete and it's time to run.
163
+ # If we've made it here all ancestors of this child are complete, and it's time to run.
172
164
  task_key = f"{execution_id}-{child}"
173
165
  locking_parallel_barrier = LockingParallelBarrier(
174
166
  self.redis_url,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scalable-pypeline
3
- Version: 2.1.27
3
+ Version: 2.1.29
4
4
  Summary: PypeLine - Python pipelines for the Real World
5
5
  Home-page: https://gitlab.com/bravos2/pypeline
6
6
  Author: Bravos Power Corporation
@@ -1,9 +1,10 @@
1
- pypeline/__init__.py,sha256=nRu7ZwJqfHD5II0bBxcWlJ8czOdWw1bjjPVhymDs-NE,23
1
+ pypeline/__init__.py,sha256=U4bLKPYS0d1L7N6hY7AxGvLuei8dvTxJ7xg7Ag0OAG0,23
2
2
  pypeline/barrier.py,sha256=ojSgbuZnGKpKiSBYXTV4CxG9j1Z01YdzBSORli4MnzI,2376
3
- pypeline/constants.py,sha256=7COt9jfmLDvCNAFeN6ddRpwdvv2LpbYOCIQs6dPXpOQ,3592
4
- pypeline/dramatiq.py,sha256=XPpgPgiOaEFK8zORx9eveJ45wzcUXMjVGryFKY5Xiwg,15527
3
+ pypeline/constants.py,sha256=SSRWNCpJ-VjwZrwCzfE1bLXyD6_h9_bHBr4olG9Oab0,3591
4
+ pypeline/dramatiq.py,sha256=NHwKrXG_BN7O9zjGt8y04DfynsHJmINzxPiJl4_0rZY,15862
5
5
  pypeline/executable_job_config_schema.py,sha256=P2Z8SO057Jgyt4I5oZxcbEi1iaZkLoAh7qp8PtuqcqU,1010
6
6
  pypeline/extensions.py,sha256=BzOTnXhNxap3N7uIUUh_hO6dDwx08Vc_RJDE93_K0Lo,610
7
+ pypeline/job_runner.py,sha256=_8yp6s4pJBUoR_Eu0SYcIvdQFRsFn2pHeru1uHooyzw,4172
7
8
  pypeline/pipeline_config_schema.py,sha256=kRZcCMlk2FIITDzVrAfcSmHnxi1mIWmDzasTW0TnaAU,11169
8
9
  pypeline/pipeline_settings_schema.py,sha256=s_oqZ-TBiLdInSprR9k6myw1zykV2PpEbEfVaMfb5VY,21010
9
10
  pypeline/pypeline_yaml.py,sha256=hbOdwKDUg10wsZnwVaBt46FbpS3iuB3bLwVuYyXh4OY,17270
@@ -23,7 +24,7 @@ pypeline/pipelines/middleware/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
23
24
  pypeline/pipelines/middleware/get_active_worker_id_middleware.py,sha256=X4ZfRk3L8MD00DTsGHth7oOdy-W7LQV96T8vu5UC42A,755
24
25
  pypeline/pipelines/middleware/graceful_shutdown_middleware.py,sha256=k37zmFk9dOye05BoQP7KcB9MEQgvodI16kOJyYhRyAc,1764
25
26
  pypeline/pipelines/middleware/parallel_pipeline_middleware.py,sha256=kTp6niYoe2nXIiN6EGRfdpxrJyioo0GPxDkfefbGlEk,2821
26
- pypeline/pipelines/middleware/pypeline_middleware.py,sha256=tnQcewRCCaQaNMTx9Kz0gx47YZxBJCDW9UH_8cBLlwY,9317
27
+ pypeline/pipelines/middleware/pypeline_middleware.py,sha256=FjREuPDdTfeYOVGVUjJgx8Szh6yu7g8OnHRc5N__448,9385
27
28
  pypeline/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
29
  pypeline/utils/config_utils.py,sha256=bblh8clRDDNQpQIDkBrtskZBo-csakoO1IJiaqVGyr8,2508
29
30
  pypeline/utils/dramatiq_utils.py,sha256=tbG3o5FD0zUOKtJJJECE2cM2ovDv3OLQ5CplJ9cXfM4,4001
@@ -33,9 +34,9 @@ pypeline/utils/module_utils.py,sha256=-yEJIukDCoXnmlZVXB6Dww25tH6GdPE5SoFqv6pfdV
33
34
  pypeline/utils/pipeline_utils.py,sha256=kGP1QwCJikGC5QNRtzRXCDVewyRMpWIqERTNnxGLlSY,4795
34
35
  pypeline/utils/schema_utils.py,sha256=Fgl0y9Cuo_TZeEx_S3gaSVnLjn6467LTkjb2ek7Ms98,851
35
36
  tests/fixtures/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- scalable_pypeline-2.1.27.dist-info/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
37
- scalable_pypeline-2.1.27.dist-info/METADATA,sha256=D-3zqFydd6j3FTAvnz1VSjQBobpx7XcR_Oz4J_-2BU8,5985
38
- scalable_pypeline-2.1.27.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
39
- scalable_pypeline-2.1.27.dist-info/entry_points.txt,sha256=uWs10ODfHSBKo2Cx_QaUjPHQTpZ3e77j9VlAdRRmMyg,119
40
- scalable_pypeline-2.1.27.dist-info/top_level.txt,sha256=C7dpkEOc_-nnsAQb28BfQknjD6XHRyS9ZrvVeoIbV7s,15
41
- scalable_pypeline-2.1.27.dist-info/RECORD,,
37
+ scalable_pypeline-2.1.29.dist-info/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
38
+ scalable_pypeline-2.1.29.dist-info/METADATA,sha256=jBs9oLJgLvPN75iy2QqHHB4YBhQMZX3dhXEo63pdIy0,5985
39
+ scalable_pypeline-2.1.29.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
40
+ scalable_pypeline-2.1.29.dist-info/entry_points.txt,sha256=q5V8Qe0-5mrZ982FGbUaT2-bEURsiGjLNKEvpR6m7VU,176
41
+ scalable_pypeline-2.1.29.dist-info/top_level.txt,sha256=C7dpkEOc_-nnsAQb28BfQknjD6XHRyS9ZrvVeoIbV7s,15
42
+ scalable_pypeline-2.1.29.dist-info/RECORD,,
@@ -1,3 +1,6 @@
1
+ [console_scripts]
2
+ job-runner = pypeline.job_runner:main
3
+
1
4
  [flask.commands]
2
5
  cron-scheduler = pypeline.dramatiq:cron_scheduler
3
6
  pypeline-worker = pypeline.dramatiq:pypeline_worker