scalable-pypeline 2.1.3.tar.gz → 2.1.31.tar.gz

This diff shows the content of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.

Potentially problematic release: this version of scalable-pypeline has been flagged as possibly problematic.

Files changed (52)
  1. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/PKG-INFO +2 -2
  2. scalable_pypeline-2.1.31/pypeline/__init__.py +1 -0
  3. scalable_pypeline-2.1.31/pypeline/barrier.py +63 -0
  4. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/constants.py +15 -2
  5. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/dramatiq.py +141 -62
  6. scalable_pypeline-2.1.31/pypeline/executable_job_config_schema.py +35 -0
  7. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/flask/api/pipelines.py +1 -25
  8. scalable_pypeline-2.1.31/pypeline/job_runner.py +205 -0
  9. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/pipeline_config_schema.py +15 -0
  10. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/pipeline_settings_schema.py +22 -2
  11. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/pipelines/composition/parallel_pipeline_composition.py +29 -2
  12. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/pipelines/composition/pypeline_composition.py +77 -50
  13. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/pipelines/factory.py +19 -40
  14. scalable_pypeline-2.1.31/pypeline/pipelines/middleware/get_active_worker_id_middleware.py +22 -0
  15. scalable_pypeline-2.1.31/pypeline/pipelines/middleware/graceful_shutdown_middleware.py +50 -0
  16. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/pipelines/middleware/pypeline_middleware.py +68 -54
  17. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/pypeline_yaml.py +10 -0
  18. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/utils/config_utils.py +13 -0
  19. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/utils/dramatiq_utils.py +14 -6
  20. scalable_pypeline-2.1.31/pypeline/utils/executable_job_util.py +35 -0
  21. scalable_pypeline-2.1.31/pypeline/utils/graceful_shutdown_util.py +39 -0
  22. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/PKG-INFO +1 -1
  23. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/SOURCES.txt +6 -1
  24. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/entry_points.txt +3 -0
  25. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/requires.txt +2 -1
  26. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/setup.cfg +3 -0
  27. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/setup.py +6 -5
  28. scalable-pypeline-2.1.3/pypeline/__init__.py +0 -1
  29. scalable-pypeline-2.1.3/pypeline/barrier.py +0 -37
  30. scalable-pypeline-2.1.3/pypeline/pipelines/middleware/deduplication_middleware.py +0 -94
  31. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/LICENSE +0 -0
  32. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/MANIFEST.in +0 -0
  33. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/README.md +0 -0
  34. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/extensions.py +0 -0
  35. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/flask/__init__.py +0 -0
  36. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/flask/api/__init__.py +0 -0
  37. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/flask/api/schedules.py +0 -0
  38. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/flask/decorators.py +0 -0
  39. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/flask/flask_pypeline.py +0 -0
  40. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/pipelines/__init__.py +0 -0
  41. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/pipelines/composition/__init__.py +0 -0
  42. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/pipelines/middleware/__init__.py +0 -0
  43. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/pipelines/middleware/parallel_pipeline_middleware.py +0 -0
  44. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/schedule_config_schema.py +0 -0
  45. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/utils/__init__.py +0 -0
  46. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/utils/module_utils.py +0 -0
  47. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/utils/pipeline_utils.py +0 -0
  48. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/utils/schema_utils.py +0 -0
  49. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/requirements.txt +0 -0
  50. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/dependency_links.txt +0 -0
  51. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/top_level.txt +0 -0
  52. {scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/tests/fixtures/__init__.py +0 -0
{scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
- Name: scalable-pypeline
- Version: 2.1.3
+ Name: scalable_pypeline
+ Version: 2.1.31
  Summary: PypeLine - Python pipelines for the Real World
  Home-page: https://gitlab.com/bravos2/pypeline
  Author: Bravos Power Corporation
scalable_pypeline-2.1.31/pypeline/__init__.py
@@ -0,0 +1 @@
+ __version__ = "2.1.31"
scalable_pypeline-2.1.31/pypeline/barrier.py
@@ -0,0 +1,63 @@
+ import time
+ import redis
+ from redis.sentinel import Sentinel
+ from urllib.parse import urlparse
+ from pypeline.constants import (
+     REDIS_SENTINEL_MASTER_NAME,
+     DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+     DEFAULT_REDIS_SOCKET_TIMEOUT,
+     DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+     DEFAULT_REDIS_SOCKET_KEEPALIVE,
+     DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+ )
+ 
+ 
+ class LockingParallelBarrier:
+     def __init__(self, redis_url, task_key="task_counter", lock_key="task_lock"):
+         # Connect to Redis using the provided URL
+         if REDIS_SENTINEL_MASTER_NAME is not None:
+             parsed_redis_url = urlparse(redis_url)
+             redis_sentinel = Sentinel(
+                 sentinels=[(parsed_redis_url.hostname, parsed_redis_url.port)],
+             )
+             self.redis = redis_sentinel.master_for(
+                 REDIS_SENTINEL_MASTER_NAME,
+                 db=int(parsed_redis_url.path[1]) if parsed_redis_url.path else 0,
+                 password=parsed_redis_url.password,
+                 socket_connect_timeout=DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+                 socket_timeout=DEFAULT_REDIS_SOCKET_TIMEOUT,
+                 retry_on_timeout=DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+                 socket_keepalive=DEFAULT_REDIS_SOCKET_KEEPALIVE,
+                 health_check_interval=DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+                 decode_responses=True,
+             )
+         else:
+             self.redis = redis.StrictRedis.from_url(redis_url, decode_responses=True)
+         self.task_key = task_key
+         self.lock_key = lock_key
+ 
+     def acquire_lock(self, timeout=5):
+         """Acquire a lock using Redis."""
+         while True:
+             if self.redis.set(self.lock_key, "locked", nx=True, ex=timeout):
+                 return True
+             time.sleep(0.1)
+ 
+     def release_lock(self):
+         """Release the lock in Redis."""
+         self.redis.delete(self.lock_key)
+ 
+     def set_task_count(self, count):
+         """Initialize the task counter in Redis."""
+         self.redis.set(self.task_key, count)
+ 
+     def decrement_task_count(self):
+         """Decrement the task counter in Redis."""
+         return self.redis.decr(self.task_key)
+ 
+     def task_exists(self):
+         return self.redis.exists(self.task_key)
+ 
+     def get_task_count(self):
+         """Get the current value of the task counter."""
+         return int(self.redis.get(self.task_key) or 0)
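The rewritten barrier couples a Redis lock with a task counter, so parallel branches can decrement safely and the last finisher can trigger the callback. A minimal usage sketch (the Redis URL and key names are illustrative, not taken from the package):

    from pypeline.barrier import LockingParallelBarrier

    # One coordinator sets the counter; each parallel task decrements it.
    barrier = LockingParallelBarrier(
        "redis://localhost:6379/0",       # illustrative URL
        task_key="pipeline:123:tasks",    # illustrative key names
        lock_key="pipeline:123:lock",
    )
    barrier.set_task_count(3)             # three tasks in flight

    # At the end of each task:
    barrier.acquire_lock(timeout=5)
    try:
        remaining = barrier.decrement_task_count()
    finally:
        barrier.release_lock()
    if remaining == 0:
        print("last task done; safe to fire the callback")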
{scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/constants.py
@@ -1,5 +1,4 @@
- """ Pypeline Constants
- """
+ """Pypeline Constants"""
 
  import os
 
@@ -15,6 +14,7 @@ DEFAULT_BROKER_CALLABLE = os.environ.get(
  # Pypeline broker connections
  RABBIT_URL = os.environ.get("RABBIT_URL", "amqp://admin:password@127.0.0.1:5672")
  REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
+ REDIS_SENTINEL_MASTER_NAME = os.environ.get("REDIS_SENTINEL_MASTER_NAME", None)
 
  # Pypeline task defaults
  PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL = int(
@@ -36,7 +36,20 @@ DEFAULT_BROKER_CONNECTION_ATTEMPTS = int(
  DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT = int(
      os.getenv("DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT", 30)
  )
+ DEFAULT_BROKER_HEARTBEAT_TIMEOUT = int(
+     os.getenv("DEFAULT_BROKER_HEARTBEAT_TIMEOUT", 300000)
+ )
+ DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT = int(
+     os.getenv("DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT", 1)
+ )
+ DEFAULT_REDIS_SOCKET_TIMEOUT = int(os.getenv("DEFAULT_REDIS_SOCKET_TIMEOUT", 2))
+ DEFAULT_REDIS_RETRY_ON_TIMEOUT = bool(os.getenv("DEFAULT_REDIS_RETRY_ON_TIMEOUT", True))
+ DEFAULT_REDIS_SOCKET_KEEPALIVE = bool(os.getenv("DEFAULT_REDIS_SOCKET_KEEPALIVE", True))
+ DEFAULT_REDIS_HEALTH_CHECK_INTERVAL = int(
+     os.getenv("DEFAULT_REDIS_HEALTH_CHECK_INTERVAL", 30)
+ )
 
+ MESSAGE_BROKER = os.getenv("MESSAGE_BROKER", "RABBITMQ")
  MS_IN_SECONDS = 1000
  API_PATH_V1 = "/api/v1"
 
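One caveat with the two bool(os.getenv(...)) defaults added above: bool() on a string only tests emptiness, so exporting the variable as "false" or "0" still yields True. A quick demonstration:

    import os

    os.environ["DEFAULT_REDIS_RETRY_ON_TIMEOUT"] = "false"
    # Non-empty strings are truthy, so this prints True, not False.
    print(bool(os.getenv("DEFAULT_REDIS_RETRY_ON_TIMEOUT", True)))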
{scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/dramatiq.py
@@ -1,10 +1,14 @@
+ import copy
  import typing
  import pika
  import logging
+ import os
 
  import click
  from urllib.parse import urlparse
 
+ from dramatiq.brokers.redis import RedisBroker
+ from redis.sentinel import Sentinel
  from pypeline.extensions import pypeline_config
  from warnings import warn
  from apscheduler.schedulers.blocking import BlockingScheduler
@@ -26,20 +30,29 @@ from flask.cli import with_appcontext
 
  from pypeline.constants import (
      REDIS_URL,
+     REDIS_SENTINEL_MASTER_NAME,
      RABBIT_URL,
      DEFAULT_BROKER_CALLABLE,
      DEFAULT_BROKER_CONNECTION_HEARTBEAT,
      DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
      DEFAULT_BROKER_CONNECTION_ATTEMPTS,
+     MESSAGE_BROKER,
+     DEFAULT_BROKER_HEARTBEAT_TIMEOUT,
+     DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+     DEFAULT_REDIS_SOCKET_TIMEOUT,
+     DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+     DEFAULT_REDIS_SOCKET_KEEPALIVE,
+     DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+ )
+ from pypeline.pipelines.middleware.get_active_worker_id_middleware import (
+     GetActiveWorkerIdMiddleware,
  )
  from pypeline.pipelines.middleware.parallel_pipeline_middleware import ParallelPipeline
  from pypeline.pipelines.middleware.pypeline_middleware import PypelineMiddleware
- from pypeline.pipelines.middleware.deduplication_middleware import (
-     DeduplicationMiddleware,
- )
  from pypeline.utils.config_utils import (
      retrieve_latest_schedule_config,
      get_service_config_for_worker,
+     retrieve_executable_job_config,
  )
  from pypeline.utils.dramatiq_utils import (
      guess_code_directory,
@@ -47,83 +60,149 @@ from pypeline.utils.dramatiq_utils import (
      register_lazy_actor,
      LazyActor,
  )
+ from pypeline.utils.graceful_shutdown_util import enable_graceful_shutdown
  from pypeline.utils.module_utils import get_callable
- 
+ from dramatiq.middleware import (
+     Retries,
+     Callbacks,
+     TimeLimit,
+     AgeLimit,
+     ShutdownNotifications,
+     Pipelines,
+ )
 
  logging.basicConfig(level=logging.INFO)
  logger = logging.getLogger(__name__)
 
 
  def configure_default_broker(broker: Broker = None):
-     redis_backend = RedisBackend(url=REDIS_URL)
-     parsed_url = urlparse(RABBIT_URL)
-     credentials = pika.PlainCredentials(parsed_url.username, parsed_url.password)
-     rabbit_broker = (
-         broker
-         if broker is not None
-         else RabbitmqBroker(
-             host=parsed_url.hostname,
-             port=parsed_url.port,
-             credentials=credentials,
-             heartbeat=DEFAULT_BROKER_CONNECTION_HEARTBEAT,
-             connection_attempts=DEFAULT_BROKER_CONNECTION_ATTEMPTS,
-             blocked_connection_timeout=DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
+     reworked_defaults = [AgeLimit(), TimeLimit(), ShutdownNotifications(), Callbacks(), Pipelines(), Retries()]
+     redis_client = None
+     if REDIS_SENTINEL_MASTER_NAME is not None:
+         parsed_redis_url = urlparse(REDIS_URL)
+         redis_sentinel = Sentinel(
+             sentinels=[(parsed_redis_url.hostname, parsed_redis_url.port)],
+         )
+         redis_client = redis_sentinel.master_for(
+             REDIS_SENTINEL_MASTER_NAME,
+             db=int(parsed_redis_url.path[1]) if parsed_redis_url.path else 0,
+             password=parsed_redis_url.password,
+             socket_connect_timeout=DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+             socket_timeout=DEFAULT_REDIS_SOCKET_TIMEOUT,
+             retry_on_timeout=DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+             socket_keepalive=DEFAULT_REDIS_SOCKET_KEEPALIVE,
+             health_check_interval=DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+         )
+     redis_backend = RedisBackend(client=redis_client, url=REDIS_URL)
+ 
+     if MESSAGE_BROKER == "RABBITMQ":
+         parsed_url = urlparse(RABBIT_URL)
+         credentials = pika.PlainCredentials(parsed_url.username, parsed_url.password)
+         broker = (
+             broker
+             if broker is not None
+             else RabbitmqBroker(
+                 host=parsed_url.hostname,
+                 port=parsed_url.port,
+                 credentials=credentials,
+                 heartbeat=DEFAULT_BROKER_CONNECTION_HEARTBEAT,
+                 connection_attempts=DEFAULT_BROKER_CONNECTION_ATTEMPTS,
+                 blocked_connection_timeout=DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
+                 middleware=reworked_defaults
+             )
+         )
+ 
+     elif MESSAGE_BROKER == "REDIS":
+         broker = (
+             broker
+             if broker is not None
+             else RedisBroker(
+                 client=redis_client,
+                 url=REDIS_URL,
+                 heartbeat_timeout=DEFAULT_BROKER_HEARTBEAT_TIMEOUT,
+                 middleware=reworked_defaults
+             )
          )
-     )
-     rabbit_broker.add_middleware(Results(backend=redis_backend))
-     rabbit_broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))
-     rabbit_broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
-     rabbit_broker.add_middleware(CurrentMessage())
-     register_actors_for_workers(rabbit_broker)
-     rabbit_broker.add_middleware(DeduplicationMiddleware(redis_url=REDIS_URL))
-     set_broker(rabbit_broker)
+ 
+     broker.add_middleware(Results(backend=redis_backend))
+     broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))
+     broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
+     broker.add_middleware(GetActiveWorkerIdMiddleware())
+     broker.add_middleware(CurrentMessage())
+     if (
+         os.getenv("RESTRICT_WORKER_SHUTDOWN_WHILE_JOBS_RUNNING", "false").lower()
+         == "true"
+     ):
+         enable_graceful_shutdown(broker=broker, redis_url=REDIS_URL)
+ 
+     register_actors_for_workers(broker)
+ 
+     set_broker(broker)
 
 
  def register_actors_for_workers(broker: Broker):
      service = get_service_config_for_worker(pypeline_config)
      scheduled_jobs_config = retrieve_latest_schedule_config()
+     executable_jobs_config = retrieve_executable_job_config()
 
      if not service:
          return
-     for task in service.get("registeredTasks", []):
-         pipeline_meta = None
-         for pipeline_key, pipeline in pypeline_config["pipelines"].items():
-             pipeline_config = pipeline["config"]
+ 
+     worker_registered_tasks = [
+         task_handler["handler"] for task_handler in service.get("registeredTasks")
+     ]
+ 
+     # Loop over the pipelines to get metadata and other information about the task for registration
+     for pipeline_key, pipeline in pypeline_config["pipelines"].items():
+         for task, task_handler_meta in pipeline["config"]["taskDefinitions"].items():
              if pipeline["schemaVersion"] == 1:
-                 pipeline_tasks = [
-                     t["handler"] for t in pipeline_config["taskDefinitions"].values()
-                 ]
+                 # Check if any task in this pipeline is registered
+                 task_handlers = [task_handler_meta["handler"]]
              elif pipeline["schemaVersion"] == 2:
-                 pipeline_tasks = [
-                     handler
-                     for key in pipeline_config["taskDefinitions"]
-                     for handler in pipeline_config["taskDefinitions"][key].get(
-                         "handlers", []
-                     )
-                 ]
-             if task["handler"] in pipeline_tasks:
-                 pipeline_meta = pipeline_config["metadata"]
-                 break
- 
-         if pipeline_meta is None:
-             for job in scheduled_jobs_config:
-                 config = job["config"]
-                 if config["task"] == task["handler"]:
-                     pipeline_meta = {"queue": config.get("queue", "default")}
- 
-         if pipeline_meta is None:
-             raise ValueError(
-                 f"Registered task {task['handler']} is not defined in a pipeline or scheduled task"
-             )
- 
-         try:
-             worker_path = task["handler"]  # Required, no default
-             tmp_handler = get_callable(worker_path)
-             if pipeline_meta and pipeline_meta.get("maxRetry", 0) >= 0:
-                 pipeline_meta["store_results"] = True
-             _ = register_lazy_actor(broker, tmp_handler, pipeline_meta)
-         except Exception as e:
-             logger.exception(f"Unable to add a task to dramatiq: {e}")
+                 task_handlers = [t for t in task_handler_meta["handlers"]]
+ 
+             for task_handler in task_handlers:
+                 if task_handler in worker_registered_tasks:
+                     server_type = task_handler_meta.get("serverType", None)
+ 
+                     try:
+                         pipeline_metadata = copy.deepcopy(
+                             pipeline["config"]["metadata"]
+                         )
+                         tmp_handler = get_callable(task_handler)
+                         if pipeline_metadata.get("maxRetry", 0) >= 0:
+                             pipeline_metadata["store_results"] = True
+                         _ = register_lazy_actor(
+                             broker, tmp_handler, pipeline_metadata, server_type
+                         )
+                     except Exception as e:
+                         logger.exception(
+                             f"Unable to add a task {task_handler} to dramatiq: {e}"
+                         )
+     # Loop over the scheduled jobs and create metadata and other information about the task for registration
+     for job in scheduled_jobs_config:
+         config = job["config"]
+         if config["task"] in worker_registered_tasks:
+             pipeline_meta = {"queue": config.get("queue", "default")}
+             try:
+                 tmp_handler = get_callable(config["task"])
+                 if pipeline_meta and pipeline_meta.get("maxRetry", 0) >= 0:
+                     pipeline_meta["store_results"] = True
+                 _ = register_lazy_actor(broker, tmp_handler, pipeline_meta, None)
+             except Exception as e:
+                 logger.exception(f"Unable to add a task to dramatiq: {e}")
+ 
+     for job in executable_jobs_config or []:
+         config = job["config"]
+         if config["task"] in worker_registered_tasks:
+             pipeline_meta = {"queue": config.get("queue", "default")}
+             try:
+                 tmp_handler = get_callable(config["task"])
+                 if pipeline_meta and pipeline_meta.get("maxRetry", 0) >= 0:
+                     pipeline_meta["store_results"] = True
+                 _ = register_lazy_actor(broker, tmp_handler, pipeline_meta, None)
+             except Exception as e:
+                 logger.exception(f"Unable to add a task to dramatiq: {e}")
 
 
  class Dramatiq:
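configure_default_broker now branches on MESSAGE_BROKER, so a deployment can switch transports through environment variables alone. A rough sketch of driving it, assuming the constants are read at import time as shown above (hostnames and credentials are illustrative):

    import os

    # Option A: RabbitMQ transport (the default); Redis used only for results.
    os.environ["MESSAGE_BROKER"] = "RABBITMQ"
    os.environ["RABBIT_URL"] = "amqp://admin:password@rabbit:5672"  # illustrative
    os.environ["REDIS_URL"] = "redis://redis:6379/0"                # illustrative

    # Option B: Redis as both transport and results backend, optionally via Sentinel:
    # os.environ["MESSAGE_BROKER"] = "REDIS"
    # os.environ["REDIS_SENTINEL_MASTER_NAME"] = "mymaster"         # illustrative

    # Set the environment before importing, since constants.py reads it at import.
    from pypeline.dramatiq import configure_default_broker
    configure_default_broker()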
scalable_pypeline-2.1.31/pypeline/executable_job_config_schema.py
@@ -0,0 +1,35 @@
+ from marshmallow import Schema, EXCLUDE, fields
+ 
+ class ExecutableJobConfigSchema(Schema):
+     queue = fields.String(
+         required=True,
+         description="Name of queue on which to place task.",
+         example="my-default-queue",
+     )
+     task = fields.String(
+         required=True,
+         description="Path to task to invoke.",
+         example="my_app.module.method",
+     )
+ 
+ class ExecutableJobSchema(Schema):
+     """Definition of a single schedule entry"""
+     class Meta:
+         unknown = EXCLUDE
+ 
+     name = fields.String(
+         required=True,
+         description="Name of schedule entry.",
+         example="My Scheduled Task",
+     )
+     schemaVersion = fields.Integer(required=True)
+     config = fields.Dict(required=True)
+     enabled = fields.Boolean(
+         required=True, description="Whether entry is enabled.", example=True
+     )
+     config = fields.Nested(
+         ExecutableJobConfigSchema,
+         required=True,
+         description="Configuration information for this job.",
+     )
+ 
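Note that ExecutableJobSchema declares config twice; by ordinary class-body semantics the later fields.Nested definition wins, so entries are validated against ExecutableJobConfigSchema. A minimal validation sketch (the job dict is invented for illustration):

    from pypeline.executable_job_config_schema import ExecutableJobSchema

    job = {
        "name": "Nightly export",          # illustrative values throughout
        "schemaVersion": 1,
        "enabled": True,
        "config": {"queue": "export-queue", "task": "my_app.exports.run"},
        "owner": "data-team",              # unknown keys are dropped (EXCLUDE)
    }
    validated = ExecutableJobSchema().load(job)
    print(validated["config"]["task"])     # my_app.exports.run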
{scalable-pypeline-2.1.3 → scalable_pypeline-2.1.31}/pypeline/flask/api/pipelines.py
@@ -56,26 +56,6 @@ class InvokePipelineSchema(Schema):
          example={"document_id": "123", "send_alert": True},
          required=False,
      )
-     settings = fields.Raw(
-         description="Payload contains settings for a given pipeline",
-         example={
-             "param1": "Dataset",
-             "param2": 1,
-             "param3": 2,
-         },
-         required=False,
-     )
- 
-     task_replacements = fields.Raw(
-         description="A dictionary of task definitions as the key and the value of the index for which handler"
-         " should be executed. If none provided it will default to the first handler in the list at index position 0.",
-         example={
-             "a": 1,
-             "b": 3,
-         },
-         required=False,
-     )
- 
      scenarios = fields.List(
          fields.Nested(PipelineScenarioSchema),
          metadata={"description": "List of scenarios to run for a given pipeline"},
@@ -198,20 +178,16 @@ class PipelineInvoke(MethodView):
          retval = {"pipeline_id": pipeline_id, "status": "starting"}
          try:
              chain_payload = payload.get("chain_payload", {})
-             settings = payload.get("settings", None)
-             task_replacements = payload.get("task_replacements", {})
              scenarios = payload.get("scenarios", [])
              if pipeline_config["schemaVersion"] == 1:
                  pipeline = dag_generator(
                      pipeline_id=pipeline_id,
                      event=chain_payload,
                  )
-             elif pipeline_config["schemaVersion"] == 2 and task_replacements:
+             elif pipeline_config["schemaVersion"] == 2:
                  pipeline = dag_generator(
                      pipeline_id=pipeline_id,
-                     task_replacements=task_replacements,
                      scenarios=scenarios,
-                     settings=settings,
                  )
              retval["scenarios"] = pipeline.scenarios
              pipeline.run()
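With settings and task_replacements gone from InvokePipelineSchema, a v2 invocation varies behavior only through scenarios. A sketch of the slimmed-down request body (the scenario entries are illustrative; their exact shape comes from PipelineScenarioSchema, which is unchanged here):

    payload = {
        "chain_payload": {"document_id": "123", "send_alert": True},
        "scenarios": [
            {"name": "baseline"},          # illustrative scenario entries
            {"name": "high-throughput"},
        ],
    }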
scalable_pypeline-2.1.31/pypeline/job_runner.py
@@ -0,0 +1,205 @@
+ import os
+ import logging
+ import argparse
+ import threading
+ import multiprocessing as mp
+ 
+ # Prefer 'spawn' for user code using multiprocessing
+ if mp.get_start_method(allow_none=True) != "spawn":
+     mp.set_start_method("spawn", force=True)
+ 
+ # Avoid staging more than one message; must be set before Dramatiq import path runs
+ os.environ.setdefault("dramatiq_queue_prefetch", "1")
+ 
+ from dramatiq import Worker, get_broker, set_broker
+ from dramatiq.middleware import Middleware
+ 
+ 
+ try:
+     # If your project exposes a helper to configure the default broker, use it.
+     from pypeline.dramatiq import configure_default_broker  # adjust import if needed
+ 
+     broker = configure_default_broker() or get_broker()
+     set_broker(broker)
+ except Exception:
+     # Fall back to whatever Dramatiq has as the active broker.
+     import pypeline.dramatiq  # noqa: F401 (ensure module side-effects run)
+ 
+     broker = get_broker()
+ 
+ 
+ class OneAndDone(Middleware):
+     """
+     Signals when the first message starts ('got_work') and completes ('done').
+     If stop_on_failure=True, we'll also mark done after the first failure.
+     """
+ 
+     def __init__(
+         self,
+         got_work: threading.Event,
+         done: threading.Event,
+         *,
+         stop_on_failure: bool = False
+     ):
+         self.got_work = got_work
+         self.done = done
+         self.stop_on_failure = stop_on_failure
+ 
+     def before_process_message(self, broker, message):
+         # First time we see a message begin processing in this process
+         if not self.got_work.is_set():
+             self.got_work.set()
+ 
+     def after_process_message(self, broker, message, *, result=None, exception=None):
+         # On success (or also on failure if configured), finish this worker
+         if exception is None or self.stop_on_failure:
+             if not self.done.is_set():
+                 self.done.set()
+ 
+ 
+ def _graceful_stop(worker: Worker, log: logging.Logger):
+     try:
+         log.info("Stopping dramatiq worker...")
+         worker.stop()  # stop consumers; no new messages will start
+         worker.join()
+         log.info("Worker stopped.")
+     except Exception as e:
+         log.exception("Error stopping worker: %s", e)
+ 
+ 
+ def _close_broker(log: logging.Logger):
+     try:
+         b = get_broker()
+         if b is not None and hasattr(b, "close"):
+             b.close()
+             log.info("Broker closed.")
+     except Exception as e:
+         log.exception("Error closing broker: %s", e)
+ 
+ 
+ def job_runner(queues, idle_timeout_ms: int = 0, *, stop_on_failure: bool = False):
+     """
+     Start a single-thread Dramatiq worker. Behavior:
+       - Wait up to `idle_timeout_ms` for *a job to start* (time-to-first-job).
+       - Once a job begins, wait indefinitely for it to complete.
+       - After the first successful job completes (or first job, if stop_on_failure=True), stop and exit.
+ 
+     Args:
+         queues (list[str]): queues to listen to
+         idle_timeout_ms (int): <=0 => wait forever for first job; >0 => exit if no job starts in time
+         stop_on_failure (bool): if True, exit after first job even if it fails
+     """
+     logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
+     log = logging.getLogger("oneshot")
+ 
+     # Normalize timeout (treat non-positive as "infinite")
+     timeout_ms = (
+         int(idle_timeout_ms) if idle_timeout_ms and int(idle_timeout_ms) > 0 else 0
+     )
+     log.info(
+         "Launching worker with queues=%s, idle_timeout_ms=%s", queues, timeout_ms or "∞"
+     )
+ 
+     got_work = threading.Event()
+     done = threading.Event()
+     broker.add_middleware(OneAndDone(got_work, done, stop_on_failure=stop_on_failure))
+ 
+     worker = Worker(
+         broker,
+         worker_threads=1,  # strictly one at a time
+         queues=queues,
+         worker_timeout=1000,  # ms; how often the worker checks for stop
+     )
+ 
+     worker.start()
+ 
+     def controller():
+         log.debug("Controller thread started.")
+         try:
+             # Phase 1: Wait for *first job to start*
+             if timeout_ms > 0:
+                 started = got_work.wait(timeout_ms / 1000.0)
+                 if not started:
+                     log.info(
+                         "Idle timeout reached (%d ms); no jobs started. Stopping worker.",
+                         timeout_ms,
+                     )
+                     return
+             else:
+                 got_work.wait()
+ 
+             log.info("First job started; waiting for it to finish...")
+             # Phase 2: Wait for the first job to complete (no timeout)
+             done.wait()
+             log.info("First job finished; shutting down.")
+         finally:
+             _graceful_stop(worker, log)
+             _close_broker(log)
+             # Hard-exit to ensure K8s Job is marked Succeeded promptly, no lingering threads.
+             os._exit(0)
+ 
+     t = threading.Thread(target=controller, name="oneshot-controller", daemon=False)
+     t.start()
+     t.join()  # Block until controller completes (which shuts everything down)
+ 
+ 
+ def _parse_args(argv=None):
+     ap = argparse.ArgumentParser(description="Run a one-shot Dramatiq worker.")
+     ap.add_argument(
+         "-q",
+         "--queue",
+         action="append",
+         default=None,
+         help="Queue to listen to (repeatable). You can also pass a comma-separated list.",
+     )
+     ap.add_argument(
+         "--idle-timeout-ms",
+         type=int,
+         default=int(os.getenv("IDLE_TIMEOUT_MS", "0")),
+         help="Exit if no job starts within this time (<=0 = wait forever).",
+     )
+     ap.add_argument(
+         "--stop-on-failure",
+         action="store_true",
+         help="Exit after the first job even if it fails.",
+     )
+     return ap.parse_args(argv)
+ 
+ 
+ def main(argv=None):
+     args = _parse_args(argv)
+ 
+     # Build queue list from flags or env, support comma-separated entries.
+     raw_entries = (
+         args.queue if args.queue else [os.getenv("JOB_QUEUE", "pipeline-queue")]
+     )
+     queues = []
+     for entry in raw_entries:
+         queues.extend([q.strip() for q in str(entry).split(",") if q and q.strip()])
+ 
+     if not queues:
+         raise SystemExit("No queues provided. Use -q ... or set JOB_QUEUE.")
+ 
+     logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
+     log = logging.getLogger("oneshot")
+ 
+     pid = os.getpid()
+     ppid = os.getppid()
+     log.info(
+         "Starting one-shot worker PID=%s, Parent PID=%s, queues=%s, idle_timeout_ms=%s, stop_on_failure=%s",
+         pid,
+         ppid,
+         queues,
+         args.idle_timeout_ms if args.idle_timeout_ms > 0 else "∞",
+         args.stop_on_failure,
+     )
+ 
+     job_runner(
+         queues,
+         idle_timeout_ms=args.idle_timeout_ms,
+         stop_on_failure=args.stop_on_failure,
+     )
+ 
+ 
+ if __name__ == "__main__":
+     main()
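Because main() takes an explicit argv, the one-shot runner can be driven from Python as well as from a console entry point (queue names and timeout are illustrative):

    from pypeline.job_runner import main

    # Listen on two queues; exit if nothing starts within 60 s;
    # also exit after the first job even if it fails.
    main([
        "-q", "pipeline-queue,reports-queue",  # comma-separated lists are split
        "--idle-timeout-ms", "60000",
        "--stop-on-failure",
    ])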