scalable-pypeline 2.1.12__tar.gz → 2.1.31__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/PKG-INFO +2 -2
- scalable_pypeline-2.1.31/pypeline/__init__.py +1 -0
- scalable_pypeline-2.1.31/pypeline/barrier.py +63 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/constants.py +12 -3
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/dramatiq.py +62 -5
- scalable_pypeline-2.1.31/pypeline/executable_job_config_schema.py +35 -0
- scalable_pypeline-2.1.31/pypeline/job_runner.py +205 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipeline_config_schema.py +6 -8
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipeline_settings_schema.py +15 -1
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/composition/parallel_pipeline_composition.py +29 -2
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/composition/pypeline_composition.py +6 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/factory.py +7 -4
- scalable_pypeline-2.1.31/pypeline/pipelines/middleware/graceful_shutdown_middleware.py +50 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/middleware/pypeline_middleware.py +45 -44
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pypeline_yaml.py +10 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/utils/config_utils.py +13 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/utils/dramatiq_utils.py +9 -5
- scalable_pypeline-2.1.31/pypeline/utils/executable_job_util.py +35 -0
- scalable_pypeline-2.1.31/pypeline/utils/graceful_shutdown_util.py +39 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/PKG-INFO +1 -1
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/SOURCES.txt +5 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/entry_points.txt +3 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/requires.txt +2 -1
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/setup.cfg +3 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/setup.py +6 -5
- scalable-pypeline-2.1.12/pypeline/__init__.py +0 -1
- scalable-pypeline-2.1.12/pypeline/barrier.py +0 -37
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/LICENSE +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/MANIFEST.in +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/README.md +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/extensions.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/flask/__init__.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/flask/api/__init__.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/flask/api/pipelines.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/flask/api/schedules.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/flask/decorators.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/flask/flask_pypeline.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/__init__.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/composition/__init__.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/middleware/__init__.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/middleware/get_active_worker_id_middleware.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/middleware/parallel_pipeline_middleware.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/schedule_config_schema.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/utils/__init__.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/utils/module_utils.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/utils/pipeline_utils.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/utils/schema_utils.py +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/requirements.txt +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/dependency_links.txt +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/top_level.txt +0 -0
- {scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/tests/fixtures/__init__.py +0 -0
scalable_pypeline-2.1.31/pypeline/__init__.py ADDED

@@ -0,0 +1 @@
+__version__ = "2.1.31"
scalable_pypeline-2.1.31/pypeline/barrier.py ADDED

@@ -0,0 +1,63 @@
+import time
+import redis
+from redis.sentinel import Sentinel
+from urllib.parse import urlparse
+from pypeline.constants import (
+    REDIS_SENTINEL_MASTER_NAME,
+    DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+    DEFAULT_REDIS_SOCKET_TIMEOUT,
+    DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+    DEFAULT_REDIS_SOCKET_KEEPALIVE,
+    DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+)
+
+
+class LockingParallelBarrier:
+    def __init__(self, redis_url, task_key="task_counter", lock_key="task_lock"):
+        # Connect to Redis using the provided URL
+        if REDIS_SENTINEL_MASTER_NAME is not None:
+            parsed_redis_url = urlparse(redis_url)
+            redis_sentinel = Sentinel(
+                sentinels=[(parsed_redis_url.hostname, parsed_redis_url.port)],
+            )
+            self.redis = redis_sentinel.master_for(
+                REDIS_SENTINEL_MASTER_NAME,
+                db=int(parsed_redis_url.path[1]) if parsed_redis_url.path else 0,
+                password=parsed_redis_url.password,
+                socket_connect_timeout=DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+                socket_timeout=DEFAULT_REDIS_SOCKET_TIMEOUT,
+                retry_on_timeout=DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+                socket_keepalive=DEFAULT_REDIS_SOCKET_KEEPALIVE,
+                health_check_interval=DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+                decode_responses=True,
+            )
+        else:
+            self.redis = redis.StrictRedis.from_url(redis_url, decode_responses=True)
+        self.task_key = task_key
+        self.lock_key = lock_key
+
+    def acquire_lock(self, timeout=5):
+        """Acquire a lock using Redis."""
+        while True:
+            if self.redis.set(self.lock_key, "locked", nx=True, ex=timeout):
+                return True
+            time.sleep(0.1)
+
+    def release_lock(self):
+        """Release the lock in Redis."""
+        self.redis.delete(self.lock_key)
+
+    def set_task_count(self, count):
+        """Initialize the task counter in Redis."""
+        self.redis.set(self.task_key, count)
+
+    def decrement_task_count(self):
+        """Decrement the task counter in Redis."""
+        return self.redis.decr(self.task_key)
+
+    def task_exists(self):
+        return self.redis.exists(self.task_key)
+
+    def get_task_count(self):
+        """Get the current value of the task counter."""
+        return int(self.redis.get(self.task_key) or 0)
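A minimal usage sketch for the new barrier (class and method names come from the diff above; the Redis URL, keys, and counts are illustrative):

    from pypeline.barrier import LockingParallelBarrier

    barrier = LockingParallelBarrier(
        "redis://localhost:6379/0",         # any reachable Redis URL
        task_key="my-execution-id-task_a",  # one counter per task
        lock_key="my-execution-id-lock",    # shared lock per pipeline run
    )
    barrier.acquire_lock(timeout=5)
    try:
        barrier.set_task_count(3)  # three parallel branches must finish
    finally:
        barrier.release_lock()

    # Each branch decrements on completion; once get_task_count() reaches 0,
    # downstream middleware treats the ancestor task as complete.
    remaining = barrier.decrement_task_count()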
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/constants.py RENAMED

@@ -1,5 +1,4 @@
-"""
-"""
+"""Pypeline Constants"""
 
 import os
 
@@ -15,6 +14,7 @@ DEFAULT_BROKER_CALLABLE = os.environ.get(
 # Pypeline broker connections
 RABBIT_URL = os.environ.get("RABBIT_URL", "amqp://admin:password@127.0.0.1:5672")
 REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
+REDIS_SENTINEL_MASTER_NAME = os.environ.get("REDIS_SENTINEL_MASTER_NAME", None)
 
 # Pypeline task defaults
 PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL = int(
@@ -39,8 +39,17 @@ DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT = int(
 DEFAULT_BROKER_HEARTBEAT_TIMEOUT = int(
     os.getenv("DEFAULT_BROKER_HEARTBEAT_TIMEOUT", 300000)
 )
-
+DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT = int(
+    os.getenv("DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT", 1)
+)
+DEFAULT_REDIS_SOCKET_TIMEOUT = int(os.getenv("DEFAULT_REDIS_SOCKET_TIMEOUT", 2))
+DEFAULT_REDIS_RETRY_ON_TIMEOUT = bool(os.getenv("DEFAULT_REDIS_RETRY_ON_TIMEOUT", True))
+DEFAULT_REDIS_SOCKET_KEEPALIVE = bool(os.getenv("DEFAULT_REDIS_SOCKET_KEEPALIVE", True))
+DEFAULT_REDIS_HEALTH_CHECK_INTERVAL = int(
+    os.getenv("DEFAULT_REDIS_HEALTH_CHECK_INTERVAL", 30)
+)
 
+MESSAGE_BROKER = os.getenv("MESSAGE_BROKER", "RABBITMQ")
 MS_IN_SECONDS = 1000
 API_PATH_V1 = "/api/v1"
 
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/dramatiq.py RENAMED

@@ -2,12 +2,13 @@ import copy
 import typing
 import pika
 import logging
+import os
 
 import click
 from urllib.parse import urlparse
 
 from dramatiq.brokers.redis import RedisBroker
-
+from redis.sentinel import Sentinel
 from pypeline.extensions import pypeline_config
 from warnings import warn
 from apscheduler.schedulers.blocking import BlockingScheduler
@@ -29,6 +30,7 @@ from flask.cli import with_appcontext
 
 from pypeline.constants import (
     REDIS_URL,
+    REDIS_SENTINEL_MASTER_NAME,
     RABBIT_URL,
     DEFAULT_BROKER_CALLABLE,
     DEFAULT_BROKER_CONNECTION_HEARTBEAT,
@@ -36,6 +38,11 @@ from pypeline.constants import (
     DEFAULT_BROKER_CONNECTION_ATTEMPTS,
     MESSAGE_BROKER,
     DEFAULT_BROKER_HEARTBEAT_TIMEOUT,
+    DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+    DEFAULT_REDIS_SOCKET_TIMEOUT,
+    DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+    DEFAULT_REDIS_SOCKET_KEEPALIVE,
+    DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
 )
 from pypeline.pipelines.middleware.get_active_worker_id_middleware import (
     GetActiveWorkerIdMiddleware,
@@ -45,6 +52,7 @@ from pypeline.pipelines.middleware.pypeline_middleware import PypelineMiddleware
 from pypeline.utils.config_utils import (
     retrieve_latest_schedule_config,
     get_service_config_for_worker,
+    retrieve_executable_job_config,
 )
 from pypeline.utils.dramatiq_utils import (
     guess_code_directory,
@@ -52,15 +60,40 @@ from pypeline.utils.dramatiq_utils import (
     register_lazy_actor,
     LazyActor,
 )
+from pypeline.utils.graceful_shutdown_util import enable_graceful_shutdown
 from pypeline.utils.module_utils import get_callable
-
+from dramatiq.middleware import (
+    Retries,
+    Callbacks,
+    TimeLimit,
+    AgeLimit,
+    ShutdownNotifications,
+    Pipelines,
+)
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 
 def configure_default_broker(broker: Broker = None):
-
+    reworked_defaults = [AgeLimit(), TimeLimit(), ShutdownNotifications(), Callbacks(), Pipelines(), Retries()]
+    redis_client = None
+    if REDIS_SENTINEL_MASTER_NAME is not None:
+        parsed_redis_url = urlparse(REDIS_URL)
+        redis_sentinel = Sentinel(
+            sentinels=[(parsed_redis_url.hostname, parsed_redis_url.port)],
+        )
+        redis_client = redis_sentinel.master_for(
+            REDIS_SENTINEL_MASTER_NAME,
+            db=int(parsed_redis_url.path[1]) if parsed_redis_url.path else 0,
+            password=parsed_redis_url.password,
+            socket_connect_timeout=DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+            socket_timeout=DEFAULT_REDIS_SOCKET_TIMEOUT,
+            retry_on_timeout=DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+            socket_keepalive=DEFAULT_REDIS_SOCKET_KEEPALIVE,
+            health_check_interval=DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+        )
+    redis_backend = RedisBackend(client=redis_client, url=REDIS_URL)
 
     if MESSAGE_BROKER == "RABBITMQ":
         parsed_url = urlparse(RABBIT_URL)
@@ -75,6 +108,7 @@ def configure_default_broker(broker: Broker = None):
                 heartbeat=DEFAULT_BROKER_CONNECTION_HEARTBEAT,
                 connection_attempts=DEFAULT_BROKER_CONNECTION_ATTEMPTS,
                 blocked_connection_timeout=DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
+                middleware=reworked_defaults
             )
         )
@@ -83,22 +117,33 @@ def configure_default_broker(broker: Broker = None):
         broker
         if broker is not None
         else RedisBroker(
-
+            client=redis_client,
+            url=REDIS_URL,
+            heartbeat_timeout=DEFAULT_BROKER_HEARTBEAT_TIMEOUT,
+            middleware=reworked_defaults
         )
     )
 
     broker.add_middleware(Results(backend=redis_backend))
     broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))
     broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
-    broker.add_middleware(CurrentMessage())
     broker.add_middleware(GetActiveWorkerIdMiddleware())
+    broker.add_middleware(CurrentMessage())
+    if (
+        os.getenv("RESTRICT_WORKER_SHUTDOWN_WHILE_JOBS_RUNNING", "false").lower()
+        == "true"
+    ):
+        enable_graceful_shutdown(broker=broker, redis_url=REDIS_URL)
+
     register_actors_for_workers(broker)
+
     set_broker(broker)
 
 
 def register_actors_for_workers(broker: Broker):
     service = get_service_config_for_worker(pypeline_config)
     scheduled_jobs_config = retrieve_latest_schedule_config()
+    executable_jobs_config = retrieve_executable_job_config()
 
     if not service:
         return
@@ -147,6 +192,18 @@ def register_actors_for_workers(broker: Broker):
         except Exception as e:
             logger.exception(f"Unable to add a task to dramatiq: {e}")
 
+    for job in executable_jobs_config or []:
+        config = job["config"]
+        if config["task"] in worker_registered_tasks:
+            pipeline_meta = {"queue": config.get("queue", "default")}
+            try:
+                tmp_handler = get_callable(config["task"])
+                if pipeline_meta and pipeline_meta.get("maxRetry", 0) >= 0:
+                    pipeline_meta["store_results"] = True
+                _ = register_lazy_actor(broker, tmp_handler, pipeline_meta, None)
+            except Exception as e:
+                logger.exception(f"Unable to add a task to dramatiq: {e}")
+
 
 class Dramatiq:
     """Flask extension bridging Dramatiq broker and Flask app.
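A hedged sketch of how the new knobs are consumed. The environment variable names come from the diffs above; the values are examples, and they must be set before pypeline.dramatiq (and thus pypeline.constants) is imported, since they are read at import time:

    import os

    os.environ["REDIS_URL"] = "redis://sentinel-host:26379/0"   # example value
    os.environ["REDIS_SENTINEL_MASTER_NAME"] = "mymaster"       # enables the Sentinel path
    os.environ["RESTRICT_WORKER_SHUTDOWN_WHILE_JOBS_RUNNING"] = "true"  # opt in to graceful shutdown

    from pypeline.dramatiq import configure_default_broker

    # Builds the broker with the explicit default middleware list, attaches
    # Results/ParallelPipeline/PypelineMiddleware, and registers actors.
    configure_default_broker()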
scalable_pypeline-2.1.31/pypeline/executable_job_config_schema.py ADDED

@@ -0,0 +1,35 @@
+from marshmallow import Schema, EXCLUDE, fields
+
+
+class ExecutableJobConfigSchema(Schema):
+    queue = fields.String(
+        required=True,
+        description="Name of queue on which to place task.",
+        example="my-default-queue",
+    )
+    task = fields.String(
+        required=True,
+        description="Path to task to invoke.",
+        example="my_app.module.method",
+    )
+
+
+class ExecutableJobSchema(Schema):
+    """Definition of a single schedule entry"""
+
+    class Meta:
+        unknown = EXCLUDE
+
+    name = fields.String(
+        required=True,
+        description="Name of schedule entry.",
+        example="My Scheduled Task",
+    )
+    schemaVersion = fields.Integer(required=True)
+    config = fields.Dict(required=True)
+    enabled = fields.Boolean(
+        required=True, description="Whether entry is enabled.", example=True
+    )
+    config = fields.Nested(
+        ExecutableJobConfigSchema,
+        required=True,
+        description="Configuration information for this job.",
+    )
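A quick validation sketch against the new schema (field names from the diff above; payload values are illustrative):

    from pypeline.executable_job_config_schema import ExecutableJobSchema

    job = {
        "name": "My Scheduled Task",
        "schemaVersion": 1,
        "enabled": True,
        "config": {"queue": "my-default-queue", "task": "my_app.module.method"},
    }
    errors = ExecutableJobSchema().validate(job)  # {} when the entry is well-formed
    assert not errors, errors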
scalable_pypeline-2.1.31/pypeline/job_runner.py ADDED

@@ -0,0 +1,205 @@
+import os
+import logging
+import argparse
+import threading
+import multiprocessing as mp
+
+# Prefer 'spawn' for user code using multiprocessing
+if mp.get_start_method(allow_none=True) != "spawn":
+    mp.set_start_method("spawn", force=True)
+
+# Avoid staging more than one message; must be set before Dramatiq import path runs
+os.environ.setdefault("dramatiq_queue_prefetch", "1")
+
+from dramatiq import Worker, get_broker, set_broker
+from dramatiq.middleware import Middleware
+
+
+try:
+    # If your project exposes a helper to configure the default broker, use it.
+    from pypeline.dramatiq import configure_default_broker  # adjust import if needed
+
+    broker = configure_default_broker() or get_broker()
+    set_broker(broker)
+except Exception:
+    # Fall back to whatever Dramatiq has as the active broker.
+    import pypeline.dramatiq  # noqa: F401 (ensure module side-effects run)
+
+    broker = get_broker()
+
+
+class OneAndDone(Middleware):
+    """
+    Signals when the first message starts ('got_work') and completes ('done').
+    If stop_on_failure=True, we'll also mark done after the first failure.
+    """
+
+    def __init__(
+        self,
+        got_work: threading.Event,
+        done: threading.Event,
+        *,
+        stop_on_failure: bool = False
+    ):
+        self.got_work = got_work
+        self.done = done
+        self.stop_on_failure = stop_on_failure
+
+    def before_process_message(self, broker, message):
+        # First time we see a message begin processing in this process
+        if not self.got_work.is_set():
+            self.got_work.set()
+
+    def after_process_message(self, broker, message, *, result=None, exception=None):
+        # On success (or also on failure if configured), finish this worker
+        if exception is None or self.stop_on_failure:
+            if not self.done.is_set():
+                self.done.set()
+
+
+def _graceful_stop(worker: Worker, log: logging.Logger):
+    try:
+        log.info("Stopping dramatiq worker...")
+        worker.stop()  # stop consumers; no new messages will start
+        worker.join()
+        log.info("Worker stopped.")
+    except Exception as e:
+        log.exception("Error stopping worker: %s", e)
+
+
+def _close_broker(log: logging.Logger):
+    try:
+        b = get_broker()
+        if b is not None and hasattr(b, "close"):
+            b.close()
+        log.info("Broker closed.")
+    except Exception as e:
+        log.exception("Error closing broker: %s", e)
+
+
+def job_runner(queues, idle_timeout_ms: int = 0, *, stop_on_failure: bool = False):
+    """
+    Start a single-thread Dramatiq worker. Behavior:
+      - Wait up to `idle_timeout_ms` for *a job to start* (time-to-first-job).
+      - Once a job begins, wait indefinitely for it to complete.
+      - After the first successful job completes (or first job, if stop_on_failure=True), stop and exit.
+
+    Args:
+        queues (list[str]): queues to listen to
+        idle_timeout_ms (int): <=0 => wait forever for first job; >0 => exit if no job starts in time
+        stop_on_failure (bool): if True, exit after first job even if it fails
+    """
+    logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
+    log = logging.getLogger("oneshot")
+
+    # Normalize timeout (treat non-positive as "infinite")
+    timeout_ms = (
+        int(idle_timeout_ms) if idle_timeout_ms and int(idle_timeout_ms) > 0 else 0
+    )
+    log.info(
+        "Launching worker with queues=%s, idle_timeout_ms=%s", queues, timeout_ms or "∞"
+    )
+
+    got_work = threading.Event()
+    done = threading.Event()
+    broker.add_middleware(OneAndDone(got_work, done, stop_on_failure=stop_on_failure))
+
+    worker = Worker(
+        broker,
+        worker_threads=1,  # strictly one at a time
+        queues=queues,
+        worker_timeout=1000,  # ms; how often the worker checks for stop
+    )
+
+    worker.start()
+
+    def controller():
+        log.debug("Controller thread started.")
+        try:
+            # Phase 1: Wait for *first job to start*
+            if timeout_ms > 0:
+                started = got_work.wait(timeout_ms / 1000.0)
+                if not started:
+                    log.info(
+                        "Idle timeout reached (%d ms); no jobs started. Stopping worker.",
+                        timeout_ms,
+                    )
+                    return
+            else:
+                got_work.wait()
+
+            log.info("First job started; waiting for it to finish...")
+            # Phase 2: Wait for the first job to complete (no timeout)
+            done.wait()
+            log.info("First job finished; shutting down.")
+        finally:
+            _graceful_stop(worker, log)
+            _close_broker(log)
+            # Hard-exit to ensure K8s Job is marked Succeeded promptly, no lingering threads.
+            os._exit(0)
+
+    t = threading.Thread(target=controller, name="oneshot-controller", daemon=False)
+    t.start()
+    t.join()  # Block until controller completes (which shuts everything down)
+
+
+def _parse_args(argv=None):
+    ap = argparse.ArgumentParser(description="Run a one-shot Dramatiq worker.")
+    ap.add_argument(
+        "-q",
+        "--queue",
+        action="append",
+        default=None,
+        help="Queue to listen to (repeatable). You can also pass a comma-separated list.",
+    )
+    ap.add_argument(
+        "--idle-timeout-ms",
+        type=int,
+        default=int(os.getenv("IDLE_TIMEOUT_MS", "0")),
+        help="Exit if no job starts within this time (<=0 = wait forever).",
+    )
+    ap.add_argument(
+        "--stop-on-failure",
+        action="store_true",
+        help="Exit after the first job even if it fails.",
+    )
+    return ap.parse_args(argv)
+
+
+def main(argv=None):
+    args = _parse_args(argv)
+
+    # Build queue list from flags or env, support comma-separated entries.
+    raw_entries = (
+        args.queue if args.queue else [os.getenv("JOB_QUEUE", "pipeline-queue")]
+    )
+    queues = []
+    for entry in raw_entries:
+        queues.extend([q.strip() for q in str(entry).split(",") if q and q.strip()])
+
+    if not queues:
+        raise SystemExit("No queues provided. Use -q ... or set JOB_QUEUE.")
+
+    logging.basicConfig(level=logging.INFO, format="%(levelname)s - %(message)s")
+    log = logging.getLogger("oneshot")
+
+    pid = os.getpid()
+    ppid = os.getppid()
+    log.info(
+        "Starting one-shot worker PID=%s, Parent PID=%s, queues=%s, idle_timeout_ms=%s, stop_on_failure=%s",
+        pid,
+        ppid,
+        queues,
+        args.idle_timeout_ms if args.idle_timeout_ms > 0 else "∞",
+        args.stop_on_failure,
+    )
+
+    job_runner(
+        queues,
+        idle_timeout_ms=args.idle_timeout_ms,
+        stop_on_failure=args.stop_on_failure,
+    )
+
+
+if __name__ == "__main__":
+    main()
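With the console_scripts entry added in setup.py further down, this module is exposed as the job-runner command; a programmatic equivalent is the following sketch (queue name and timeout are illustrative):

    from pypeline.job_runner import main

    # Listen on one queue; exit if no job starts within 60s,
    # otherwise run exactly one job to completion and exit.
    main(["-q", "pipeline-queue", "--idle-timeout-ms", "60000"])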
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipeline_config_schema.py RENAMED

@@ -119,10 +119,9 @@ class TaskDefinitionsSchemaV1(ExcludeUnknownSchema):
 
     serverType = fields.String(
         required=False,
-
-
-
-        ),
+        description="Recommended presets are listed in enum; custom strings are allowed.",
+        example="m",
+        metadata={"enum": ["xs", "s", "m", "l", "xl", "xxl", "xxxl", "cpu-xl"]},  # docs only
     )
 
 
@@ -151,10 +150,9 @@ class TaskDefinitionsSchemaV2(ExcludeUnknownSchema):
 
     serverType = fields.String(
         required=False,
-
-
-
-        ),
+        description="Recommended presets are listed in enum; custom strings are allowed.",
+        example="m",
+        metadata={"enum": ["xs", "s", "m", "l", "xl", "xxl", "xxxl", "cpu-xl"]},  # docs only
     )
 
 
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipeline_settings_schema.py RENAMED

@@ -1,3 +1,5 @@
+from datetime import date
+
 from marshmallow import Schema, fields, validate, ValidationError, validates_schema, INCLUDE
 
 
@@ -34,6 +36,7 @@ def create_pipeline_settings_schema(pipeline_settings_schema_data):
         "datetime": fields.DateTime,
         "array": fields.List,
         "object": fields.Nested,
+        "date": fields.Date,
     }.get(data_type)
 
     if not field_type:
@@ -50,6 +53,7 @@ def create_pipeline_settings_schema(pipeline_settings_schema_data):
         "float": fields.Float,
         "boolean": fields.Boolean,
         "datetime": fields.DateTime,
+        "date": fields.Date,
     }.get(element_type)
     if not field_args["cls_or_instance"]:
         raise ValidationError(
@@ -119,6 +123,7 @@ def create_pipeline_settings_schema(pipeline_settings_schema_data):
         "float": fields.Float,
         "boolean": fields.Boolean,
         "datetime": fields.DateTime,
+        "date": fields.Date,
         "array": fields.List,
         "object": fields.Nested,
     }.get(data_type)
@@ -137,6 +142,7 @@ def create_pipeline_settings_schema(pipeline_settings_schema_data):
         "float": fields.Float,
         "boolean": fields.Boolean,
         "datetime": fields.DateTime,
+        "date": fields.Date,
     }.get(element_type)
     if not field_args["cls_or_instance"]:
         raise ValidationError(
@@ -229,7 +235,7 @@ class SettingSchema(Schema):
     dataType = fields.String(
         required=True,
         validate=validate.OneOf(
-            ["string", "int", "float", "boolean", "datetime", "array", "object"]
+            ["string", "int", "float", "boolean", "datetime", "array", "object", "date"]
         ),
         metadata={"description": "The underlying data type of the setting"},
     )
@@ -317,6 +323,13 @@ class SettingSchema(Schema):
             raise ValidationError(
                 f"Option value `{value}` must be an ISO 8601 string for `datetime`."
             )
+        elif data_type == "date":
+            try:
+                date.fromisoformat(value)
+            except Exception:
+                raise ValidationError(
+                    f"Option value `{value}` must be an ISO 8601 string for `date`."
+                )
 
     @validates_schema
     def validate_search_endpoint(self, data, **kwargs):
@@ -452,6 +465,7 @@ class PipelineScenarioSchema(Schema):
             "produce alternative calculations and or results."
         },
     )
+    execution_id = fields.String(required=False, metadata={"description": "Execution id for a known scenario"})
 
 
 class PipelineScenariosSchema(Schema):
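The new `date` type accepts ISO 8601 calendar dates, mirroring the `date.fromisoformat` check in the diff above; a minimal sketch:

    from datetime import date

    def is_valid_date_option(value: str) -> bool:
        # Same check the schema applies to option values of dataType "date".
        try:
            date.fromisoformat(value)
            return True
        except Exception:
            return False

    assert is_valid_date_option("2024-06-30")
    assert not is_valid_date_option("06/30/2024")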
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/composition/parallel_pipeline_composition.py RENAMED

@@ -5,12 +5,22 @@ import json
 import time
 import typing
 from uuid import uuid4
+from urllib.parse import urlparse
 
 from dramatiq.broker import get_broker
 from dramatiq.results import ResultMissing
 from db_medley.redis_conf import RedisConnector
 from redis.exceptions import RedisError
-
+from redis.sentinel import Sentinel
+from pypeline.constants import (
+    REDIS_URL,
+    REDIS_SENTINEL_MASTER_NAME,
+    DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+    DEFAULT_REDIS_SOCKET_TIMEOUT,
+    DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+    DEFAULT_REDIS_SOCKET_KEEPALIVE,
+    DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+)
 from pypeline.barrier import LockingParallelBarrier
 from pypeline.constants import DEFAULT_RESULT_TTL
 from pypeline.dramatiq import REDIS_URL
@@ -240,9 +250,26 @@ class PipelineResult:
         self.pipeline: parallel_pipeline = None
         self.execution_id = execution_id
         self.redis_key = f"{execution_id}-results-key"
-        self.redis_conn = RedisConnector().get_connection()
         self.result_ttl = result_ttl
 
+        if REDIS_SENTINEL_MASTER_NAME is not None:
+            parsed_redis_url = urlparse(REDIS_URL)
+            redis_sentinel = Sentinel(
+                sentinels=[(parsed_redis_url.hostname, parsed_redis_url.port)],
+            )
+            self.redis_conn = redis_sentinel.master_for(
+                REDIS_SENTINEL_MASTER_NAME,
+                db=int(parsed_redis_url.path[1]) if parsed_redis_url.path else 0,
+                password=parsed_redis_url.password,
+                socket_connect_timeout=DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+                socket_timeout=DEFAULT_REDIS_SOCKET_TIMEOUT,
+                retry_on_timeout=DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+                socket_keepalive=DEFAULT_REDIS_SOCKET_KEEPALIVE,
+                health_check_interval=DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+            )
+        else:
+            self.redis_conn = RedisConnector().get_connection()
+
     def create_result_entry(self, pipeline_json_str: str):
         """
         Store the serialized pipeline data in Redis with a specified TTL.
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/composition/pypeline_composition.py RENAMED

@@ -86,6 +86,10 @@ class Pypeline:
                     )
                     message = lazy_actor.message()
                     message.options["pipeline"] = pipeline
+                    if pipeline_config["metadata"].get("maxRetry", None) is not None:
+                        message.options["max_retries"] = pipeline_config["metadata"][
+                            "maxRetry"
+                        ]
                     message.options["task_replacements"] = copy(
                         scenario["taskReplacements"]
                     )
@@ -121,6 +125,8 @@ class Pypeline:
         )
         message = lazy_actor.message()
         message.options["pipeline"] = pipeline
+        if pipeline_config["metadata"].get("maxRetry", None) is not None:
+            message.options["max_retries"] = pipeline_config["metadata"]["maxRetry"]
         message.options["task_replacements"] = first_scenario_task_replacements
         message.options["execution_id"] = base_case_execution_id
         message.options["task_name"] = first_task
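A sketch of the pipeline metadata this change reads (key names from the diffs; values illustrative). When metadata.maxRetry is present it is copied onto each dispatched message as max_retries, which Dramatiq's Retries middleware honors per message:

    pipeline_config = {
        "metadata": {
            "maxRetry": 2,    # becomes message.options["max_retries"]
            "maxTtl": 60000,  # becomes message.options["task_ttl"] in factory.py below
        },
        # ... dagAdjacency, taskDefinitions, etc.
    }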
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/factory.py RENAMED

@@ -70,13 +70,16 @@ def dag_generator(
             )
             registered_actors[task] = lazy_actor
             if args and not kwargs:
-
+                msg = registered_actors[task].message(*args)
             elif kwargs and not args:
-
+                msg = registered_actors[task].message(**kwargs)
             elif args and kwargs:
-
+                msg = registered_actors[task].message(*args, **kwargs)
             else:
-
+                msg = registered_actors[task].message()
+            msg.options["task_ttl"] = pipeline_config["metadata"]["maxTtl"]
+            message_group.append(msg)
+
         messages.append(message_group)
     p = parallel_pipeline(messages)
 
scalable_pypeline-2.1.31/pypeline/pipelines/middleware/graceful_shutdown_middleware.py ADDED

@@ -0,0 +1,50 @@
+import os
+import socket
+import logging
+import redis
+
+from dramatiq.middleware import Middleware
+from tenacity import retry, stop_after_attempt, wait_exponential, after_log
+
+logger = logging.getLogger(__name__)
+
+
+class GraceFulShutdownMiddleware(Middleware):
+    def __init__(self, redis_url, key_prefix="busy"):
+        self.redis = redis.Redis.from_url(redis_url)
+        self.hostname = socket.gethostname()
+        self.pid = os.getpid()
+        self.key_prefix = key_prefix
+        self.key = f"{self.key_prefix}:{self.hostname}-{self.pid}"
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=2, min=2, max=10),
+        after=after_log(logger, logging.WARNING),
+        reraise=True,
+    )
+    def _set_busy_flag(self, message_ttl):
+        self.redis.set(self.key, "1", ex=message_ttl)
+        logger.debug(f"[GracefulShutdownMiddleware] Set busy flag: {self.key}")
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=2, min=2, max=10),
+        after=after_log(logger, logging.WARNING),
+        reraise=True,
+    )
+    def _clear_busy_flag(self):
+        self.redis.delete(self.key)
+        logger.debug(f"[GracefulShutdownMiddleware] Cleared busy flag: {self.key}")
+
+    def before_process_message(self, broker, message):
+        try:
+            self._set_busy_flag(message_ttl=message.options["task_ttl"])
+        except Exception as e:
+            logger.error(f"[GracefulShutdownMiddleware] Failed to set busy flag: {e}")
+
+    def after_process_message(self, broker, message, *, result=None, exception=None):
+        try:
+            self._clear_busy_flag()
+        except Exception as e:
+            logger.error(f"[GracefulShutdownMiddleware] Failed to clear busy flag: {e}")
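An external check (for example a Kubernetes preStop hook) could poll the same busy key the middleware maintains; the key format comes from the diff above, while the URL here is illustrative:

    import os
    import socket

    import redis

    r = redis.Redis.from_url("redis://localhost:6379/0")  # example URL
    busy_key = f"busy:{socket.gethostname()}-{os.getpid()}"
    if r.get(busy_key):
        print("worker is mid-message; delay shutdown")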
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pipelines/middleware/pypeline_middleware.py RENAMED

@@ -24,6 +24,7 @@ class PypelineMiddleware(Middleware):
             return
 
         pipeline = message.options["pipeline"]
+        max_retries = message.options.get("max_retries", None)
         pipeline_config = pipeline["config"]
         task_replacements = message.options["task_replacements"]
         execution_id = message.options["execution_id"]
@@ -45,78 +46,74 @@ class PypelineMiddleware(Middleware):
 
         graph = get_execution_graph(pipeline_config)
         children_tasks = pipeline_config["dagAdjacency"].get(task_name, [])
-
         messages = []
         for child in children_tasks:
             child_ancestors = sorted(graph.predecessors(child))
+            child_ancestors_complete = {a: False for a in child_ancestors}
 
-
+            for scenario in message.options["scenarios"]:
+                if scenario["execution_id"] == execution_id:
+                    tasks_to_run_in_scenario = scenario["tasksToRunInScenario"]
 
             for ancestor in child_ancestors:
-
-
-
-
-
-
-                )
-                try:
-                    locking_parallel_barrier.acquire_lock(
-                        timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+                if ancestor in tasks_to_run_in_scenario:
+                    current_scenario_ancestor_task_key = f"{execution_id}-{ancestor}"
+                    locking_parallel_barrier = LockingParallelBarrier(
+                        self.redis_url,
+                        task_key=current_scenario_ancestor_task_key,
+                        lock_key=f"{message.options['base_case_execution_id']}-lock",
                     )
-
-
-
-
-
-
-
-
-
-
-
-                ancestor_task_key = (
+                    try:
+                        locking_parallel_barrier.acquire_lock(
+                            timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+                        )
+                        if not locking_parallel_barrier.task_exists():
+                            child_ancestors_complete[ancestor] = False
+                        elif locking_parallel_barrier.get_task_count() <= 0:
+                            child_ancestors_complete[ancestor] = True
+                    finally:
+                        locking_parallel_barrier.release_lock()
+                else:
+                    base_scenario_ancestor_task_key = (
                         f"{message.options['base_case_execution_id']}-{ancestor}"
                     )
-
                     locking_parallel_barrier = LockingParallelBarrier(
                         self.redis_url,
-                        task_key=
+                        task_key=base_scenario_ancestor_task_key,
                         lock_key=f"{message.options['base_case_execution_id']}-lock",
                     )
                     try:
                         locking_parallel_barrier.acquire_lock(
                             timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
                         )
-
-
-
-                        raise Exception(
-                            f"Parent task {ancestor} barrier lock not found"
-                        )
+                        if not locking_parallel_barrier.task_exists():
+                            child_ancestors_complete[ancestor] = False
+                        elif locking_parallel_barrier.get_task_count() <= 0:
+                            child_ancestors_complete[ancestor] = True
                     finally:
                         locking_parallel_barrier.release_lock()
-            if remaining_tasks is None or remaining_tasks >= 1:
-                ancestor_tasks_complete = False
-                break
 
-
-
-                break
+            if any(complete is False for complete in child_ancestors_complete.values()):
+                continue
 
-            # Handle situation where base case kicks off new scenario
             if (
                 message.options["base_case_execution_id"]
                 == message.options["execution_id"]
             ):
                 for scenario in message.options["scenarios"]:
-
+                    child_ancestors = list(graph.predecessors(child))
+                    child_has_other_ancestors_in_scenario = False
+
+                    for ancestor in child_ancestors:
+                        if ancestor in scenario["tasksToRunInScenario"]:
+                            child_has_other_ancestors_in_scenario = True
+                            break
+
                     if (
                         child in scenario["tasksToRunInScenario"]
-                        and task_name in
+                        and task_name in child_ancestors
                         and task_name not in scenario["tasksToRunInScenario"]
+                        and not child_has_other_ancestors_in_scenario
                     ):
                         task_key = f"{scenario['execution_id']}-{child}"
                         locking_parallel_barrier = LockingParallelBarrier(
@@ -138,6 +135,8 @@ class PypelineMiddleware(Middleware):
                         )
                         scenario_message = lazy_actor.message()
                         scenario_message.options["pipeline"] = pipeline
+                        if max_retries is not None:
+                            scenario_message.options["max_retries"] = max_retries
                         scenario_message.options["task_replacements"] = (
                             task_replacements
                         )
@@ -161,7 +160,7 @@ class PypelineMiddleware(Middleware):
                         )
                         messages.append(scenario_message)
 
-            #
+            # If we've made it here all ancestors of this child are complete, and it's time to run.
             task_key = f"{execution_id}-{child}"
             locking_parallel_barrier = LockingParallelBarrier(
                 self.redis_url,
@@ -182,6 +181,8 @@ class PypelineMiddleware(Middleware):
 
             child_message = lazy_actor.message()
             child_message.options["pipeline"] = pipeline
+            if max_retries is not None:
+                child_message.options["max_retries"] = max_retries
             child_message.options["task_replacements"] = task_replacements
             child_message.options["execution_id"] = execution_id
             child_message.options["task_name"] = child
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/pypeline_yaml.py RENAMED

@@ -54,6 +54,8 @@ import yaml
 from yaml.loader import SafeLoader
 from marshmallow import Schema, fields, pre_load, EXCLUDE, INCLUDE, validates_schema
 from marshmallow.exceptions import ValidationError
+
+from pypeline.executable_job_config_schema import ExecutableJobSchema
 from pypeline.utils.module_utils import PypelineModuleLoader, normalized_pkg_name
 from pypeline.constants import PYPELINE_YAML_PATH, PYPELINE_CLIENT_PKG_NAME
 from pypeline.pipeline_config_schema import BasePipelineSchema
@@ -165,6 +167,14 @@ class PypelineYamlSchema(ExcludeUnknownSchema):
         required=False,
     )
 
+    executableJobs = fields.Dict(
+        keys=fields.String(),
+        values=fields.Nested(ExecutableJobSchema),
+        description="List of executable jobs",
+        required=False,
+        allow_none=True,
+    )
+
     def validate_errors(self, schema: Schema, value: dict):
         """Run Marshmallow validate() and raise if any errors"""
         schema = schema()
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/utils/config_utils.py RENAMED

@@ -41,6 +41,19 @@ def retrieve_latest_schedule_config():
     return None
 
 
+def retrieve_executable_job_config():
+    pypeline_config = load_pypeline_config()
+
+    if not pypeline_config:
+        return None
+    if "executableJobs" in pypeline_config:
+        tasks = []
+        for task_id, config in pypeline_config["executableJobs"].items():
+            tasks.append(config)
+        return tasks
+    return None
+
+
 def get_service_config_for_worker(
     pypeline_config: dict, worker_name: str = None
 ) -> Union[dict, None]:
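For retrieve_executable_job_config to return anything, the loaded pypeline config needs an executableJobs mapping. A plausible shape, shown as the equivalent Python dict (keys from the schemas above; values illustrative):

    pypeline_config = {
        "executableJobs": {
            "my-job": {
                "name": "My Scheduled Task",
                "schemaVersion": 1,
                "enabled": True,
                "config": {"queue": "my-default-queue", "task": "my_app.module.method"},
            }
        }
    }
    # retrieve_executable_job_config() returns the list of entry dicts,
    # e.g. [pypeline_config["executableJobs"]["my-job"]].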
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/pypeline/utils/dramatiq_utils.py RENAMED

@@ -113,11 +113,15 @@ class LazyActor(object):
         return getattr(self.actor, name)
 
     def register(self, broker):
-
-
-
-
-
+        actor_name = f"{self.fn.__module__}.{self.fn.__name__}-{self.kw['queue_name']}"
+        if actor_name in broker.actors:
+            self.actor = broker.actors[actor_name]
+        else:
+            self.actor = register_actor(
+                actor_name=actor_name,
+                broker=broker,
+                **self.kw,
+            )(ensure_return_value(default_value=True)(self.fn))
 
     # Next is regular actor API.
     def send(self, *a, **kw):
scalable_pypeline-2.1.31/pypeline/utils/executable_job_util.py ADDED

@@ -0,0 +1,35 @@
+from dramatiq.broker import get_broker
+
+from pypeline.utils.config_utils import retrieve_executable_job_config
+from pypeline.utils.dramatiq_utils import register_lazy_actor, LazyActor
+from pypeline.utils.module_utils import get_callable
+
+
+def execute_job(fn, *args, **kwargs):
+    executable_jobs_config = retrieve_executable_job_config()
+
+    module_path = kwargs.get("module_path", None)
+
+    job = None
+
+    for j in executable_jobs_config or []:
+        if module_path and module_path == j["config"]["task"]:
+            job = j
+            break
+        elif fn.__name__ in j["config"]["task"]:
+            if job:
+                raise ValueError(
+                    f"Multiple matches found in yaml for {fn.__name__}, "
+                    f"Consider passing module_path as a kwarg to avoid ambiguity."
+                )
+            job = j
+
+    if job is None:
+        raise ValueError(f"No match found in yaml for {fn.__name__} function.")
+
+    pipeline_meta = {"queue": job["config"].get("queue", "default")}
+    tmp_handler = get_callable(job["config"]["task"])
+
+    actor: LazyActor = register_lazy_actor(get_broker(), tmp_handler, pipeline_meta, None)
+
+    return actor.send(*args, **kwargs)
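Usage sketch for the new helper; execute_job and module_path come from the diff above, while my_app.module.method is the illustrative task path reused from the schema examples:

    from pypeline.utils.executable_job_util import execute_job
    from my_app.module import method  # hypothetical task function

    # Finds the matching executableJobs entry, registers a lazy actor on its
    # queue, and enqueues the call. module_path disambiguates duplicate names.
    execute_job(method, 1, 2, module_path="my_app.module.method")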
scalable_pypeline-2.1.31/pypeline/utils/graceful_shutdown_util.py ADDED

@@ -0,0 +1,39 @@
+import threading
+import signal
+import os
+import redis
+import socket
+import sys
+import time
+import logging
+from pypeline.pipelines.middleware.graceful_shutdown_middleware import (
+    GraceFulShutdownMiddleware,
+)
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+def enable_graceful_shutdown(broker, redis_url):
+    """Attach GracefulShutdownMiddleware and a SIGTERM handler to the current process."""
+    broker.add_middleware(GraceFulShutdownMiddleware(redis_url=redis_url))
+
+    if threading.current_thread().name == "MainThread":
+        key_prefix = "busy"
+        hostname = socket.gethostname()
+        pid = os.getpid()
+        busy_key = f"{key_prefix}:{hostname}-{pid}"
+        r = redis.Redis.from_url(redis_url)
+
+        def shutdown_handler(signum, frame):
+            logger.info(f"[Signal Handler] Received signal {signum}")
+            wait_counter = 0
+            while r.get(busy_key):
+                if wait_counter % 30 == 0:  # Only log every 30 checks
+                    logger.info(f"[Signal Handler] Busy ({busy_key}), waiting...")
+                time.sleep(1)
+                wait_counter += 1
+            logger.info(f"[Signal Handler] Done. Exiting.")
+            sys.exit(0)
+
+        signal.signal(signal.SIGTERM, shutdown_handler)
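Manual wiring sketch; normally configure_default_broker calls this when RESTRICT_WORKER_SHUTDOWN_WHILE_JOBS_RUNNING=true, and the URL here is illustrative:

    from dramatiq import get_broker

    from pypeline.utils.graceful_shutdown_util import enable_graceful_shutdown

    # Must run on the MainThread or the SIGTERM handler is skipped.
    enable_graceful_shutdown(broker=get_broker(), redis_url="redis://localhost:6379/0")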
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/SOURCES.txt RENAMED

@@ -8,7 +8,9 @@ pypeline/__init__.py
 pypeline/barrier.py
 pypeline/constants.py
 pypeline/dramatiq.py
+pypeline/executable_job_config_schema.py
 pypeline/extensions.py
+pypeline/job_runner.py
 pypeline/pipeline_config_schema.py
 pypeline/pipeline_settings_schema.py
 pypeline/pypeline_yaml.py
@@ -26,11 +28,14 @@ pypeline/pipelines/composition/parallel_pipeline_composition.py
 pypeline/pipelines/composition/pypeline_composition.py
 pypeline/pipelines/middleware/__init__.py
 pypeline/pipelines/middleware/get_active_worker_id_middleware.py
+pypeline/pipelines/middleware/graceful_shutdown_middleware.py
 pypeline/pipelines/middleware/parallel_pipeline_middleware.py
 pypeline/pipelines/middleware/pypeline_middleware.py
 pypeline/utils/__init__.py
 pypeline/utils/config_utils.py
 pypeline/utils/dramatiq_utils.py
+pypeline/utils/executable_job_util.py
+pypeline/utils/graceful_shutdown_util.py
 pypeline/utils/module_utils.py
 pypeline/utils/pipeline_utils.py
 pypeline/utils/schema_utils.py
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/scalable_pypeline.egg-info/requires.txt RENAMED

@@ -20,7 +20,7 @@ pytest-cov<3,>=2.6.1
 tox<4,>=3.14.1
 mock<2,>=1
 responses<0.11,>=0.10.16
-fakeredis<
+fakeredis<2.31,>=2.10.3
 
 [web]
 gunicorn
@@ -30,3 +30,4 @@ gevent<22,>=21.12.0
 networkx>=2.4
 dramatiq[rabbitmq]==1.17.0
 apscheduler<4,>=3.10.4
+tenacity==8.0.1
{scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}/setup.py RENAMED

@@ -1,5 +1,4 @@
-"""
-"""
+"""PypeLine Library Setup"""
 
 import re
 import ast
@@ -159,7 +158,7 @@ with open("requirements.txt", "r") as f:
     install_requires = f.read().splitlines()
 
 setup(
-    name="
+    name="scalable_pypeline",
     version=__version__,
     description="PypeLine - Python pipelines for the Real World",
     long_description=open("README.md", "r").read(),
@@ -183,6 +182,7 @@ setup(
             "networkx>=2.4",
             "dramatiq[rabbitmq]==1.17.0",
             "apscheduler>=3.10.4,<4",
+            "tenacity==8.0.1",
         ],
         "dev": ["black"],
         "test": [
@@ -190,13 +190,14 @@ setup(
             "tox>=3.14.1,<4",
             "mock>=1,<2",
             "responses>=0.10.16,<0.11",
-            "fakeredis>=2.10.3,<
+            "fakeredis>=2.10.3,<2.31",  # fakeredis version compatible with redis 4.x
         ],
     },
     entry_points={
        "flask.commands": [
            "pypeline-worker=pypeline.dramatiq:pypeline_worker",
            "cron-scheduler=pypeline.dramatiq:cron_scheduler",
-        ]
+        ],
+        "console_scripts": ["job-runner = pypeline.job_runner:main"],
     },
 )
scalable-pypeline-2.1.12/pypeline/__init__.py REMOVED

@@ -1 +0,0 @@
-__version__ = "2.1.12"
scalable-pypeline-2.1.12/pypeline/barrier.py REMOVED

@@ -1,37 +0,0 @@
-import time
-
-import redis
-
-
-class LockingParallelBarrier:
-    def __init__(self, redis_url, task_key="task_counter", lock_key="task_lock"):
-        # Connect to Redis using the provided URL
-        self.redis = redis.StrictRedis.from_url(redis_url, decode_responses=True)
-        self.task_key = task_key
-        self.lock_key = lock_key
-
-    def acquire_lock(self, timeout=5):
-        """Acquire a lock using Redis."""
-        while True:
-            if self.redis.set(self.lock_key, "locked", nx=True, ex=timeout):
-                return True
-            time.sleep(0.1)
-
-    def release_lock(self):
-        """Release the lock in Redis."""
-        self.redis.delete(self.lock_key)
-
-    def set_task_count(self, count):
-        """Initialize the task counter in Redis."""
-        self.redis.set(self.task_key, count)
-
-    def decrement_task_count(self):
-        """Decrement the task counter in Redis."""
-        return self.redis.decr(self.task_key)
-
-    def task_exists(self):
-        return self.redis.exists(self.task_key)
-
-    def get_task_count(self):
-        """Get the current value of the task counter."""
-        return int(self.redis.get(self.task_key) or 0)

The remaining files are renamed ({scalable-pypeline-2.1.12 → scalable_pypeline-2.1.31}) without content changes:

LICENSE
MANIFEST.in
README.md
pypeline/extensions.py
pypeline/flask/__init__.py
pypeline/flask/api/__init__.py
pypeline/flask/api/pipelines.py
pypeline/flask/api/schedules.py
pypeline/flask/decorators.py
pypeline/flask/flask_pypeline.py
pypeline/pipelines/__init__.py
pypeline/pipelines/composition/__init__.py
pypeline/pipelines/middleware/__init__.py
pypeline/pipelines/middleware/get_active_worker_id_middleware.py
pypeline/pipelines/middleware/parallel_pipeline_middleware.py
pypeline/schedule_config_schema.py
pypeline/utils/__init__.py
pypeline/utils/module_utils.py
pypeline/utils/pipeline_utils.py
pypeline/utils/schema_utils.py
requirements.txt
scalable_pypeline.egg-info/dependency_links.txt
scalable_pypeline.egg-info/top_level.txt
tests/fixtures/__init__.py