scalable_pypeline-2.1.31-py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pypeline/__init__.py +1 -0
- pypeline/barrier.py +63 -0
- pypeline/constants.py +94 -0
- pypeline/dramatiq.py +455 -0
- pypeline/executable_job_config_schema.py +35 -0
- pypeline/extensions.py +17 -0
- pypeline/flask/__init__.py +16 -0
- pypeline/flask/api/__init__.py +0 -0
- pypeline/flask/api/pipelines.py +275 -0
- pypeline/flask/api/schedules.py +40 -0
- pypeline/flask/decorators.py +41 -0
- pypeline/flask/flask_pypeline.py +156 -0
- pypeline/job_runner.py +205 -0
- pypeline/pipeline_config_schema.py +352 -0
- pypeline/pipeline_settings_schema.py +561 -0
- pypeline/pipelines/__init__.py +0 -0
- pypeline/pipelines/composition/__init__.py +0 -0
- pypeline/pipelines/composition/parallel_pipeline_composition.py +375 -0
- pypeline/pipelines/composition/pypeline_composition.py +215 -0
- pypeline/pipelines/factory.py +86 -0
- pypeline/pipelines/middleware/__init__.py +0 -0
- pypeline/pipelines/middleware/get_active_worker_id_middleware.py +22 -0
- pypeline/pipelines/middleware/graceful_shutdown_middleware.py +50 -0
- pypeline/pipelines/middleware/parallel_pipeline_middleware.py +60 -0
- pypeline/pipelines/middleware/pypeline_middleware.py +202 -0
- pypeline/pypeline_yaml.py +468 -0
- pypeline/schedule_config_schema.py +125 -0
- pypeline/utils/__init__.py +0 -0
- pypeline/utils/config_utils.py +81 -0
- pypeline/utils/dramatiq_utils.py +134 -0
- pypeline/utils/executable_job_util.py +35 -0
- pypeline/utils/graceful_shutdown_util.py +39 -0
- pypeline/utils/module_utils.py +108 -0
- pypeline/utils/pipeline_utils.py +144 -0
- pypeline/utils/schema_utils.py +24 -0
- scalable_pypeline-2.1.31.dist-info/LICENSE +177 -0
- scalable_pypeline-2.1.31.dist-info/METADATA +212 -0
- scalable_pypeline-2.1.31.dist-info/RECORD +42 -0
- scalable_pypeline-2.1.31.dist-info/WHEEL +6 -0
- scalable_pypeline-2.1.31.dist-info/entry_points.txt +6 -0
- scalable_pypeline-2.1.31.dist-info/top_level.txt +2 -0
- tests/fixtures/__init__.py +0 -0
pypeline/__init__.py
ADDED
@@ -0,0 +1 @@
+__version__ = "2.1.31"
pypeline/barrier.py
ADDED
@@ -0,0 +1,63 @@
+import time
+import redis
+from redis.sentinel import Sentinel
+from urllib.parse import urlparse
+from pypeline.constants import (
+    REDIS_SENTINEL_MASTER_NAME,
+    DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+    DEFAULT_REDIS_SOCKET_TIMEOUT,
+    DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+    DEFAULT_REDIS_SOCKET_KEEPALIVE,
+    DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+)
+
+
+class LockingParallelBarrier:
+    def __init__(self, redis_url, task_key="task_counter", lock_key="task_lock"):
+        # Connect to Redis using the provided URL
+        if REDIS_SENTINEL_MASTER_NAME is not None:
+            parsed_redis_url = urlparse(redis_url)
+            redis_sentinel = Sentinel(
+                sentinels=[(parsed_redis_url.hostname, parsed_redis_url.port)],
+            )
+            self.redis = redis_sentinel.master_for(
+                REDIS_SENTINEL_MASTER_NAME,
+                db=int(parsed_redis_url.path[1]) if parsed_redis_url.path else 0,
+                password=parsed_redis_url.password,
+                socket_connect_timeout=DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+                socket_timeout=DEFAULT_REDIS_SOCKET_TIMEOUT,
+                retry_on_timeout=DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+                socket_keepalive=DEFAULT_REDIS_SOCKET_KEEPALIVE,
+                health_check_interval=DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+                decode_responses=True,
+            )
+        else:
+            self.redis = redis.StrictRedis.from_url(redis_url, decode_responses=True)
+        self.task_key = task_key
+        self.lock_key = lock_key
+
+    def acquire_lock(self, timeout=5):
+        """Acquire a lock using Redis."""
+        while True:
+            if self.redis.set(self.lock_key, "locked", nx=True, ex=timeout):
+                return True
+            time.sleep(0.1)
+
+    def release_lock(self):
+        """Release the lock in Redis."""
+        self.redis.delete(self.lock_key)
+
+    def set_task_count(self, count):
+        """Initialize the task counter in Redis."""
+        self.redis.set(self.task_key, count)
+
+    def decrement_task_count(self):
+        """Decrement the task counter in Redis."""
+        return self.redis.decr(self.task_key)
+
+    def task_exists(self):
+        return self.redis.exists(self.task_key)
+
+    def get_task_count(self):
+        """Get the current value of the task counter."""
+        return int(self.redis.get(self.task_key) or 0)
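`LockingParallelBarrier` acts as a Redis-backed countdown latch: a caller sets a task count, each parallel task decrements it under a best-effort lock, and whichever caller observes the count reach zero can safely trigger follow-up work. A minimal usage sketch, assuming a local Redis at `redis://localhost:6379/0` and hypothetical key names:

from pypeline.barrier import LockingParallelBarrier

# Assumed URL and key names, for illustration only.
barrier = LockingParallelBarrier(
    "redis://localhost:6379/0",
    task_key="demo:task_counter",
    lock_key="demo:task_lock",
)
barrier.set_task_count(3)  # one slot per parallel task

# In each worker, once its task finishes:
barrier.acquire_lock(timeout=5)
try:
    remaining = barrier.decrement_task_count()
finally:
    barrier.release_lock()

if remaining <= 0:
    print("All tasks complete; safe to run the callback.")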
pypeline/constants.py
ADDED
@@ -0,0 +1,94 @@
+"""Pypeline Constants"""
+
+import os
+
+# Pypeline configuration defaults
+PYPELINE_YAML_PATH = os.environ.get("PYPELINE_YAML_PATH", "pypeline.yaml")
+PYPELINE_CLIENT_PKG_NAME = os.environ.get("PYPELINE_CLIENT_PKG_NAME", None)
+WORKER_NAME = os.environ.get("WORKER_NAME", None)
+API_ACCESS_KEY = os.environ.get("API_ACCESS_KEY", None)
+DEFAULT_BROKER_CALLABLE = os.environ.get(
+    "DEFAULT_BROKER_CLS", "pypeline.dramatiq:configure_default_broker"
+)
+
+# Pypeline broker connections
+RABBIT_URL = os.environ.get("RABBIT_URL", "amqp://admin:password@127.0.0.1:5672")
+REDIS_URL = os.environ.get("REDIS_URL", "redis://localhost:6379/0")
+REDIS_SENTINEL_MASTER_NAME = os.environ.get("REDIS_SENTINEL_MASTER_NAME", None)
+
+# Pypeline task defaults
+PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL = int(
+    os.getenv("DRAMATIQ_PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL", "86400000")
+)
+DEFAULT_RESULT_TTL = int(os.getenv("DEFAULT_RESULT_TTL", 86400))  # seconds (1 day)
+DEFAULT_TASK_TTL = int(os.getenv("DEFAULT_TASK_TTL", 600))  # seconds (10 minutes)
+DEFAULT_TASK_MAX_RETRY = int(os.getenv("DEFAULT_TASK_MAX_RETRY", 3))
+DEFAULT_TASK_MIN_BACKOFF = int(os.getenv("DEFAULT_TASK_MIN_BACKOFF", 15))  # seconds
+DEFAULT_TASK_MAX_BACKOFF = int(
+    os.getenv("DEFAULT_TASK_MAX_BACKOFF", 3600)
+)  # seconds (1 hour)
+DEFAULT_BROKER_CONNECTION_HEARTBEAT = int(
+    os.getenv("DEFAULT_BROKER_CONNECTION_HEARTBEAT", 5)
+)
+DEFAULT_BROKER_CONNECTION_ATTEMPTS = int(
+    os.getenv("DEFAULT_BROKER_CONNECTION_ATTEMPTS", 5)
+)
+DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT = int(
+    os.getenv("DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT", 30)
+)
+DEFAULT_BROKER_HEARTBEAT_TIMEOUT = int(
+    os.getenv("DEFAULT_BROKER_HEARTBEAT_TIMEOUT", 300000)
+)
+DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT = int(
+    os.getenv("DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT", 1)
+)
+DEFAULT_REDIS_SOCKET_TIMEOUT = int(os.getenv("DEFAULT_REDIS_SOCKET_TIMEOUT", 2))
+DEFAULT_REDIS_RETRY_ON_TIMEOUT = bool(os.getenv("DEFAULT_REDIS_RETRY_ON_TIMEOUT", True))
+DEFAULT_REDIS_SOCKET_KEEPALIVE = bool(os.getenv("DEFAULT_REDIS_SOCKET_KEEPALIVE", True))
+DEFAULT_REDIS_HEALTH_CHECK_INTERVAL = int(
+    os.getenv("DEFAULT_REDIS_HEALTH_CHECK_INTERVAL", 30)
+)
+
+MESSAGE_BROKER = os.getenv("MESSAGE_BROKER", "RABBITMQ")
+MS_IN_SECONDS = 1000
+API_PATH_V1 = "/api/v1"
+
+# Default 'responses' dictionary when decorating endpoints with @api.doc()
+# Extend as necessary.
+API_DOC_RESPONSES = {
+    200: {"code": 200, "description": "Successful response."},
+    400: {"code": 400, "description": "Malformed request. Verify payload is correct."},
+    401: {
+        "code": 401,
+        "description": "Unauthorized. Verify your API Key (`accesskey`) header.",
+    },
+}
+
+# Default 'params' dictionary when decorating endpoints with @api.doc()
+# Extend as necessary.
+API_DOC_PARAMS = {
+    "accesskey": {
+        "in": "header",
+        "name": "accesskey",
+        "description": "Your API Consumer's `accesskey`",
+        "type": "string",
+        "required": False,
+    }
+}
+
+DEFAULT_OPENAPI_CONFIG = (
+    ("SWAGGER_UI_DOC_EXPANSION", "list"),
+    ("API_DOCUMENTATION_TITLE", "Pypeline API Specs"),
+    ("API_DOCUMENTATION_DESCRIPTION", "Available API Endpoints"),
+    ("OPENAPI_VERSION", "3.0.2"),
+    ("OPENAPI_URL_PREFIX", "/api/v1"),
+    ("OPENAPI_SWAGGER_APP_NAME", "Pypeline - API Reference"),
+    ("OPENAPI_SWAGGER_UI_PATH", "/docs"),
+    ("OPENAPI_SWAGGER_BASE_TEMPLATE", "swagger/swagger_ui.html"),
+    ("OPENAPI_SWAGGER_URL", "/docs"),
+    (
+        "OPENAPI_SWAGGER_UI_URL",
+        "https://cdnjs.cloudflare.com/ajax/libs/swagger-ui/3.24.2/",
+    ),
+    ("EXPLAIN_TEMPLATE_LOADING", False),
+)
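One caveat in the block above: `bool()` applied to any non-empty string is `True`, so the two `bool(os.getenv(...))` defaults cannot actually be switched off through the environment; exporting `DEFAULT_REDIS_RETRY_ON_TIMEOUT=false` still evaluates to `True`. A small demonstration (not code from the package):

import os

# bool() of a non-empty string is always True, whatever the string says.
os.environ["DEFAULT_REDIS_RETRY_ON_TIMEOUT"] = "false"
print(bool(os.getenv("DEFAULT_REDIS_RETRY_ON_TIMEOUT", True)))  # True

# An explicit comparison is the usual workaround:
retry = os.getenv("DEFAULT_REDIS_RETRY_ON_TIMEOUT", "true").lower() == "true"
print(retry)  # False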
pypeline/dramatiq.py
ADDED
@@ -0,0 +1,455 @@
+import copy
+import typing
+import pika
+import logging
+import os
+
+import click
+from urllib.parse import urlparse
+
+from dramatiq.brokers.redis import RedisBroker
+from redis.sentinel import Sentinel
+from pypeline.extensions import pypeline_config
+from warnings import warn
+from apscheduler.schedulers.blocking import BlockingScheduler
+from apscheduler.triggers.cron import CronTrigger
+from dramatiq import Broker, Middleware, set_broker, get_broker
+from dramatiq.brokers.rabbitmq import RabbitmqBroker
+from dramatiq.cli import (
+    CPUS,
+    HAS_WATCHDOG,
+    main as dramatiq_worker,
+    make_argument_parser as dramatiq_argument_parser,
+    import_object,
+)
+from dramatiq.middleware import default_middleware, CurrentMessage
+from dramatiq.results import Results
+from dramatiq.results.backends.redis import RedisBackend
+from flask import current_app, Flask
+from flask.cli import with_appcontext
+
+from pypeline.constants import (
+    REDIS_URL,
+    REDIS_SENTINEL_MASTER_NAME,
+    RABBIT_URL,
+    DEFAULT_BROKER_CALLABLE,
+    DEFAULT_BROKER_CONNECTION_HEARTBEAT,
+    DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
+    DEFAULT_BROKER_CONNECTION_ATTEMPTS,
+    MESSAGE_BROKER,
+    DEFAULT_BROKER_HEARTBEAT_TIMEOUT,
+    DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+    DEFAULT_REDIS_SOCKET_TIMEOUT,
+    DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+    DEFAULT_REDIS_SOCKET_KEEPALIVE,
+    DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+)
+from pypeline.pipelines.middleware.get_active_worker_id_middleware import (
+    GetActiveWorkerIdMiddleware,
+)
+from pypeline.pipelines.middleware.parallel_pipeline_middleware import ParallelPipeline
+from pypeline.pipelines.middleware.pypeline_middleware import PypelineMiddleware
+from pypeline.utils.config_utils import (
+    retrieve_latest_schedule_config,
+    get_service_config_for_worker,
+    retrieve_executable_job_config,
+)
+from pypeline.utils.dramatiq_utils import (
+    guess_code_directory,
+    list_managed_actors,
+    register_lazy_actor,
+    LazyActor,
+)
+from pypeline.utils.graceful_shutdown_util import enable_graceful_shutdown
+from pypeline.utils.module_utils import get_callable
+from dramatiq.middleware import (
+    Retries,
+    Callbacks,
+    TimeLimit,
+    AgeLimit,
+    ShutdownNotifications,
+    Pipelines,
+)
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+
+def configure_default_broker(broker: Broker = None):
+    reworked_defaults = [AgeLimit(), TimeLimit(), ShutdownNotifications(), Callbacks(), Pipelines(), Retries()]
+    redis_client = None
+    if REDIS_SENTINEL_MASTER_NAME is not None:
+        parsed_redis_url = urlparse(REDIS_URL)
+        redis_sentinel = Sentinel(
+            sentinels=[(parsed_redis_url.hostname, parsed_redis_url.port)],
+        )
+        redis_client = redis_sentinel.master_for(
+            REDIS_SENTINEL_MASTER_NAME,
+            db=int(parsed_redis_url.path[1]) if parsed_redis_url.path else 0,
+            password=parsed_redis_url.password,
+            socket_connect_timeout=DEFAULT_REDIS_SOCKET_CONNECT_TIMEOUT,
+            socket_timeout=DEFAULT_REDIS_SOCKET_TIMEOUT,
+            retry_on_timeout=DEFAULT_REDIS_RETRY_ON_TIMEOUT,
+            socket_keepalive=DEFAULT_REDIS_SOCKET_KEEPALIVE,
+            health_check_interval=DEFAULT_REDIS_HEALTH_CHECK_INTERVAL,
+        )
+    redis_backend = RedisBackend(client=redis_client, url=REDIS_URL)
+
+    if MESSAGE_BROKER == "RABBITMQ":
+        parsed_url = urlparse(RABBIT_URL)
+        credentials = pika.PlainCredentials(parsed_url.username, parsed_url.password)
+        broker = (
+            broker
+            if broker is not None
+            else RabbitmqBroker(
+                host=parsed_url.hostname,
+                port=parsed_url.port,
+                credentials=credentials,
+                heartbeat=DEFAULT_BROKER_CONNECTION_HEARTBEAT,
+                connection_attempts=DEFAULT_BROKER_CONNECTION_ATTEMPTS,
+                blocked_connection_timeout=DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
+                middleware=reworked_defaults,
+            )
+        )
+
+    elif MESSAGE_BROKER == "REDIS":
+        broker = (
+            broker
+            if broker is not None
+            else RedisBroker(
+                client=redis_client,
+                url=REDIS_URL,
+                heartbeat_timeout=DEFAULT_BROKER_HEARTBEAT_TIMEOUT,
+                middleware=reworked_defaults,
+            )
+        )
+
+    broker.add_middleware(Results(backend=redis_backend))
+    broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))
+    broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
+    broker.add_middleware(GetActiveWorkerIdMiddleware())
+    broker.add_middleware(CurrentMessage())
+    if (
+        os.getenv("RESTRICT_WORKER_SHUTDOWN_WHILE_JOBS_RUNNING", "false").lower()
+        == "true"
+    ):
+        enable_graceful_shutdown(broker=broker, redis_url=REDIS_URL)
+
+    register_actors_for_workers(broker)
+
+    set_broker(broker)
+
+
+def register_actors_for_workers(broker: Broker):
+    service = get_service_config_for_worker(pypeline_config)
+    scheduled_jobs_config = retrieve_latest_schedule_config()
+    executable_jobs_config = retrieve_executable_job_config()
+
+    if not service:
+        return
+
+    worker_registered_tasks = [
+        task_handler["handler"] for task_handler in service.get("registeredTasks")
+    ]
+
+    # Loop over the pipelines to get metadata and other information about the task for registration
+    for pipeline_key, pipeline in pypeline_config["pipelines"].items():
+        for task, task_handler_meta in pipeline["config"]["taskDefinitions"].items():
+            if pipeline["schemaVersion"] == 1:
+                # Check if any task in this pipeline is registered
+                task_handlers = [task_handler_meta["handler"]]
+            elif pipeline["schemaVersion"] == 2:
+                task_handlers = [t for t in task_handler_meta["handlers"]]
+
+            for task_handler in task_handlers:
+                if task_handler in worker_registered_tasks:
+                    server_type = task_handler_meta.get("serverType", None)
+
+                    try:
+                        pipeline_metadata = copy.deepcopy(
+                            pipeline["config"]["metadata"]
+                        )
+                        tmp_handler = get_callable(task_handler)
+                        if pipeline_metadata.get("maxRetry", 0) >= 0:
+                            pipeline_metadata["store_results"] = True
+                        _ = register_lazy_actor(
+                            broker, tmp_handler, pipeline_metadata, server_type
+                        )
+                    except Exception as e:
+                        logger.exception(
+                            f"Unable to add a task {task_handler} to dramatiq: {e}"
+                        )
+    # Loop over the scheduled jobs and create metadata and other information about the task for registration
+    for job in scheduled_jobs_config:
+        config = job["config"]
+        if config["task"] in worker_registered_tasks:
+            pipeline_meta = {"queue": config.get("queue", "default")}
+            try:
+                tmp_handler = get_callable(config["task"])
+                if pipeline_meta and pipeline_meta.get("maxRetry", 0) >= 0:
+                    pipeline_meta["store_results"] = True
+                _ = register_lazy_actor(broker, tmp_handler, pipeline_meta, None)
+            except Exception as e:
+                logger.exception(f"Unable to add a task to dramatiq: {e}")
+
+    for job in executable_jobs_config or []:
+        config = job["config"]
+        if config["task"] in worker_registered_tasks:
+            pipeline_meta = {"queue": config.get("queue", "default")}
+            try:
+                tmp_handler = get_callable(config["task"])
+                if pipeline_meta and pipeline_meta.get("maxRetry", 0) >= 0:
+                    pipeline_meta["store_results"] = True
+                _ = register_lazy_actor(broker, tmp_handler, pipeline_meta, None)
+            except Exception as e:
+                logger.exception(f"Unable to add a task to dramatiq: {e}")
+
+
+class Dramatiq:
+    """Flask extension bridging Dramatiq broker and Flask app.
+
+    Dramatiq's API is eager: broker initialisation must precede actor
+    declaration. This breaks the application factory pattern and other ways
+    of initializing configuration after import.
+
+    This class enables lazy initialization of Dramatiq. The actual Dramatiq
+    broker is instantiated only once the Flask app is created.
+
+    .. automethod:: actor
+    .. automethod:: init_app
+    """
+
+    def __init__(
+        self,
+        app: Flask = None,
+        name: str = "dramatiq",
+        config_prefix: str = None,
+        middleware: typing.List[Middleware] = None,
+    ):
+        """
+        :param app: Flask application if created. See :meth:`init_app`.
+
+        :param broker_configuration_callable_module: In order to work in fork and spawn mode
+            we need to configure our broker using a callable function. Default is specified as
+            "pypeline.flask_dramatiq:configure_default_broker". This allows the user to
+            override if necessary.
+
+        :param name: Unique identifier for multi-broker app.
+
+        :param config_prefix: Flask configuration option prefix for this
+            broker. By default, it is derived from the ``name`` parameter,
+            capitalized.
+
+        :param middleware: List of Dramatiq middleware instances to override
+            Dramatiq defaults.
+
+            Flask-Dramatiq always prepends a custom middleware to the middleware
+            stack that sets up the Flask app context. This way, every middleware
+            can use the Flask app context.
+
+        """
+        self.actors = []
+        self.app = None
+        self.config_prefix = config_prefix or name.upper() + "_BROKER"
+        self.name = name
+        self.broker = None
+        if middleware is None:
+            middleware = [m() for m in default_middleware]
+        self.middleware = middleware
+        if app:
+            self.init_app(app)
+
+    def __repr__(self) -> str:
+        return "<%s %s>" % (self.__class__.__name__, self.name)
+
+    def init_app(self, app: Flask):
+        """Initialize extension for one Flask application.
+
+        This method triggers Dramatiq broker instantiation and effective actor
+        registration.
+
+        """
+        if self.app is not None:
+            warn(
+                "%s is used by more than one flask application. "
+                "Actor's context may be set incorrectly." % (self,),
+                stacklevel=2,
+            )
+        self.app = app
+        app.extensions["dramatiq-" + self.name] = self
+
+        module_name, broker_or_callable = import_object(DEFAULT_BROKER_CALLABLE)
+
+        # The callable function is expected to call set_broker()
+        if callable(broker_or_callable):
+            logger.info(f"Configuring broker via {DEFAULT_BROKER_CALLABLE}")
+            broker_or_callable()
+        else:
+            raise TypeError("DEFAULT_BROKER_CALLABLE must point to a callable function")
+        self.broker = get_broker()
+        for actor in self.actors:
+            actor.register(broker=self.broker)
+
+    def actor(self, fn=None, **kw):
+        """Register a callable as a Dramatiq actor.
+
+        This decorator lazily registers a callable as a Dramatiq actor. The
+        actor can't be called before :meth:`init_app` is called.
+
+        :param kw: Keyword arguments passed to :func:`dramatiq.actor`.
+
+        """
+        # Substitute dramatiq.actor decorator to return a lazy wrapper. This
+        # allows registering actors in the extension before the broker is
+        # effectively configured by init_app.
+
+        def decorator(fn):
+            lazy_actor = LazyActor(self, fn, kw)
+            self.actors.append(lazy_actor)
+            if self.app:
+                lazy_actor.register(self.broker)
+            return lazy_actor
+
+        if fn:
+            return decorator(fn)
+        return decorator
+
+
+@click.command("cron-scheduler")
+def cron_scheduler():  # pragma: no cover
+    # Configure our broker that we will schedule registered tasks for
+    scheduler = BlockingScheduler()
+    module_name, broker_or_callable = import_object(DEFAULT_BROKER_CALLABLE)
+
+    # The callable function is expected to call set_broker()
+    if callable(broker_or_callable):
+        logger.info(f"Configuring broker via {DEFAULT_BROKER_CALLABLE}")
+        broker_or_callable()
+    else:
+        raise TypeError("DEFAULT_BROKER_CALLABLE must point to a callable function")
+
+    broker = get_broker()
+    jobs = retrieve_latest_schedule_config()
+
+    for job in jobs:
+        if job["enabled"]:
+            config = job["config"]
+            worker_path = config["task"]
+            tmp_handler = get_callable(worker_path)
+            pipeline_meta = {"queue": config.get("queue", "default")}
+            actor = register_lazy_actor(broker, tmp_handler, pipeline_meta)
+            schedule = config["schedule"]
+            scheduler.add_job(
+                actor.send,
+                CronTrigger.from_crontab(
+                    f"{schedule['minute']} {schedule['hour']} {schedule['dayOfMonth']} {schedule['monthOfYear']} {schedule['dayOfWeek']}"
+                ),
+            )
+
+    try:
+        scheduler.start()
+    except KeyboardInterrupt:
+        scheduler.shutdown()
+
+
+@click.command("pypeline-worker")
+@click.argument("broker_name", default="dramatiq")
+@click.option(
+    "-v", "--verbose", default=0, count=True, help="turn on verbose log output"
+)
+@click.option(
+    "-p",
+    "--processes",
+    default=CPUS,
+    metavar="PROCESSES",
+    show_default=True,
+    help="the number of worker processes to run",
+)
+@click.option(
+    "-t",
+    "--threads",
+    default=8,
+    metavar="THREADS",
+    show_default=True,
+    help="the number of worker threads per process",
+)
+@click.option(
+    "-Q",
+    "--queues",
+    type=str,
+    default=None,
+    metavar="QUEUES",
+    show_default=True,
+    help="listen to a subset of queues, comma separated",
+)
+@click.option(
+    "--use-spawn",
+    type=bool,
+    default=False,
+    metavar="USE_SPAWN",
+    show_default=True,
+    help="start processes by spawning (default: fork on unix, spawn on windows)",
+)
+@with_appcontext
+def pypeline_worker(
+    verbose, processes, threads, queues, broker_name, use_spawn
+):  # pragma: no cover
+    """Run dramatiq workers.
+
+    Set up Dramatiq with the broker and task modules from the Flask app.
+
+    \b
+    examples:
+    # Run dramatiq with 1 thread per process.
+    $ flask worker --threads 1
+
+    \b
+    # Listen only to the "foo" and "bar" queues.
+    $ flask worker -Q foo,bar
+
+    \b
+    # Consume from a specific broker.
+    $ flask worker mybroker
+    """
+    # Plugin for flask.commands entrypoint.
+    #
+    # Wraps dramatiq worker CLI in a Flask command. This is private API of
+    # dramatiq.
+
+    def format_actor(actor):
+        return "%s@%s" % (actor.actor_name, actor.queue_name)
+
+    parser = dramatiq_argument_parser()
+
+    # Set worker broker globally.
+    needle = "dramatiq-" + broker_name
+    broker = current_app.extensions[needle].broker
+    set_broker(broker)
+
+    command = [
+        "--processes",
+        str(processes),
+        "--threads",
+        str(threads),
+        # Fall back to flask_dramatiq global broker
+        DEFAULT_BROKER_CALLABLE,
+    ]
+
+    if use_spawn:
+        command += ["--use-spawn"]
+
    if current_app.config["DEBUG"]:
+        verbose = max(1, verbose)
+        if HAS_WATCHDOG:
+            command += ["--watch", guess_code_directory(broker)]
+
+    queues = queues.split(",") if queues else []
+    if queues:
+        command += ["--queues"] + queues
+    command += verbose * ["-v"]
+    args = parser.parse_args(command)
+    logger.info("Able to execute the following actors:")
+    for actor in list_managed_actors(broker, queues):
+        logger.info(" %s.", format_actor(actor))
+
+    dramatiq_worker(args)
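Given the lazy-initialization pattern the `Dramatiq` class docstring describes, typical usage presumably looks like the sketch below; the `add` actor and `create_app` factory are hypothetical, and only `Dramatiq`, `actor`, and `init_app` come from this module:

from flask import Flask
from pypeline.dramatiq import Dramatiq

dramatiq = Dramatiq()  # no broker yet; actors are collected as LazyActors

@dramatiq.actor(queue_name="default")
def add(a, b):
    return a + b

def create_app():
    app = Flask(__name__)
    # init_app() resolves DEFAULT_BROKER_CALLABLE, lets it call set_broker(),
    # then registers every actor collected so far against the live broker.
    dramatiq.init_app(app)
    return app

Note also how `cron_scheduler` consumes schedule entries: each enabled job's `config` carries `task`, an optional `queue`, and a `schedule` mapping with `minute`, `hour`, `dayOfMonth`, `monthOfYear`, and `dayOfWeek`, which it joins into a crontab expression for APScheduler's `CronTrigger.from_crontab`.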
pypeline/executable_job_config_schema.py
ADDED
@@ -0,0 +1,35 @@
+from marshmallow import Schema, EXCLUDE, fields
+
+class ExecutableJobConfigSchema(Schema):
+    queue = fields.String(
+        required=True,
+        description="Name of queue on which to place task.",
+        example="my-default-queue",
+    )
+    task = fields.String(
+        required=True,
+        description="Path to task to invoke.",
+        example="my_app.module.method",
+    )
+
+class ExecutableJobSchema(Schema):
+    """Definition of a single schedule entry"""
+    class Meta:
+        unknown = EXCLUDE
+
+    name = fields.String(
+        required=True,
+        description="Name of schedule entry.",
+        example="My Scheduled Task",
+    )
+    schemaVersion = fields.Integer(required=True)
+    config = fields.Dict(required=True)
+    enabled = fields.Boolean(
+        required=True, description="Whether entry is enabled.", example=True
+    )
+    config = fields.Nested(
+        ExecutableJobConfigSchema,
+        required=True,
+        description="Configuration information for this job.",
+    )
+
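Note that `config` is declared twice in `ExecutableJobSchema`; under Python's class-body semantics the later `fields.Nested(...)` assignment replaces the earlier `fields.Dict(...)`. A hypothetical payload that should pass validation under that final definition:

from pypeline.executable_job_config_schema import ExecutableJobSchema

# Illustrative values only; the field names come from the schema above.
job = ExecutableJobSchema().load({
    "name": "My Executable Job",
    "schemaVersion": 1,
    "enabled": True,
    "config": {
        "queue": "my-default-queue",
        "task": "my_app.module.method",
    },
})
print(job["config"]["task"])  # my_app.module.method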
pypeline/extensions.py
ADDED
@@ -0,0 +1,17 @@
+""" Initialize most extensions used throughout application
+"""
+import logging
+
+logger = logging.getLogger(__name__)
+
+try:
+    # Client packages *should* provide a `pypeline.yaml` file. This
+    # loads the configuration file with the provided name of the client
+    # package (e.g. pypeline_demo)
+    from pypeline.pypeline_yaml import load_client_config_and_version
+
+    pypeline_config, pypeline_client_version = load_client_config_and_version()
+except Exception as e:
+    pypeline_config = None
+    pypeline_client_version = None
+    logger.warning("Unable to load client Pypeline config ... {}".format(e))
pypeline/flask/__init__.py
ADDED
@@ -0,0 +1,16 @@
+""" Sermos' Flask Implementation and Tooling. Convenience imports here.
+"""
+import logging
+
+logger = logging.getLogger(__name__)
+
+try:
+    from flask_smorest import Blueprint, Api
+    from flask import abort
+except Exception as e:
+    logger.error("Unable to import Web services (Blueprint, API, abort)" f" ... {e}")
+
+try:
+    from pypeline.flask.flask_pypeline import FlaskPypeline
+except Exception as e:
+    logger.exception("Unable to import Sermos services (FlaskPypeline)" f" ... {e}")
File without changes