scalable-pypeline 2.1.2__py2.py3-none-any.whl → 2.1.4__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pypeline/__init__.py +1 -1
- pypeline/constants.py +1 -0
- pypeline/dramatiq.py +28 -23
- {scalable_pypeline-2.1.2.dist-info → scalable_pypeline-2.1.4.dist-info}/METADATA +1 -1
- {scalable_pypeline-2.1.2.dist-info → scalable_pypeline-2.1.4.dist-info}/RECORD +9 -10
- pypeline/pipelines/middleware/deduplication_middleware.py +0 -91
- {scalable_pypeline-2.1.2.dist-info → scalable_pypeline-2.1.4.dist-info}/LICENSE +0 -0
- {scalable_pypeline-2.1.2.dist-info → scalable_pypeline-2.1.4.dist-info}/WHEEL +0 -0
- {scalable_pypeline-2.1.2.dist-info → scalable_pypeline-2.1.4.dist-info}/entry_points.txt +0 -0
- {scalable_pypeline-2.1.2.dist-info → scalable_pypeline-2.1.4.dist-info}/top_level.txt +0 -0
pypeline/__init__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "2.1.2"
|
1
|
+
__version__ = "2.1.4"
|
pypeline/constants.py
CHANGED
@@ -36,6 +36,7 @@ DEFAULT_BROKER_CONNECTION_ATTEMPTS = int(
|
|
36
36
|
DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT = int(
|
37
37
|
os.getenv("DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT", 30)
|
38
38
|
)
|
39
|
+
MESSAGE_BROKER = os.getenv("MESSAGE_BROKER", "RABBITMQ")
|
39
40
|
|
40
41
|
MS_IN_SECONDS = 1000
|
41
42
|
API_PATH_V1 = "/api/v1"
|
pypeline/dramatiq.py
CHANGED
@@ -5,6 +5,8 @@ import logging
|
|
5
5
|
import click
|
6
6
|
from urllib.parse import urlparse
|
7
7
|
|
8
|
+
from dramatiq.brokers.redis import RedisBroker
|
9
|
+
|
8
10
|
from pypeline.extensions import pypeline_config
|
9
11
|
from warnings import warn
|
10
12
|
from apscheduler.schedulers.blocking import BlockingScheduler
|
@@ -31,12 +33,10 @@ from pypeline.constants import (
|
|
31
33
|
DEFAULT_BROKER_CONNECTION_HEARTBEAT,
|
32
34
|
DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
|
33
35
|
DEFAULT_BROKER_CONNECTION_ATTEMPTS,
|
36
|
+
MESSAGE_BROKER,
|
34
37
|
)
|
35
38
|
from pypeline.pipelines.middleware.parallel_pipeline_middleware import ParallelPipeline
|
36
39
|
from pypeline.pipelines.middleware.pypeline_middleware import PypelineMiddleware
|
37
|
-
from pypeline.pipelines.middleware.deduplication_middleware import (
|
38
|
-
DeduplicationMiddleware,
|
39
|
-
)
|
40
40
|
from pypeline.utils.config_utils import (
|
41
41
|
retrieve_latest_schedule_config,
|
42
42
|
get_service_config_for_worker,
|
@@ -56,27 +56,32 @@ logger = logging.getLogger(__name__)
|
|
56
56
|
|
57
57
|
def configure_default_broker(broker: Broker = None):
|
58
58
|
redis_backend = RedisBackend(url=REDIS_URL)
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
59
|
+
|
60
|
+
if MESSAGE_BROKER == "RABBITMQ":
|
61
|
+
parsed_url = urlparse(RABBIT_URL)
|
62
|
+
credentials = pika.PlainCredentials(parsed_url.username, parsed_url.password)
|
63
|
+
broker = (
|
64
|
+
broker
|
65
|
+
if broker is not None
|
66
|
+
else RabbitmqBroker(
|
67
|
+
host=parsed_url.hostname,
|
68
|
+
port=parsed_url.port,
|
69
|
+
credentials=credentials,
|
70
|
+
heartbeat=DEFAULT_BROKER_CONNECTION_HEARTBEAT,
|
71
|
+
connection_attempts=DEFAULT_BROKER_CONNECTION_ATTEMPTS,
|
72
|
+
blocked_connection_timeout=DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
|
73
|
+
)
|
71
74
|
)
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
75
|
+
|
76
|
+
elif MESSAGE_BROKER == "REDIS":
|
77
|
+
broker = broker if broker is not None else RedisBroker(url=REDIS_URL)
|
78
|
+
|
79
|
+
broker.add_middleware(Results(backend=redis_backend))
|
80
|
+
broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))
|
81
|
+
broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
|
82
|
+
broker.add_middleware(CurrentMessage())
|
83
|
+
register_actors_for_workers(broker)
|
84
|
+
set_broker(broker)
|
80
85
|
|
81
86
|
|
82
87
|
def register_actors_for_workers(broker: Broker):
|
@@ -1,7 +1,7 @@
|
|
1
|
-
pypeline/__init__.py,sha256=
|
1
|
+
pypeline/__init__.py,sha256=1yR20YsjyDpnFQgDIQmHfutaSsaW0F7mDqjloRVRIG8,22
|
2
2
|
pypeline/barrier.py,sha256=oO964l9qOCOibweOHyNivmAvufdXOke9nz2tdgclouo,1172
|
3
|
-
pypeline/constants.py,sha256=
|
4
|
-
pypeline/dramatiq.py,sha256=
|
3
|
+
pypeline/constants.py,sha256=415-5fTJQXPO4by14T4BBC6hOn11m96XFiAHSh9Sfxo,2949
|
4
|
+
pypeline/dramatiq.py,sha256=NyNwAw4iibWnS5GhTVQWxAOfBj3VXkfgSliilMa4ajg,12501
|
5
5
|
pypeline/extensions.py,sha256=BzOTnXhNxap3N7uIUUh_hO6dDwx08Vc_RJDE93_K0Lo,610
|
6
6
|
pypeline/pipeline_config_schema.py,sha256=hK2_egtg-YFx_XJDs_NyrOTGKkel7W83X-G0sic52sM,10592
|
7
7
|
pypeline/pipeline_settings_schema.py,sha256=84AuNFYsOUpoADsjEo_n9T6Ica-c21oK_V9s15I4lCg,20212
|
@@ -19,7 +19,6 @@ pypeline/pipelines/composition/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5
|
|
19
19
|
pypeline/pipelines/composition/parallel_pipeline_composition.py,sha256=pTw9Xb9h4JnV4siFc3JStm5lB-i9djUADo3Kh5K3s7g,12976
|
20
20
|
pypeline/pipelines/composition/pypeline_composition.py,sha256=ieTuQZ8zxTtvmPEkrWFbItjGtvO3JUotXcR-Jim2mss,7204
|
21
21
|
pypeline/pipelines/middleware/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
|
-
pypeline/pipelines/middleware/deduplication_middleware.py,sha256=IGO9Kc6NPMUaw1ytT3_ud2ITSZuh1-_WaU5_onazeh8,3556
|
23
22
|
pypeline/pipelines/middleware/parallel_pipeline_middleware.py,sha256=kTp6niYoe2nXIiN6EGRfdpxrJyioo0GPxDkfefbGlEk,2821
|
24
23
|
pypeline/pipelines/middleware/pypeline_middleware.py,sha256=kvt5A9OxDwpIo0PsH11Im62tH6VquUc6OFoZDw2Gxsk,8036
|
25
24
|
pypeline/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -29,9 +28,9 @@ pypeline/utils/module_utils.py,sha256=-yEJIukDCoXnmlZVXB6Dww25tH6GdPE5SoFqv6pfdV
|
|
29
28
|
pypeline/utils/pipeline_utils.py,sha256=kGP1QwCJikGC5QNRtzRXCDVewyRMpWIqERTNnxGLlSY,4795
|
30
29
|
pypeline/utils/schema_utils.py,sha256=Fgl0y9Cuo_TZeEx_S3gaSVnLjn6467LTkjb2ek7Ms98,851
|
31
30
|
tests/fixtures/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
32
|
-
scalable_pypeline-2.1.
|
33
|
-
scalable_pypeline-2.1.
|
34
|
-
scalable_pypeline-2.1.
|
35
|
-
scalable_pypeline-2.1.
|
36
|
-
scalable_pypeline-2.1.
|
37
|
-
scalable_pypeline-2.1.
|
31
|
+
scalable_pypeline-2.1.4.dist-info/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
|
32
|
+
scalable_pypeline-2.1.4.dist-info/METADATA,sha256=FLViI0pH_Zoa13dTAv75RvV-FZgYXZZZR2WIpbB81lg,5926
|
33
|
+
scalable_pypeline-2.1.4.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
|
34
|
+
scalable_pypeline-2.1.4.dist-info/entry_points.txt,sha256=uWs10ODfHSBKo2Cx_QaUjPHQTpZ3e77j9VlAdRRmMyg,119
|
35
|
+
scalable_pypeline-2.1.4.dist-info/top_level.txt,sha256=C7dpkEOc_-nnsAQb28BfQknjD6XHRyS9ZrvVeoIbV7s,15
|
36
|
+
scalable_pypeline-2.1.4.dist-info/RECORD,,
|
@@ -1,91 +0,0 @@
|
|
1
|
-
import dramatiq
|
2
|
-
import signal
|
3
|
-
from dramatiq.middleware import Middleware
|
4
|
-
from pypeline.barrier import LockingParallelBarrier
|
5
|
-
from pypeline.constants import DEFAULT_TASK_TTL
|
6
|
-
import logging
|
7
|
-
|
8
|
-
logging.basicConfig(level=logging.INFO)
|
9
|
-
logger = logging.getLogger(__name__)
|
10
|
-
|
11
|
-
|
12
|
-
class DeduplicationMiddleware(Middleware):
|
13
|
-
def __init__(self, redis_url="redis://localhost:6379/0"):
|
14
|
-
self.redis_url = redis_url
|
15
|
-
self.active_locks = {}
|
16
|
-
|
17
|
-
def before_process_message(self, broker, message):
|
18
|
-
task_id = message.message_id
|
19
|
-
task_key = f"dramatiq:task_counter:{task_id}"
|
20
|
-
lock_key = f"dramatiq:lock:{task_id}"
|
21
|
-
try:
|
22
|
-
# Try to acquire a lock for the task
|
23
|
-
locking_parallel_barrier = LockingParallelBarrier(
|
24
|
-
self.redis_url,
|
25
|
-
task_key=task_key,
|
26
|
-
lock_key=lock_key,
|
27
|
-
)
|
28
|
-
if (
|
29
|
-
locking_parallel_barrier.get_task_count() > 0
|
30
|
-
or not locking_parallel_barrier.acquire_lock(timeout=DEFAULT_TASK_TTL)
|
31
|
-
):
|
32
|
-
raise dramatiq.middleware.SkipMessage(
|
33
|
-
f"Task {task_id} is already being processed."
|
34
|
-
)
|
35
|
-
|
36
|
-
locking_parallel_barrier.set_task_count(1)
|
37
|
-
# Store the lock reference in the message and track it globally
|
38
|
-
message.options["dedupe_task_key"] = task_key
|
39
|
-
message.options["dedupe_lock_key"] = lock_key
|
40
|
-
self.active_locks[lock_key] = locking_parallel_barrier
|
41
|
-
except dramatiq.middleware.SkipMessage:
|
42
|
-
raise dramatiq.middleware.SkipMessage(
|
43
|
-
f"Task {task_id} is already being processed."
|
44
|
-
)
|
45
|
-
except Exception as e:
|
46
|
-
logger.exception(e)
|
47
|
-
raise e
|
48
|
-
|
49
|
-
def after_process_message(self, broker, message, *, result=None, exception=None):
|
50
|
-
"""Releases lock for the message that just finished."""
|
51
|
-
dedupe_task_key = message.options.get("dedupe_task_key", None)
|
52
|
-
dedupe_lock_key = message.options.get("dedupe_lock_key", None)
|
53
|
-
if not dedupe_lock_key or not dedupe_task_key:
|
54
|
-
logger.warning("unexpected in after_process_message: dedupe task or lock key not in message")
|
55
|
-
return
|
56
|
-
if dedupe_lock_key in self.active_locks:
|
57
|
-
try:
|
58
|
-
lock = self.active_locks[dedupe_lock_key]
|
59
|
-
lock.decrement_task_count()
|
60
|
-
lock.release_lock()
|
61
|
-
del self.active_locks[dedupe_lock_key]
|
62
|
-
except Exception as e:
|
63
|
-
logger.info(
|
64
|
-
f"Exception while trying to release lock {dedupe_lock_key}: {e}"
|
65
|
-
)
|
66
|
-
raise e
|
67
|
-
else:
|
68
|
-
lock = LockingParallelBarrier(
|
69
|
-
self.redis_url,
|
70
|
-
task_key=dedupe_task_key,
|
71
|
-
lock_key=dedupe_lock_key,
|
72
|
-
)
|
73
|
-
lock.decrement_task_count()
|
74
|
-
lock.release_lock()
|
75
|
-
|
76
|
-
def before_worker_shutdown(self, *args):
|
77
|
-
self.release_all_locks()
|
78
|
-
|
79
|
-
def before_worker_thread_shutdown(self, *args):
|
80
|
-
self.release_all_locks()
|
81
|
-
|
82
|
-
def release_all_locks(self, *args):
|
83
|
-
"""Release all locks when the worker shuts down."""
|
84
|
-
for lock_key, lock in self.active_locks.items():
|
85
|
-
try:
|
86
|
-
lock.decrement_task_count()
|
87
|
-
lock.release_lock()
|
88
|
-
except Exception as e:
|
89
|
-
logger.info(f"Exception while trying to release lock {lock_key}: {e}")
|
90
|
-
raise e
|
91
|
-
self.active_locks.clear()
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|