scalable-pypeline 2.1.1__tar.gz → 2.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {scalable-pypeline-2.1.1/scalable_pypeline.egg-info → scalable-pypeline-2.1.3}/PKG-INFO +1 -1
  2. scalable-pypeline-2.1.3/pypeline/__init__.py +1 -0
  3. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/dramatiq.py +4 -0
  4. scalable-pypeline-2.1.3/pypeline/pipelines/middleware/deduplication_middleware.py +94 -0
  5. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3/scalable_pypeline.egg-info}/PKG-INFO +1 -1
  6. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/scalable_pypeline.egg-info/SOURCES.txt +1 -0
  7. scalable-pypeline-2.1.1/pypeline/__init__.py +0 -1
  8. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/LICENSE +0 -0
  9. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/MANIFEST.in +0 -0
  10. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/README.md +0 -0
  11. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/barrier.py +0 -0
  12. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/constants.py +0 -0
  13. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/extensions.py +0 -0
  14. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/flask/__init__.py +0 -0
  15. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/flask/api/__init__.py +0 -0
  16. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/flask/api/pipelines.py +0 -0
  17. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/flask/api/schedules.py +0 -0
  18. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/flask/decorators.py +0 -0
  19. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/flask/flask_pypeline.py +0 -0
  20. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/pipeline_config_schema.py +0 -0
  21. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/pipeline_settings_schema.py +0 -0
  22. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/pipelines/__init__.py +0 -0
  23. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/pipelines/composition/__init__.py +0 -0
  24. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/pipelines/composition/parallel_pipeline_composition.py +0 -0
  25. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/pipelines/composition/pypeline_composition.py +0 -0
  26. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/pipelines/factory.py +0 -0
  27. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/pipelines/middleware/__init__.py +0 -0
  28. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/pipelines/middleware/parallel_pipeline_middleware.py +0 -0
  29. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/pipelines/middleware/pypeline_middleware.py +0 -0
  30. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/pypeline_yaml.py +0 -0
  31. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/schedule_config_schema.py +0 -0
  32. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/utils/__init__.py +0 -0
  33. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/utils/config_utils.py +0 -0
  34. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/utils/dramatiq_utils.py +0 -0
  35. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/utils/module_utils.py +0 -0
  36. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/utils/pipeline_utils.py +0 -0
  37. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/pypeline/utils/schema_utils.py +0 -0
  38. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/requirements.txt +0 -0
  39. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/scalable_pypeline.egg-info/dependency_links.txt +0 -0
  40. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/scalable_pypeline.egg-info/entry_points.txt +0 -0
  41. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/scalable_pypeline.egg-info/requires.txt +0 -0
  42. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/scalable_pypeline.egg-info/top_level.txt +0 -0
  43. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/setup.cfg +0 -0
  44. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/setup.py +0 -0
  45. {scalable-pypeline-2.1.1 → scalable-pypeline-2.1.3}/tests/fixtures/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scalable-pypeline
3
- Version: 2.1.1
3
+ Version: 2.1.3
4
4
  Summary: PypeLine - Python pipelines for the Real World
5
5
  Home-page: https://gitlab.com/bravos2/pypeline
6
6
  Author: Bravos Power Corporation
@@ -0,0 +1 @@
1
+ __version__ = "2.1.3"
@@ -34,6 +34,9 @@ from pypeline.constants import (
34
34
  )
35
35
  from pypeline.pipelines.middleware.parallel_pipeline_middleware import ParallelPipeline
36
36
  from pypeline.pipelines.middleware.pypeline_middleware import PypelineMiddleware
37
+ from pypeline.pipelines.middleware.deduplication_middleware import (
38
+ DeduplicationMiddleware,
39
+ )
37
40
  from pypeline.utils.config_utils import (
38
41
  retrieve_latest_schedule_config,
39
42
  get_service_config_for_worker,
@@ -72,6 +75,7 @@ def configure_default_broker(broker: Broker = None):
72
75
  rabbit_broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
73
76
  rabbit_broker.add_middleware(CurrentMessage())
74
77
  register_actors_for_workers(rabbit_broker)
78
+ rabbit_broker.add_middleware(DeduplicationMiddleware(redis_url=REDIS_URL))
75
79
  set_broker(rabbit_broker)
76
80
 
77
81
 
@@ -0,0 +1,94 @@
1
+ import dramatiq
2
+ import signal
3
+ from dramatiq.middleware import Middleware
4
+ from pypeline.barrier import LockingParallelBarrier
5
+ from pypeline.constants import DEFAULT_TASK_TTL
6
+ import logging
7
+
8
+ logging.basicConfig(level=logging.INFO)
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class DeduplicationMiddleware(Middleware):
    """Dramatiq middleware that skips a message already being processed.

    For each message a ``LockingParallelBarrier`` is built from two keys
    derived from ``message.message_id`` (a task counter key and a lock key).
    If the counter is already positive, or the lock cannot be acquired, the
    message is skipped via ``dramatiq.middleware.SkipMessage``. Otherwise the
    counter is set to 1 and the barrier is remembered in ``active_locks``
    until the message finishes or the worker shuts down.

    NOTE(review): the barrier is presumably Redis-backed (it is constructed
    with ``redis_url``) -- confirm against ``pypeline.barrier``.
    """

    def __init__(self, redis_url="redis://localhost:6379/0"):
        """Store the URL passed to every ``LockingParallelBarrier`` created here.

        Args:
            redis_url: Connection URL forwarded to ``LockingParallelBarrier``.
        """
        self.redis_url = redis_url
        # lock_key -> barrier for messages this middleware instance has
        # locked and not yet released.
        self.active_locks = {}

    def before_process_message(self, broker, message):
        """Acquire the per-message lock or skip the message as a duplicate.

        Args:
            broker: The broker emitting the hook (unused).
            message: The message about to be processed; its ``options`` get
                ``dedupe_task_key`` and ``dedupe_lock_key`` set on success.

        Raises:
            dramatiq.middleware.SkipMessage: If the task counter is already
                positive or the lock could not be acquired.
            Exception: Any other failure is logged and re-raised unchanged.
        """
        task_id = message.message_id
        task_key = f"dramatiq:task_counter:{task_id}"
        lock_key = f"dramatiq:lock:{task_id}"
        try:
            barrier = LockingParallelBarrier(
                self.redis_url,
                task_key=task_key,
                lock_key=lock_key,
            )
            # Short-circuit: the lock is only attempted when no task is
            # currently counted for this message id.
            if barrier.get_task_count() > 0 or not barrier.acquire_lock(
                timeout=DEFAULT_TASK_TTL
            ):
                logger.info(f"Found duplicate task {task_id}. Skipping...")
                raise dramatiq.middleware.SkipMessage(
                    f"Task {task_id} is already being processed."
                )

            barrier.set_task_count(1)
            # Record the keys on the message so after_process_message can
            # release the lock even if the cached barrier is gone.
            message.options["dedupe_task_key"] = task_key
            message.options["dedupe_lock_key"] = lock_key
            self.active_locks[lock_key] = barrier
        except dramatiq.middleware.SkipMessage:
            # Propagate the skip as-is; it is control flow, not an error.
            raise
        except Exception as e:
            logger.exception(e)
            raise

    def after_process_message(self, broker, message, *, result=None, exception=None):
        """Release the lock held for the message that just finished.

        Args:
            broker: The broker emitting the hook (unused).
            message: The processed message; the dedupe keys are read from
                ``message.options``.
            result: Unused.
            exception: Unused.

        Raises:
            Exception: Any failure while releasing is logged and re-raised.
        """
        dedupe_task_key = message.options.get("dedupe_task_key", None)
        dedupe_lock_key = message.options.get("dedupe_lock_key", None)
        if not dedupe_lock_key or not dedupe_task_key:
            logger.warning(
                "unexpected in after_process_message: dedupe task or lock key not in message"
            )
            return

        # Pop first so a failed release cannot leave a stale entry that
        # release_all_locks would later decrement/release a second time.
        lock = self.active_locks.pop(dedupe_lock_key, None)
        if lock is None:
            # Not tracked by this instance: rebuild the barrier from the keys
            # stored on the message.
            lock = LockingParallelBarrier(
                self.redis_url,
                task_key=dedupe_task_key,
                lock_key=dedupe_lock_key,
            )
        try:
            self._release(lock)
        except Exception as e:
            logger.info(
                f"Exception while trying to release lock {dedupe_lock_key}: {e}"
            )
            raise

    def before_worker_shutdown(self, *args):
        """Release every tracked lock before the worker shuts down."""
        self.release_all_locks()

    def before_worker_thread_shutdown(self, *args):
        """Release every tracked lock before a worker thread shuts down."""
        self.release_all_locks()

    def release_all_locks(self, *args):
        """Release all tracked locks, attempting each one even if some fail.

        Every entry is removed from ``active_locks`` before its release is
        attempted, so repeated calls never release the same lock twice.

        Raises:
            Exception: The first release failure, re-raised only after all
                remaining locks have been attempted.
        """
        first_error = None
        # Iterate over a snapshot of the keys: other threads may remove
        # entries from active_locks while this loop is running.
        for lock_key in list(self.active_locks):
            lock = self.active_locks.pop(lock_key, None)
            if lock is None:
                continue
            try:
                self._release(lock)
            except Exception as e:
                logger.info(f"Exception while trying to release lock {lock_key}: {e}")
                if first_error is None:
                    first_error = e
        if first_error is not None:
            raise first_error

    @staticmethod
    def _release(lock):
        """Decrement the task counter and always attempt to release the lock."""
        try:
            lock.decrement_task_count()
        finally:
            lock.release_lock()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: scalable-pypeline
3
- Version: 2.1.1
3
+ Version: 2.1.3
4
4
  Summary: PypeLine - Python pipelines for the Real World
5
5
  Home-page: https://gitlab.com/bravos2/pypeline
6
6
  Author: Bravos Power Corporation
@@ -25,6 +25,7 @@ pypeline/pipelines/composition/__init__.py
25
25
  pypeline/pipelines/composition/parallel_pipeline_composition.py
26
26
  pypeline/pipelines/composition/pypeline_composition.py
27
27
  pypeline/pipelines/middleware/__init__.py
28
+ pypeline/pipelines/middleware/deduplication_middleware.py
28
29
  pypeline/pipelines/middleware/parallel_pipeline_middleware.py
29
30
  pypeline/pipelines/middleware/pypeline_middleware.py
30
31
  pypeline/utils/__init__.py
@@ -1 +0,0 @@
1
- __version__ = "2.1.1"