PyPI - scalable-pypeline - Versions diffs - 2.1.3__py2.py3-none-any.whl → 2.1.5__py2.py3-none-any.whl - Mend

scalable-pypeline 2.1.3__py2.py3-none-any.whl → 2.1.5__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

pypeline/__init__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "2.1.3"
1	+ __version__ = "2.1.5"

pypeline/constants.py CHANGED Viewed

@@ -36,6 +36,7 @@ DEFAULT_BROKER_CONNECTION_ATTEMPTS = int(
 DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT = int(
     os.getenv("DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT", 30)
 )
+MESSAGE_BROKER = os.getenv("MESSAGE_BROKER", "RABBITMQ")
 MS_IN_SECONDS = 1000
 API_PATH_V1 = "/api/v1"

pypeline/dramatiq.py CHANGED Viewed

@@ -5,6 +5,8 @@ import logging
 import click
 from urllib.parse import urlparse
+from dramatiq.brokers.redis import RedisBroker
 from pypeline.extensions import pypeline_config
 from warnings import warn
 from apscheduler.schedulers.blocking import BlockingScheduler
@@ -31,12 +33,10 @@ from pypeline.constants import (
     DEFAULT_BROKER_CONNECTION_HEARTBEAT,
     DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
     DEFAULT_BROKER_CONNECTION_ATTEMPTS,
+    MESSAGE_BROKER,
 )
 from pypeline.pipelines.middleware.parallel_pipeline_middleware import ParallelPipeline
 from pypeline.pipelines.middleware.pypeline_middleware import PypelineMiddleware
-from pypeline.pipelines.middleware.deduplication_middleware import (
-    DeduplicationMiddleware,
-)
 from pypeline.utils.config_utils import (
     retrieve_latest_schedule_config,
     get_service_config_for_worker,
@@ -56,27 +56,32 @@ logger = logging.getLogger(__name__)
 def configure_default_broker(broker: Broker = None):
     redis_backend = RedisBackend(url=REDIS_URL)
-    parsed_url = urlparse(RABBIT_URL)
-    credentials = pika.PlainCredentials(parsed_url.username, parsed_url.password)
-    rabbit_broker = (
-        broker
-        if broker is not None
-        else RabbitmqBroker(
-            host=parsed_url.hostname,
-            port=parsed_url.port,
-            credentials=credentials,
-            heartbeat=DEFAULT_BROKER_CONNECTION_HEARTBEAT,
-            connection_attempts=DEFAULT_BROKER_CONNECTION_ATTEMPTS,
-            blocked_connection_timeout=DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
+    if MESSAGE_BROKER == "RABBITMQ":
+        parsed_url = urlparse(RABBIT_URL)
+        credentials = pika.PlainCredentials(parsed_url.username, parsed_url.password)
+        broker = (
+            broker
+            if broker is not None
+            else RabbitmqBroker(
+                host=parsed_url.hostname,
+                port=parsed_url.port,
+                credentials=credentials,
+                heartbeat=DEFAULT_BROKER_CONNECTION_HEARTBEAT,
+                connection_attempts=DEFAULT_BROKER_CONNECTION_ATTEMPTS,
+                blocked_connection_timeout=DEFAULT_BROKER_BLOCKED_CONNECTION_TIMEOUT,
+            )
         )
-    )
-    rabbit_broker.add_middleware(Results(backend=redis_backend))
-    rabbit_broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))
-    rabbit_broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
-    rabbit_broker.add_middleware(CurrentMessage())
-    register_actors_for_workers(rabbit_broker)
-    rabbit_broker.add_middleware(DeduplicationMiddleware(redis_url=REDIS_URL))
-    set_broker(rabbit_broker)
+    elif MESSAGE_BROKER == "REDIS":
+        broker = broker if broker is not None else RedisBroker(url=REDIS_URL)
+    broker.add_middleware(Results(backend=redis_backend))
+    broker.add_middleware(ParallelPipeline(redis_url=REDIS_URL))
+    broker.add_middleware(PypelineMiddleware(redis_url=REDIS_URL))
+    broker.add_middleware(CurrentMessage())
+    register_actors_for_workers(broker)
+    set_broker(broker)
 def register_actors_for_workers(broker: Broker):

pypeline/flask/api/pipelines.py CHANGED Viewed

@@ -56,26 +56,6 @@ class InvokePipelineSchema(Schema):
         example={"document_id": "123", "send_alert": True},
         required=False,
     )
-    settings = fields.Raw(
-        description="Payload contains settings for a given pipeline",
-        example={
-            "param1": "Dataset",
-            "param2": 1,
-            "param3": 2,
-        },
-        required=False,
-    )
-    task_replacements = fields.Raw(
-        description="A dictionary of task definitions as the key and the value of the index for which handler"
-        " should be executed.  If none provided it will default to the first handler in the list at index position 0.",
-        example={
-            "a": 1,
-            "b": 3,
-        },
-        required=False,
-    )
     scenarios = fields.List(
         fields.Nested(PipelineScenarioSchema),
         metadata={"description": "List of scenarios to run for a given pipeline"},
@@ -198,20 +178,16 @@ class PipelineInvoke(MethodView):
         retval = {"pipeline_id": pipeline_id, "status": "starting"}
         try:
             chain_payload = payload.get("chain_payload", {})
-            settings = payload.get("settings", None)
-            task_replacements = payload.get("task_replacements", {})
             scenarios = payload.get("scenarios", [])
             if pipeline_config["schemaVersion"] == 1:
                 pipeline = dag_generator(
                     pipeline_id=pipeline_id,
                     event=chain_payload,
                 )
-            elif pipeline_config["schemaVersion"] == 2 and task_replacements:
+            elif pipeline_config["schemaVersion"] == 2:
                 pipeline = dag_generator(
                     pipeline_id=pipeline_id,
-                    task_replacements=task_replacements,
                     scenarios=scenarios,
-                    settings=settings,
                 )
                 retval["scenarios"] = pipeline.scenarios
             pipeline.run()

pypeline/pipelines/composition/pypeline_composition.py CHANGED Viewed

@@ -17,8 +17,6 @@ class Pypeline:
     def __init__(
         self,
         pipeline: dict,
-        pipeline_settings: dict = None,
-        task_replacements: dict = {},
         scenarios: dict = {},
         broker=None,
         execution_id=None,
@@ -26,11 +24,8 @@ class Pypeline:
         # Construct initial properties
         self.pipeline = pipeline
         self.broker = broker or get_broker()
-        self.execution_id = execution_id or str(uuid4())
         self._starting_messages = []
         self.scenarios = scenarios
-        self.pipeline_settings = pipeline_settings
-        self.task_replacements = task_replacements
         # Get pipeline dag graph and find first task
         pipeline_config = pipeline["config"]
@@ -39,8 +34,19 @@ class Pypeline:
         task_definitions = pipeline_config["taskDefinitions"]
         first_task = list(pipeline_config["dagAdjacency"].keys())[0]
+        base_case_execution_id = None
         # Process the scenarios one by one
         for scenario in self.scenarios:
+            # The first scenario is the base case and always runs
+            if self.scenarios.index(scenario) == 0:
+                base_case_execution_id = scenario.get("execution_id", None) or str(
+                    uuid4()
+                )
+                scenario["execution_id"] = base_case_execution_id
+                scenario["base_case_execution_id"] = base_case_execution_id
+                scenario["tasksToRunInScenario"] = list(self.graph.nodes)
+                continue
             tasks_in_reruns = scenario["taskReruns"]
             # Find any tasks that have replacements for this scenario
@@ -62,6 +68,7 @@ class Pypeline:
                 tasks_to_be_rerun_in_scenario
             )
             scenario["tasksToRunInScenario"] = tasks_to_be_rerun_in_scenario
+            scenario["base_case_execution_id"] = base_case_execution_id
             scenario["execution_id"] = scenario.get("execution_id", None) or str(
                 uuid4()
             )
@@ -78,22 +85,31 @@ class Pypeline:
                 )
                 message = lazy_actor.message()
                 message.options["pipeline"] = pipeline
-                message.options["task_replacements"] = self.task_replacements
+                message.options["task_replacements"] = copy(
+                    scenario["taskReplacements"]
+                )
                 message.options["execution_id"] = scenario["execution_id"]
                 message.options["task_name"] = first_task
-                message.options["root_execution_id"] = self.execution_id
-                if self.pipeline_settings:
-                    message.kwargs["settings"] = copy(self.pipeline_settings)
+                message.options["base_case_execution_id"] = base_case_execution_id
+                if scenario["settings"]:
+                    message.kwargs["settings"] = copy(scenario["settings"])
                     message.kwargs["settings"]["execution_id"] = scenario[
                         "execution_id"
                     ]
+                    message.kwargs["settings"][
+                        "base_case_execution_id"
+                    ] = base_case_execution_id
                 self._starting_messages.append(message)
         for m in self._starting_messages:
             m.options["scenarios"] = self.scenarios
+        # Run the first task of the first scenario no matter what
+        first_scenario_task_replacements = scenarios[0]["taskReplacements"]
+        first_scenario_settings = scenarios[0].get("settings", None)
         handler = task_definitions[first_task]["handlers"][
-            self.task_replacements.get(first_task, 0)
+            first_scenario_task_replacements.get(first_task, 0)
         ]
         lazy_actor = register_lazy_actor(
             self.broker,
@@ -102,15 +118,18 @@ class Pypeline:
         )
         message = lazy_actor.message()
         message.options["pipeline"] = pipeline
-        message.options["task_replacements"] = self.task_replacements
-        message.options["execution_id"] = self.execution_id
+        message.options["task_replacements"] = first_scenario_task_replacements
+        message.options["execution_id"] = base_case_execution_id
         message.options["task_name"] = first_task
         message.options["scenarios"] = self.scenarios
-        message.options["root_execution_id"] = self.execution_id
+        message.options["base_case_execution_id"] = base_case_execution_id
-        if self.pipeline_settings:
-            message.kwargs["settings"] = copy(self.pipeline_settings)
-            message.kwargs["settings"]["execution_id"] = self.execution_id
+        if first_scenario_settings:
+            message.kwargs["settings"] = copy(first_scenario_settings)
+            message.kwargs["settings"]["execution_id"] = base_case_execution_id
+            message.kwargs["settings"][
+                "base_case_execution_id"
+            ] = base_case_execution_id
         self._starting_messages.append(message)
@@ -120,7 +139,9 @@ class Pypeline:
                 f"{message.options['execution_id']}-{message.options['task_name']}"
             )
             locking_parallel_barrier = LockingParallelBarrier(
-                REDIS_URL, task_key=task_key, lock_key=f"{self.execution_id}-lock"
+                REDIS_URL,
+                task_key=task_key,
+                lock_key=f"{message.options['base_case_execution_id']}-lock",
             )
             locking_parallel_barrier.set_task_count(1)
             self.broker.enqueue(message, delay=delay)
@@ -131,36 +152,39 @@ class Pypeline:
         return self.number_of_tasks
     def completed(self):
-        redis_task_keys = [
-            f"{self.execution_id}-{node}" for node in list(self.graph.nodes)
-        ]
-        redis_lock_key = f"{self.execution_id}-lock"
-        for scenario in self.scenarios:
-            scenario_task_keys = [
-                f"{scenario['execution_id']}-{task}"
-                for task in scenario["tasksToRunInScenario"]
-            ]
-            redis_task_keys = redis_task_keys + scenario_task_keys
+        locks = []
-        for task_key in redis_task_keys:
-            locking_parallel_barrier = LockingParallelBarrier(
-                REDIS_URL, task_key=task_key, lock_key=redis_lock_key
+        for scenario in self.scenarios:
+            locks.append(
+                {
+                    "scenario_task_keys": [
+                        f"{scenario['execution_id']}-{task}"
+                        for task in scenario["tasksToRunInScenario"]
+                    ],
+                    "redis_lock_key": f"{scenario['base_case_execution_id']}-lock",
+                }
             )
-            try:
-                locking_parallel_barrier.acquire_lock(
-                    timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+        for lock in locks:
+            for task_key in lock["scenario_task_keys"]:
+                locking_parallel_barrier = LockingParallelBarrier(
+                    REDIS_URL, task_key=task_key, lock_key=lock["redis_lock_key"]
                 )
-                task_complete = True
-                if locking_parallel_barrier.task_exists():
-                    remaining_tasks = locking_parallel_barrier.get_task_count()
-                    if remaining_tasks >= 1:
+                try:
+                    locking_parallel_barrier.acquire_lock(
+                        timeout=PARALLEL_PIPELINE_CALLBACK_BARRIER_TTL
+                    )
+                    task_complete = True
+                    if locking_parallel_barrier.task_exists():
+                        remaining_tasks = locking_parallel_barrier.get_task_count()
+                        if remaining_tasks >= 1:
+                            task_complete = False
+                    else:
                         task_complete = False
-                else:
-                    task_complete = False
-            finally:
-                locking_parallel_barrier.release_lock()
-            if not task_complete:
-                return task_complete
+                finally:
+                    locking_parallel_barrier.release_lock()
+                if not task_complete:
+                    return task_complete
         return True
@@ -168,10 +192,7 @@ class Pypeline:
         return json.dumps(
             {
                 "pipeline": self.pipeline,
-                "pipeline_settings": self.pipeline_settings,
-                "task_replacements": self.task_replacements,
                 "scenarios": self.scenarios,
-                "execution_id": self.execution_id,
             }
         )
@@ -181,8 +202,5 @@ class Pypeline:
         return cls(
             data["pipeline"],
-            pipeline_settings=data["pipeline_settings"],
-            task_replacements=data["task_replacements"],
             scenarios=data["scenarios"],
-            execution_id=data["execution_id"],
         )

pypeline/pipelines/factory.py CHANGED Viewed

@@ -20,11 +20,7 @@ from pypeline.utils.pipeline_utils import (
 def dag_generator(
-    pipeline_id: str,
-    task_replacements: dict = {},
-    scenarios: typing.List[typing.Dict] = [],
-    *args,
-    **kwargs
+    pipeline_id: str, scenarios: typing.List[typing.Dict] = [], *args, **kwargs
 ) -> typing.Union[parallel_pipeline, Pypeline]:
     """Generates a pipeline dag from a pre-defined pipeline yaml
@@ -43,40 +39,19 @@ def dag_generator(
     broker.actors.clear()
     if pipeline["schemaVersion"] == 2:
-        # If the pipeline_config expects settings ensure we have them
-        if (
-            "settings" in pipeline_config
-            and len(pipeline_config["settings"]["required"]) > 0
-            and "settings" not in kwargs
-        ):
-            raise MissingSettingsException()
+        supplied_pipeline_settings_schema = create_pipeline_settings_schema(
+            pipeline_config["settings"]
+        )
-        # If we're here we expect to  have settings.  Pop them out of kwargs to validate
-        inputted_settings = kwargs.pop("settings", {})
-        if "settings" in pipeline_config:
-            supplied_pipeline_settings_schema = create_pipeline_settings_schema(
-                pipeline_config["settings"]
-            )
-            # Validate scenarios settings to make sure they look okay
-            validated_scenarios = PipelineScenarioSchema(many=True).load(scenarios)
+        # Validate scenarios settings to make sure they look okay
+        validated_scenarios = PipelineScenarioSchema(many=True).load(scenarios)
-            for scenario in validated_scenarios:
-                supplied_pipeline_settings_schema.load(scenario["settings"])
+        for scenario in validated_scenarios:
+            supplied_pipeline_settings_schema.load(scenario["settings"])
-            validated_settings = supplied_pipeline_settings_schema.load(
-                inputted_settings
-            )
-            p = Pypeline(
-                pipeline,
-                pipeline_settings=validated_settings,
-                task_replacements=task_replacements,
-                scenarios=scenarios,
-                broker=broker,
-            )
-        else:
-            p = Pypeline(pipeline, task_replacements=task_replacements, broker=broker)
+        p = Pypeline(pipeline, scenarios=scenarios, broker=broker)
         return p
     graph = get_execution_graph(pipeline_config)
     optimal_execution_graph = topological_sort_with_parallelism(graph.copy())
     registered_actors: typing.Dict[str, LazyActor] = {}

pypeline/pipelines/middleware/pypeline_middleware.py CHANGED Viewed

@@ -35,7 +35,7 @@ class PypelineMiddleware(Middleware):
         locking_parallel_barrier = LockingParallelBarrier(
             self.redis_url,
             task_key=task_key,
-            lock_key=f"{message.options['root_execution_id']}-lock",
+            lock_key=f"{message.options['base_case_execution_id']}-lock",
         )
         try:
             locking_parallel_barrier.acquire_lock(
@@ -60,7 +60,7 @@ class PypelineMiddleware(Middleware):
                 locking_parallel_barrier = LockingParallelBarrier(
                     self.redis_url,
                     task_key=task_key,
-                    lock_key=f"{message.options['root_execution_id']}-lock",
+                    lock_key=f"{message.options['base_case_execution_id']}-lock",
                 )
                 try:
                     locking_parallel_barrier.acquire_lock(
@@ -75,12 +75,12 @@ class PypelineMiddleware(Middleware):
                     locking_parallel_barrier.release_lock()
                 if not remaining_tasks:
-                    task_key = f"{message.options['root_execution_id']}-{ancestor}"
+                    task_key = f"{message.options['base_case_execution_id']}-{ancestor}"
                     locking_parallel_barrier = LockingParallelBarrier(
                         self.redis_url,
                         task_key=task_key,
-                        lock_key=f"{message.options['root_execution_id']}-lock",
+                        lock_key=f"{message.options['base_case_execution_id']}-lock",
                     )
                     try:
                         locking_parallel_barrier.acquire_lock(
@@ -103,7 +103,10 @@ class PypelineMiddleware(Middleware):
             if not ancestor_tasks_complete:
                 break
-            if message.options["root_execution_id"] == message.options["execution_id"]:
+            if (
+                message.options["base_case_execution_id"]
+                == message.options["execution_id"]
+            ):
                 for scenario in message.options["scenarios"]:
                     child_predecessors = list(graph.predecessors(child))
                     if (
@@ -115,7 +118,7 @@ class PypelineMiddleware(Middleware):
                         locking_parallel_barrier = LockingParallelBarrier(
                             self.redis_url,
                             task_key=task_key,
-                            lock_key=f"{message.options['root_execution_id']}-lock",
+                            lock_key=f"{message.options['base_case_execution_id']}-lock",
                         )
                         locking_parallel_barrier.set_task_count(1)
                         handler = task_definitions[child]["handlers"][
@@ -137,9 +140,9 @@ class PypelineMiddleware(Middleware):
                         ]
                         scenario_message.options["task_name"] = child
-                        scenario_message.options["root_execution_id"] = message.options[
-                            "root_execution_id"
-                        ]
+                        scenario_message.options["base_case_execution_id"] = (
+                            message.options["base_case_execution_id"]
+                        )
                         scenario_message.options["scenarios"] = message.options[
                             "scenarios"
                         ]
@@ -155,7 +158,7 @@ class PypelineMiddleware(Middleware):
             locking_parallel_barrier = LockingParallelBarrier(
                 self.redis_url,
                 task_key=task_key,
-                lock_key=f"{message.options['root_execution_id']}-lock",
+                lock_key=f"{message.options['base_case_execution_id']}-lock",
             )
             locking_parallel_barrier.set_task_count(1)
             handler = task_definitions[child]["handlers"][
@@ -172,8 +175,8 @@ class PypelineMiddleware(Middleware):
             child_message.options["task_replacements"] = task_replacements
             child_message.options["execution_id"] = execution_id
             child_message.options["task_name"] = child
-            child_message.options["root_execution_id"] = message.options[
-                "root_execution_id"
+            child_message.options["base_case_execution_id"] = message.options[
+                "base_case_execution_id"
             ]
             child_message.options["scenarios"] = message.options["scenarios"]
             if "settings" in message.kwargs:

{scalable_pypeline-2.1.3.dist-info → scalable_pypeline-2.1.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: scalable-pypeline
-Version: 2.1.3
+Version: 2.1.5
 Summary: PypeLine - Python pipelines for the Real World
 Home-page: https://gitlab.com/bravos2/pypeline
 Author: Bravos Power Corporation

{scalable_pypeline-2.1.3.dist-info → scalable_pypeline-2.1.5.dist-info}/RECORD RENAMED Viewed

@@ -1,7 +1,7 @@
-pypeline/__init__.py,sha256=-5z5R8xV0UToQjp9-3ipF_dBiBdRXtdotx4_h9ZJZT8,22
+pypeline/__init__.py,sha256=Ol8KeLnnX1kAXAFgJsZj4d_cZMypHQtaiyICJpuzp64,22
 pypeline/barrier.py,sha256=oO964l9qOCOibweOHyNivmAvufdXOke9nz2tdgclouo,1172
-pypeline/constants.py,sha256=coiF8dMP25qIwoNYSnS7oy7hCd4-5yqPFmdPsN93Q1A,2892
-pypeline/dramatiq.py,sha256=TGwXWSInpCPFtYC5C-Omc2sEk5ecpsOG8xHH_mx9WTo,12451
+pypeline/constants.py,sha256=415-5fTJQXPO4by14T4BBC6hOn11m96XFiAHSh9Sfxo,2949
+pypeline/dramatiq.py,sha256=NyNwAw4iibWnS5GhTVQWxAOfBj3VXkfgSliilMa4ajg,12501
 pypeline/extensions.py,sha256=BzOTnXhNxap3N7uIUUh_hO6dDwx08Vc_RJDE93_K0Lo,610
 pypeline/pipeline_config_schema.py,sha256=hK2_egtg-YFx_XJDs_NyrOTGKkel7W83X-G0sic52sM,10592
 pypeline/pipeline_settings_schema.py,sha256=84AuNFYsOUpoADsjEo_n9T6Ica-c21oK_V9s15I4lCg,20212
@@ -11,17 +11,16 @@ pypeline/flask/__init__.py,sha256=AdljRh0lMiS8ExgDmgzObwVs8jW7hqQuf83Ml8kn8GQ,49
 pypeline/flask/decorators.py,sha256=ki6jkjZwbDbCWuj7ET7N-ncZwrASp4Fy7257WIYiAAQ,1102
 pypeline/flask/flask_pypeline.py,sha256=Uqyu3PnSP3DoVZUJPqV9chjT4xdRgvcL3OMXxkbdTEg,5490
 pypeline/flask/api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pypeline/flask/api/pipelines.py,sha256=8Y5dkIVb32dMc0jBI7lB2sQgsAIe7WYmPn-G9tlUY5o,10161
+pypeline/flask/api/pipelines.py,sha256=lw1ggsjp_Iha5MhyQGHtVW0akpVJnxIk0hn6NkC3c8s,9314
 pypeline/flask/api/schedules.py,sha256=8PKCMdPucaer8opchNlI5aDssK2UqT79hHpeg5BMtTA,1210
 pypeline/pipelines/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pypeline/pipelines/factory.py,sha256=4HNGUJzYtgBOWP7fStXF0M61CYNAid9l9PGru9HyhXA,4115
+pypeline/pipelines/factory.py,sha256=356v1S0WPvDkd9f0fKk0H9aXVbOqQYSWt47aOl66EKk,3172
 pypeline/pipelines/composition/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pypeline/pipelines/composition/parallel_pipeline_composition.py,sha256=pTw9Xb9h4JnV4siFc3JStm5lB-i9djUADo3Kh5K3s7g,12976
-pypeline/pipelines/composition/pypeline_composition.py,sha256=ieTuQZ8zxTtvmPEkrWFbItjGtvO3JUotXcR-Jim2mss,7204
+pypeline/pipelines/composition/pypeline_composition.py,sha256=UBuDKEfRoIbL-9c-HH2ZTVbzfkwFSlNoFH-AVNqt0QE,7965
 pypeline/pipelines/middleware/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-pypeline/pipelines/middleware/deduplication_middleware.py,sha256=AxlvvTRbXfMtZXW5z7P72HfF3zVM_vKd5VvZeKToZcM,3655
 pypeline/pipelines/middleware/parallel_pipeline_middleware.py,sha256=kTp6niYoe2nXIiN6EGRfdpxrJyioo0GPxDkfefbGlEk,2821
-pypeline/pipelines/middleware/pypeline_middleware.py,sha256=kvt5A9OxDwpIo0PsH11Im62tH6VquUc6OFoZDw2Gxsk,8036
+pypeline/pipelines/middleware/pypeline_middleware.py,sha256=IXVqzcOlSJ43lsn-i298RkaeygB-PTJjsvdTDtpgfwg,8141
 pypeline/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pypeline/utils/config_utils.py,sha256=rAIATyoW7kGETZ_Z2DqiXtGd7bJp5uPfcLtfNPOYsNs,2167
 pypeline/utils/dramatiq_utils.py,sha256=5GDcOvKY-8S8r---wb6Q8QAywhbKVJ-qILjcYNHei8Y,3658
@@ -29,9 +28,9 @@ pypeline/utils/module_utils.py,sha256=-yEJIukDCoXnmlZVXB6Dww25tH6GdPE5SoFqv6pfdV
 pypeline/utils/pipeline_utils.py,sha256=kGP1QwCJikGC5QNRtzRXCDVewyRMpWIqERTNnxGLlSY,4795
 pypeline/utils/schema_utils.py,sha256=Fgl0y9Cuo_TZeEx_S3gaSVnLjn6467LTkjb2ek7Ms98,851
 tests/fixtures/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-scalable_pypeline-2.1.3.dist-info/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
-scalable_pypeline-2.1.3.dist-info/METADATA,sha256=h_svvRncrZllbvgkpsYcD_Wd-bIGWGAq_hC4Hj7kCGk,5926
-scalable_pypeline-2.1.3.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
-scalable_pypeline-2.1.3.dist-info/entry_points.txt,sha256=uWs10ODfHSBKo2Cx_QaUjPHQTpZ3e77j9VlAdRRmMyg,119
-scalable_pypeline-2.1.3.dist-info/top_level.txt,sha256=C7dpkEOc_-nnsAQb28BfQknjD6XHRyS9ZrvVeoIbV7s,15
-scalable_pypeline-2.1.3.dist-info/RECORD,,
+scalable_pypeline-2.1.5.dist-info/LICENSE,sha256=DVQuDIgE45qn836wDaWnYhSdxoLXgpRRKH4RuTjpRZQ,10174
+scalable_pypeline-2.1.5.dist-info/METADATA,sha256=Fsm58uF_UU13xtUbVLq1bGtxPkwIZRnOvwbpB766URM,5926
+scalable_pypeline-2.1.5.dist-info/WHEEL,sha256=bb2Ot9scclHKMOLDEHY6B2sicWOgugjFKaJsT7vwMQo,110
+scalable_pypeline-2.1.5.dist-info/entry_points.txt,sha256=uWs10ODfHSBKo2Cx_QaUjPHQTpZ3e77j9VlAdRRmMyg,119
+scalable_pypeline-2.1.5.dist-info/top_level.txt,sha256=C7dpkEOc_-nnsAQb28BfQknjD6XHRyS9ZrvVeoIbV7s,15
+scalable_pypeline-2.1.5.dist-info/RECORD,,

pypeline/pipelines/middleware/deduplication_middleware.py DELETED Viewed

@@ -1,94 +0,0 @@
-import dramatiq
-import signal
-from dramatiq.middleware import Middleware
-from pypeline.barrier import LockingParallelBarrier
-from pypeline.constants import DEFAULT_TASK_TTL
-import logging
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-class DeduplicationMiddleware(Middleware):
-    def __init__(self, redis_url="redis://localhost:6379/0"):
-        self.redis_url = redis_url
-        self.active_locks = {}
-    def before_process_message(self, broker, message):
-        task_id = message.message_id
-        task_key = f"dramatiq:task_counter:{task_id}"
-        lock_key = f"dramatiq:lock:{task_id}"
-        try:
-            # Try to acquire a lock for the task
-            locking_parallel_barrier = LockingParallelBarrier(
-                self.redis_url,
-                task_key=task_key,
-                lock_key=lock_key,
-            )
-            if (
-                locking_parallel_barrier.get_task_count() > 0
-                or not locking_parallel_barrier.acquire_lock(timeout=DEFAULT_TASK_TTL)
-            ):
-                logger.info(f"Found duplicate task {task_id}.  Skipping...")
-                raise dramatiq.middleware.SkipMessage(
-                    f"Task {task_id} is already being processed."
-                )
-            locking_parallel_barrier.set_task_count(1)
-            # Store the lock reference in the message and track it globally
-            message.options["dedupe_task_key"] = task_key
-            message.options["dedupe_lock_key"] = lock_key
-            self.active_locks[lock_key] = locking_parallel_barrier
-        except dramatiq.middleware.SkipMessage:
-            raise dramatiq.middleware.SkipMessage(
-                f"Task {task_id} is already being processed."
-            )
-        except Exception as e:
-            logger.exception(e)
-            raise e
-    def after_process_message(self, broker, message, *, result=None, exception=None):
-        """Releases lock for the message that just finished."""
-        dedupe_task_key = message.options.get("dedupe_task_key", None)
-        dedupe_lock_key = message.options.get("dedupe_lock_key", None)
-        if not dedupe_lock_key or not dedupe_task_key:
-            logger.warning(
-                "unexpected in after_process_message: dedupe task or lock key not in message"
-            )
-            return
-        if dedupe_lock_key in self.active_locks:
-            try:
-                lock = self.active_locks[dedupe_lock_key]
-                lock.decrement_task_count()
-                lock.release_lock()
-                del self.active_locks[dedupe_lock_key]
-            except Exception as e:
-                logger.info(
-                    f"Exception while trying to release lock {dedupe_lock_key}: {e}"
-                )
-                raise e
-        else:
-            lock = LockingParallelBarrier(
-                self.redis_url,
-                task_key=dedupe_task_key,
-                lock_key=dedupe_lock_key,
-            )
-            lock.decrement_task_count()
-            lock.release_lock()
-    def before_worker_shutdown(self, *args):
-        self.release_all_locks()
-    def before_worker_thread_shutdown(self, *args):
-        self.release_all_locks()
-    def release_all_locks(self, *args):
-        """Release all locks when the worker shuts down."""
-        for lock_key, lock in self.active_locks.items():
-            try:
-                lock.decrement_task_count()
-                lock.release_lock()
-            except Exception as e:
-                logger.info(f"Exception while trying to release lock {lock_key}: {e}")
-                raise e
-        self.active_locks.clear()

{scalable_pypeline-2.1.3.dist-info → scalable_pypeline-2.1.5.dist-info}/LICENSE RENAMED Viewed

File without changes

{scalable_pypeline-2.1.3.dist-info → scalable_pypeline-2.1.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{scalable_pypeline-2.1.3.dist-info → scalable_pypeline-2.1.5.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{scalable_pypeline-2.1.3.dist-info → scalable_pypeline-2.1.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

scalable-pypeline 2.1.3__py2.py3-none-any.whl → 2.1.5__py2.py3-none-any.whl

scalable-pypeline 2.1.3__py2.py3-none-any.whl → 2.1.5__py2.py3-none-any.whl