pyworkflow-engine 0.1.12__py3-none-any.whl → 0.1.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -114,6 +114,31 @@ class LocalContext(WorkflowContext):
114
114
  self._replay_events(event_log)
115
115
  self._is_replaying = False
116
116
 
117
+ def _extract_counter_from_id(self, id_string: str) -> int:
118
+ """Extract counter value from hook_id or sleep_id.
119
+
120
+ Formats:
121
+ - hook_{name}_{counter}
122
+ - sleep_{counter}_{duration}s
123
+
124
+ Args:
125
+ id_string: The hook_id or sleep_id string
126
+
127
+ Returns:
128
+ The counter value, or 0 if parsing fails
129
+ """
130
+ try:
131
+ parts = id_string.split("_")
132
+ if id_string.startswith("hook_"):
133
+ # hook_{name}_{counter} - counter is last part
134
+ return int(parts[-1])
135
+ elif id_string.startswith("sleep_"):
136
+ # sleep_{counter}_{duration}s - counter is second part
137
+ return int(parts[1])
138
+ except (ValueError, IndexError):
139
+ pass
140
+ return 0
141
+
117
142
  def _replay_events(self, events: list[Any]) -> None:
118
143
  """Replay events to restore state."""
119
144
  from pyworkflow.engine.events import EventType
@@ -142,6 +167,12 @@ class LocalContext(WorkflowContext):
142
167
  payload = deserialize(event.data.get("payload"))
143
168
  self._hook_results[hook_id] = payload
144
169
 
170
+ elif event.type == EventType.HOOK_CREATED:
171
+ # Track pending hooks for re-suspension
172
+ hook_id = event.data.get("hook_id")
173
+ if hook_id:
174
+ self._pending_hooks[hook_id] = event.data
175
+
145
176
  elif event.type == EventType.STEP_RETRYING:
146
177
  step_id = event.data.get("step_id")
147
178
  self._retry_states[step_id] = {
@@ -893,6 +924,21 @@ class LocalContext(WorkflowContext):
893
924
  logger.debug(f"[replay] Hook {hook_id} already received")
894
925
  return self._hook_results[hook_id]
895
926
 
927
+ # Check if already pending (created but not yet received - replay mode)
928
+ # This prevents duplicate hook creation when workflow resumes
929
+ if hook_id in self._pending_hooks:
930
+ logger.debug(f"[replay] Hook {hook_id} already pending, re-suspending")
931
+ pending_data = self._pending_hooks[hook_id]
932
+ actual_token = pending_data.get("token")
933
+ # Call on_created callback if provided
934
+ if on_created is not None:
935
+ await on_created(actual_token)
936
+ raise SuspensionSignal(
937
+ reason=f"hook:{hook_id}",
938
+ hook_id=hook_id,
939
+ token=actual_token,
940
+ )
941
+
896
942
  # Generate composite token: run_id:hook_id
897
943
  from pyworkflow.primitives.resume_hook import create_hook_token
898
944
 
pyworkflow/core/step.py CHANGED
@@ -23,6 +23,7 @@ from loguru import logger
23
23
  from pyworkflow.context import get_context, has_context
24
24
  from pyworkflow.core.exceptions import FatalError, RetryableError
25
25
  from pyworkflow.core.registry import register_step
26
+ from pyworkflow.core.validation import validate_step_parameters
26
27
  from pyworkflow.engine.events import (
27
28
  create_step_completed_event,
28
29
  create_step_failed_event,
@@ -118,6 +119,8 @@ def step(
118
119
  f"Step {step_name} in transient mode, executing directly",
119
120
  run_id=ctx.run_id,
120
121
  )
122
+ # Validate parameters before execution
123
+ validate_step_parameters(func, args, kwargs, step_name)
121
124
  return await _execute_with_retries(
122
125
  func, args, kwargs, step_name, max_retries, retry_delay
123
126
  )
@@ -172,6 +175,8 @@ def step(
172
175
  # When running in a distributed runtime (e.g., Celery), dispatch steps
173
176
  # to step workers instead of executing inline.
174
177
  if ctx.runtime == "celery":
178
+ # Validate parameters before dispatching to Celery
179
+ validate_step_parameters(func, args, kwargs, step_name)
175
180
  return await _dispatch_step_to_celery(
176
181
  ctx=ctx,
177
182
  func=func,
@@ -240,6 +245,9 @@ def step(
240
245
  # Check for cancellation before executing step
241
246
  ctx.check_cancellation()
242
247
 
248
+ # Validate parameters before execution
249
+ validate_step_parameters(func, args, kwargs, step_name)
250
+
243
251
  try:
244
252
  # Execute step function
245
253
  result = await func(*args, **kwargs)
@@ -0,0 +1,112 @@
1
+ """
2
+ Pydantic validation for step parameters.
3
+
4
+ Validates step function arguments against their type hints using Pydantic's
5
+ TypeAdapter for runtime type checking.
6
+ """
7
+
8
+ import inspect
9
+ from collections.abc import Callable
10
+ from typing import Any, get_type_hints
11
+
12
+ from pydantic import TypeAdapter, ValidationError
13
+
14
+ from pyworkflow.core.exceptions import FatalError
15
+
16
+
17
+ class StepValidationError(FatalError):
18
+ """
19
+ Raised when step parameter validation fails.
20
+
21
+ This is a FatalError subclass to ensure validation failures
22
+ immediately fail the workflow without retries.
23
+ """
24
+
25
+ def __init__(
26
+ self,
27
+ step_name: str,
28
+ param_name: str,
29
+ expected_type: type,
30
+ received_value: Any,
31
+ validation_error: ValidationError,
32
+ ) -> None:
33
+ self.step_name = step_name
34
+ self.param_name = param_name
35
+ self.expected_type = expected_type
36
+ self.received_value = received_value
37
+ self.validation_error = validation_error
38
+
39
+ # Build clear error message
40
+ error_details = str(validation_error)
41
+ message = (
42
+ f"Step '{step_name}' parameter validation failed for '{param_name}': "
43
+ f"expected {expected_type}, got {type(received_value).__name__} "
44
+ f"with value {received_value!r}. Details: {error_details}"
45
+ )
46
+ super().__init__(message)
47
+
48
+
49
+ def validate_step_parameters(
50
+ func: Callable,
51
+ args: tuple,
52
+ kwargs: dict,
53
+ step_name: str,
54
+ ) -> None:
55
+ """
56
+ Validate step parameters against their type hints using Pydantic.
57
+
58
+ Only parameters with type annotations are validated. Parameters without
59
+ type hints are skipped.
60
+
61
+ Args:
62
+ func: The step function (original, unwrapped)
63
+ args: Positional arguments passed to the step
64
+ kwargs: Keyword arguments passed to the step
65
+ step_name: Name of the step for error messages
66
+
67
+ Raises:
68
+ StepValidationError: If any typed parameter fails validation
69
+ """
70
+ # Get function signature and type hints
71
+ sig = inspect.signature(func)
72
+
73
+ try:
74
+ # Try to get type hints, may fail for some edge cases
75
+ type_hints = get_type_hints(func)
76
+ except Exception:
77
+ # If we can't get type hints, skip validation
78
+ return
79
+
80
+ if not type_hints:
81
+ # No type hints at all, skip validation
82
+ return
83
+
84
+ # Bind arguments to parameters
85
+ try:
86
+ bound = sig.bind(*args, **kwargs)
87
+ bound.apply_defaults()
88
+ except TypeError:
89
+ # If binding fails, the function call itself will fail
90
+ # Let the normal execution handle this
91
+ return
92
+
93
+ # Validate each parameter that has a type hint
94
+ for param_name, param_value in bound.arguments.items():
95
+ if param_name not in type_hints:
96
+ # No type hint for this parameter, skip validation
97
+ continue
98
+
99
+ expected_type = type_hints[param_name]
100
+
101
+ try:
102
+ # Use Pydantic TypeAdapter for validation
103
+ adapter = TypeAdapter(expected_type)
104
+ adapter.validate_python(param_value)
105
+ except ValidationError as e:
106
+ raise StepValidationError(
107
+ step_name=step_name,
108
+ param_name=param_name,
109
+ expected_type=expected_type,
110
+ received_value=param_value,
111
+ validation_error=e,
112
+ )
@@ -185,6 +185,7 @@ async def resume_hook(
185
185
  hook_id=hook_id,
186
186
  status=HookStatus.RECEIVED,
187
187
  payload=serialized_payload,
188
+ run_id=run_id,
188
189
  )
189
190
 
190
191
  # Schedule workflow resumption via configured runtime
@@ -195,7 +196,7 @@ async def resume_hook(
195
196
  runtime = get_runtime(config.default_runtime)
196
197
 
197
198
  try:
198
- await runtime.schedule_resume(run_id, storage)
199
+ await runtime.schedule_resume(run_id, storage, triggered_by_hook_id=hook_id)
199
200
  except Exception as e:
200
201
  logger.warning(
201
202
  f"Failed to schedule workflow resumption: {e}",
@@ -97,6 +97,7 @@ class Runtime(ABC):
97
97
  self,
98
98
  run_id: str,
99
99
  storage: "StorageBackend",
100
+ triggered_by_hook_id: str | None = None,
100
101
  ) -> None:
101
102
  """
102
103
  Schedule a workflow to be resumed immediately.
@@ -109,6 +110,9 @@ class Runtime(ABC):
109
110
  Args:
110
111
  run_id: The run_id of the workflow to resume
111
112
  storage: Storage backend
113
+ triggered_by_hook_id: Optional hook ID that triggered this resume.
114
+ Used by distributed runtimes to prevent
115
+ spurious resumes from duplicate calls.
112
116
  """
113
117
  # Default implementation: no-op
114
118
  # Subclasses override if they support async scheduling
@@ -202,25 +202,36 @@ class CeleryRuntime(Runtime):
202
202
  self,
203
203
  run_id: str,
204
204
  storage: "StorageBackend",
205
+ triggered_by_hook_id: str | None = None,
205
206
  ) -> None:
206
207
  """
207
208
  Schedule immediate workflow resumption via Celery task.
208
209
 
209
210
  This is called by resume_hook() to trigger workflow resumption
210
211
  after a hook event is received.
212
+
213
+ Args:
214
+ run_id: The workflow run ID to resume
215
+ storage: Storage backend for configuration
216
+ triggered_by_hook_id: Optional hook ID that triggered this resume.
217
+ Used to prevent spurious resumes from duplicate calls.
211
218
  """
212
219
  from pyworkflow.celery.tasks import resume_workflow_task
213
220
 
214
221
  logger.info(
215
222
  f"Scheduling workflow resume via Celery: {run_id}",
216
223
  run_id=run_id,
224
+ triggered_by_hook_id=triggered_by_hook_id,
217
225
  )
218
226
 
219
227
  storage_config = self._get_storage_config(storage)
220
228
 
221
229
  resume_workflow_task.apply_async(
222
230
  args=[run_id],
223
- kwargs={"storage_config": storage_config},
231
+ kwargs={
232
+ "storage_config": storage_config,
233
+ "triggered_by_hook_id": triggered_by_hook_id,
234
+ },
224
235
  )
225
236
 
226
237
  logger.info(
@@ -507,16 +507,24 @@ class LocalRuntime(Runtime):
507
507
  self,
508
508
  run_id: str,
509
509
  storage: "StorageBackend",
510
+ triggered_by_hook_id: str | None = None,
510
511
  ) -> None:
511
512
  """
512
513
  Schedule immediate workflow resumption.
513
514
 
514
515
  For local runtime, this directly calls resume_workflow since
515
516
  execution happens in-process.
517
+
518
+ Args:
519
+ run_id: The workflow run ID to resume
520
+ storage: Storage backend
521
+ triggered_by_hook_id: Optional hook ID that triggered this resume.
522
+ Not used in local runtime (no queueing).
516
523
  """
517
524
  logger.info(
518
525
  f"Scheduling immediate workflow resume: {run_id}",
519
526
  run_id=run_id,
527
+ triggered_by_hook_id=triggered_by_hook_id,
520
528
  )
521
529
 
522
530
  try:
@@ -291,12 +291,13 @@ class StorageBackend(ABC):
291
291
  pass
292
292
 
293
293
  @abstractmethod
294
- async def get_hook(self, hook_id: str) -> Hook | None:
294
+ async def get_hook(self, hook_id: str, run_id: str | None = None) -> Hook | None:
295
295
  """
296
296
  Retrieve a hook by ID.
297
297
 
298
298
  Args:
299
299
  hook_id: Hook identifier
300
+ run_id: Run ID (required for composite key lookup in SQL backends)
300
301
 
301
302
  Returns:
302
303
  Hook if found, None otherwise
@@ -322,6 +323,7 @@ class StorageBackend(ABC):
322
323
  hook_id: str,
323
324
  status: HookStatus,
324
325
  payload: str | None = None,
326
+ run_id: str | None = None,
325
327
  ) -> None:
326
328
  """
327
329
  Update hook status and optionally payload.
@@ -330,6 +332,7 @@ class StorageBackend(ABC):
330
332
  hook_id: Hook identifier
331
333
  status: New status
332
334
  payload: JSON serialized payload (if received)
335
+ run_id: Run ID (required for composite key lookup in SQL backends)
333
336
  """
334
337
  pass
335
338
 
@@ -1072,29 +1072,31 @@ class CassandraStorageBackend(StorageBackend):
1072
1072
 
1073
1073
  session.execute(batch)
1074
1074
 
1075
- async def get_hook(self, hook_id: str) -> Hook | None:
1076
- """Retrieve a hook by ID."""
1075
+ async def get_hook(self, hook_id: str, run_id: str | None = None) -> Hook | None:
1076
+ """Retrieve a hook by ID (run_id allows skipping lookup table)."""
1077
1077
  session = self._ensure_connected()
1078
1078
 
1079
- # First lookup run_id
1080
- lookup = session.execute(
1081
- SimpleStatement(
1082
- "SELECT run_id FROM hooks_by_id WHERE hook_id = %s",
1083
- consistency_level=self.read_consistency,
1084
- ),
1085
- (hook_id,),
1086
- ).one()
1079
+ if not run_id:
1080
+ # First lookup run_id from lookup table
1081
+ lookup = session.execute(
1082
+ SimpleStatement(
1083
+ "SELECT run_id FROM hooks_by_id WHERE hook_id = %s",
1084
+ consistency_level=self.read_consistency,
1085
+ ),
1086
+ (hook_id,),
1087
+ ).one()
1087
1088
 
1088
- if not lookup:
1089
- return None
1089
+ if not lookup:
1090
+ return None
1091
+ run_id = lookup.run_id
1090
1092
 
1091
- # Then get full hook
1093
+ # Get full hook
1092
1094
  row = session.execute(
1093
1095
  SimpleStatement(
1094
1096
  "SELECT * FROM hooks WHERE run_id = %s AND hook_id = %s",
1095
1097
  consistency_level=self.read_consistency,
1096
1098
  ),
1097
- (lookup.run_id, hook_id),
1099
+ (run_id, hook_id),
1098
1100
  ).one()
1099
1101
 
1100
1102
  if not row:
@@ -1137,21 +1139,24 @@ class CassandraStorageBackend(StorageBackend):
1137
1139
  hook_id: str,
1138
1140
  status: HookStatus,
1139
1141
  payload: str | None = None,
1142
+ run_id: str | None = None,
1140
1143
  ) -> None:
1141
1144
  """Update hook status and optionally payload."""
1142
1145
  session = self._ensure_connected()
1143
1146
 
1144
- # First lookup run_id
1145
- lookup = session.execute(
1146
- SimpleStatement(
1147
- "SELECT run_id FROM hooks_by_id WHERE hook_id = %s",
1148
- consistency_level=self.read_consistency,
1149
- ),
1150
- (hook_id,),
1151
- ).one()
1147
+ if not run_id:
1148
+ # First lookup run_id from lookup table
1149
+ lookup = session.execute(
1150
+ SimpleStatement(
1151
+ "SELECT run_id FROM hooks_by_id WHERE hook_id = %s",
1152
+ consistency_level=self.read_consistency,
1153
+ ),
1154
+ (hook_id,),
1155
+ ).one()
1152
1156
 
1153
- if not lookup:
1154
- return
1157
+ if not lookup:
1158
+ return
1159
+ run_id = lookup.run_id
1155
1160
 
1156
1161
  received_at = datetime.now(UTC) if status == HookStatus.RECEIVED else None
1157
1162
 
@@ -1164,7 +1169,7 @@ class CassandraStorageBackend(StorageBackend):
1164
1169
  """,
1165
1170
  consistency_level=self.write_consistency,
1166
1171
  ),
1167
- (status.value, payload, received_at, lookup.run_id, hook_id),
1172
+ (status.value, payload, received_at, run_id, hook_id),
1168
1173
  )
1169
1174
 
1170
1175
  async def list_hooks(
@@ -722,9 +722,9 @@ class DynamoDBStorageBackend(StorageBackend):
722
722
  async def create_hook(self, hook: Hook) -> None:
723
723
  """Create a hook record."""
724
724
  async with self._get_client() as client:
725
- # Main hook item
725
+ # Main hook item (composite key: run_id + hook_id)
726
726
  item = {
727
- "PK": f"HOOK#{hook.hook_id}",
727
+ "PK": f"HOOK#{hook.run_id}#{hook.hook_id}",
728
728
  "SK": "#METADATA",
729
729
  "entity_type": "hook",
730
730
  "hook_id": hook.hook_id,
@@ -741,12 +741,13 @@ class DynamoDBStorageBackend(StorageBackend):
741
741
  "GSI1SK": f"{hook.status.value}#{hook.created_at.isoformat()}",
742
742
  }
743
743
 
744
- # Token lookup item
744
+ # Token lookup item (stores run_id and hook_id for lookup)
745
745
  token_item = {
746
746
  "PK": f"TOKEN#{hook.token}",
747
- "SK": f"HOOK#{hook.hook_id}",
747
+ "SK": f"HOOK#{hook.run_id}#{hook.hook_id}",
748
748
  "entity_type": "hook_token",
749
749
  "hook_id": hook.hook_id,
750
+ "run_id": hook.run_id,
750
751
  }
751
752
 
752
753
  # Write both items
@@ -759,16 +760,26 @@ class DynamoDBStorageBackend(StorageBackend):
759
760
  Item=self._dict_to_item(token_item),
760
761
  )
761
762
 
762
- async def get_hook(self, hook_id: str) -> Hook | None:
763
- """Retrieve a hook by ID."""
763
+ async def get_hook(self, hook_id: str, run_id: str | None = None) -> Hook | None:
764
+ """Retrieve a hook by ID (requires run_id for composite key)."""
764
765
  async with self._get_client() as client:
765
- response = await client.get_item(
766
- TableName=self.table_name,
767
- Key={
768
- "PK": {"S": f"HOOK#{hook_id}"},
769
- "SK": {"S": "#METADATA"},
770
- },
771
- )
766
+ if run_id:
767
+ response = await client.get_item(
768
+ TableName=self.table_name,
769
+ Key={
770
+ "PK": {"S": f"HOOK#{run_id}#{hook_id}"},
771
+ "SK": {"S": "#METADATA"},
772
+ },
773
+ )
774
+ else:
775
+ # Fallback: try old format without run_id
776
+ response = await client.get_item(
777
+ TableName=self.table_name,
778
+ Key={
779
+ "PK": {"S": f"HOOK#{hook_id}"},
780
+ "SK": {"S": "#METADATA"},
781
+ },
782
+ )
772
783
 
773
784
  item = response.get("Item")
774
785
  if not item:
@@ -779,7 +790,7 @@ class DynamoDBStorageBackend(StorageBackend):
779
790
  async def get_hook_by_token(self, token: str) -> Hook | None:
780
791
  """Retrieve a hook by its token."""
781
792
  async with self._get_client() as client:
782
- # First get the hook_id from the token lookup item
793
+ # First get the hook_id and run_id from the token lookup item
783
794
  response = await client.query(
784
795
  TableName=self.table_name,
785
796
  KeyConditionExpression="PK = :pk",
@@ -792,13 +803,16 @@ class DynamoDBStorageBackend(StorageBackend):
792
803
  return None
793
804
 
794
805
  hook_id = self._deserialize_value(items[0]["hook_id"])
795
- return await self.get_hook(hook_id)
806
+ run_id_attr = items[0].get("run_id")
807
+ run_id = self._deserialize_value(run_id_attr) if run_id_attr else None
808
+ return await self.get_hook(hook_id, run_id)
796
809
 
797
810
  async def update_hook_status(
798
811
  self,
799
812
  hook_id: str,
800
813
  status: HookStatus,
801
814
  payload: str | None = None,
815
+ run_id: str | None = None,
802
816
  ) -> None:
803
817
  """Update hook status and optionally payload."""
804
818
  async with self._get_client() as client:
@@ -814,10 +828,12 @@ class DynamoDBStorageBackend(StorageBackend):
814
828
  update_expr += ", received_at = :received_at"
815
829
  expr_values[":received_at"] = {"S": datetime.now(UTC).isoformat()}
816
830
 
831
+ pk = f"HOOK#{run_id}#{hook_id}" if run_id else f"HOOK#{hook_id}"
832
+
817
833
  await client.update_item(
818
834
  TableName=self.table_name,
819
835
  Key={
820
- "PK": {"S": f"HOOK#{hook_id}"},
836
+ "PK": {"S": pk},
821
837
  "SK": {"S": "#METADATA"},
822
838
  },
823
839
  UpdateExpression=update_expr,
@@ -464,7 +464,8 @@ class FileStorageBackend(StorageBackend):
464
464
 
465
465
  async def create_hook(self, hook: Hook) -> None:
466
466
  """Create a hook record."""
467
- hook_file = self.hooks_dir / f"{hook.hook_id}.json"
467
+ # Use composite filename: run_id__hook_id.json (double underscore separator)
468
+ hook_file = self.hooks_dir / f"{hook.run_id}__{hook.hook_id}.json"
468
469
  lock_file = self.locks_dir / "token_index.lock"
469
470
  lock = FileLock(str(lock_file))
470
471
 
@@ -473,16 +474,25 @@ class FileStorageBackend(StorageBackend):
473
474
  def _write() -> None:
474
475
  with lock:
475
476
  hook_file.write_text(json.dumps(data, indent=2))
476
- # Update token index
477
+ # Update token index (stores run_id:hook_id as value)
477
478
  index = self._load_token_index()
478
- index[hook.token] = hook.hook_id
479
+ index[hook.token] = f"{hook.run_id}:{hook.hook_id}"
479
480
  self._save_token_index(index)
480
481
 
481
482
  await asyncio.to_thread(_write)
482
483
 
483
- async def get_hook(self, hook_id: str) -> Hook | None:
484
- """Retrieve a hook by ID."""
485
- hook_file = self.hooks_dir / f"{hook_id}.json"
484
+ async def get_hook(self, hook_id: str, run_id: str | None = None) -> Hook | None:
485
+ """Retrieve a hook by ID (requires run_id for composite filename)."""
486
+ if run_id:
487
+ hook_file = self.hooks_dir / f"{run_id}__{hook_id}.json"
488
+ else:
489
+ # Fallback: try old format for backwards compat
490
+ hook_file = self.hooks_dir / f"{hook_id}.json"
491
+ if not hook_file.exists():
492
+ # Search for any file with this hook_id
493
+ for f in self.hooks_dir.glob(f"*__{hook_id}.json"):
494
+ hook_file = f
495
+ break
486
496
 
487
497
  if not hook_file.exists():
488
498
  return None
@@ -496,13 +506,18 @@ class FileStorageBackend(StorageBackend):
496
506
async def get_hook_by_token(self, token: str) -> Hook | None:
    """Retrieve a hook by its resume token.

    The token index maps token -> "run_id:hook_id" for records written by
    the current create_hook(). Indexes written by older versions stored
    the bare hook_id, so entries without a ':' fall back to a plain
    hook_id lookup (get_hook handles the legacy filename format) instead
    of being treated as missing.

    Args:
        token: The opaque resume token handed out when the hook was created.

    Returns:
        The matching Hook, or None if the token is unknown.
    """

    def _lookup() -> tuple[str | None, str] | None:
        # Runs in a worker thread: loading the index does file I/O.
        index = self._load_token_index()
        value = index.get(token)
        if not value:
            return None
        if ":" in value:
            run_id, hook_id = value.split(":", 1)
            return (run_id, hook_id)
        # Legacy index entry: value is the bare hook_id, run_id unknown.
        return (None, value)

    result = await asyncio.to_thread(_lookup)
    if result:
        run_id, hook_id = result
        return await self.get_hook(hook_id, run_id)
    return None
507
522
 
508
523
  async def update_hook_status(
@@ -510,14 +525,25 @@ class FileStorageBackend(StorageBackend):
510
525
  hook_id: str,
511
526
  status: HookStatus,
512
527
  payload: str | None = None,
528
+ run_id: str | None = None,
513
529
  ) -> None:
514
530
  """Update hook status and optionally payload."""
515
- hook_file = self.hooks_dir / f"{hook_id}.json"
531
+ if run_id:
532
+ hook_file = self.hooks_dir / f"{run_id}__{hook_id}.json"
533
+ else:
534
+ # Fallback: try old format
535
+ hook_file = self.hooks_dir / f"{hook_id}.json"
536
+ if not hook_file.exists():
537
+ # Search for any file with this hook_id
538
+ for f in self.hooks_dir.glob(f"*__{hook_id}.json"):
539
+ hook_file = f
540
+ break
516
541
 
517
542
  if not hook_file.exists():
518
543
  raise ValueError(f"Hook {hook_id} not found")
519
544
 
520
- lock_file = self.locks_dir / f"hook_{hook_id}.lock"
545
+ safe_hook_id = hook_id.replace("/", "_").replace(":", "_")
546
+ lock_file = self.locks_dir / f"hook_{safe_hook_id}.lock"
521
547
  lock = FileLock(str(lock_file))
522
548
 
523
549
  def _update() -> None: