PyPI - puda-comms - Versions diffs - 0.0.5__tar.gz → 0.0.6__tar.gz - Mend

puda-comms 0.0.5.tar.gz → 0.0.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

{puda_comms-0.0.5 → puda_comms-0.0.6}/PKG-INFO RENAMED Viewed

@@ -1,11 +1,10 @@
 Metadata-Version: 2.3
 Name: puda-comms
-Version: 0.0.5
+Version: 0.0.6
 Summary: Communication library for the PUDA platform.
 Author: zhao
 Author-email: zhao <20024592+agentzhao@users.noreply.github.com>
 Requires-Dist: nats-py>=2.12.0
-Requires-Dist: puda-drivers
 Requires-Dist: pydantic>=2.12.5
 Requires-Python: >=3.14
 Description-Content-Type: text/markdown
@@ -73,6 +72,7 @@ Represents a command to be sent to a machine.
 **Fields:**
 - `name` (str): The command name to execute
+- `machine_id` (str): Machine ID to send the command to (required)
 - `params` (Dict[str, Any]): Command parameters (default: empty dict)
 - `step_number` (int): Execution step number for tracking progress
 - `version` (str): Command version (default: "1.0")
@@ -81,7 +81,8 @@ Represents a command to be sent to a machine.
 ```python
 command = CommandRequest(
     name="attach_tip",
-    params={"slot": "A3", "well": "G8"},
+    machine_id="first",
+    params={"deck_slot": "A3", "well_name": "G8"},
     step_number=2,
     version="1.0"
 )
@@ -109,7 +110,7 @@ response = CommandResponse(
 error_response = CommandResponse(
     status=CommandResponseStatus.ERROR,
     code="EXECUTION_ERROR",
-    message="Failed to attach tip: slot A3 not found",
+    message="Failed to attach tip: deck_slot A3 not found",
     completed_at="2026-01-20T02:00:46Z"
 )
 ```
@@ -166,8 +167,8 @@ Complete NATS message structure combining header with optional command or respon
   "command": {
     "name": "attach_tip",
     "params": {
-      "slot": "A3",
-      "well": "G8"
+      "deck_slot": "A3",
+      "well_name": "G8"
     },
     "step_number": 2,
     "version": "1.0"
@@ -230,10 +231,9 @@ Queue commands are regular commands that are executed in sequence. Use `send_que
 `send_queue_command()`, `send_queue_commands()`, and `send_immediate_command()` all accept an optional `timeout` parameter (default: 120 seconds):
 ```python
-# Single command
+# Single command (machine_id must be in CommandRequest)
 reply = await service.send_queue_command(
-    request=request,
-    machine_id="first",
+    request=request,  # request.machine_id must be set
     run_id=run_id,
     user_id="user123",
     username="John Doe",
@@ -241,9 +241,9 @@ reply = await service.send_queue_command(
 )
 # Multiple commands (timeout applies to each command)
+# Each command in the list must have machine_id set
 reply = await service.send_queue_commands(
-    requests=commands,
-    machine_id="first",
+    requests=commands,  # Each CommandRequest must have machine_id
     run_id=run_id,
     user_id="user123",
     username="John Doe",
@@ -282,8 +282,7 @@ Always check the response status and handle errors appropriately:
 ```python
 reply: NATSMessage = await service.send_queue_command(
-    request=request,
-    machine_id="first",
+    request=request,  # request.machine_id must be set
     run_id=run_id,
     user_id="user123",
     username="John Doe"

{puda_comms-0.0.5 → puda_comms-0.0.6}/README.md RENAMED Viewed

@@ -61,6 +61,7 @@ Represents a command to be sent to a machine.
 **Fields:**
 - `name` (str): The command name to execute
+- `machine_id` (str): Machine ID to send the command to (required)
 - `params` (Dict[str, Any]): Command parameters (default: empty dict)
 - `step_number` (int): Execution step number for tracking progress
 - `version` (str): Command version (default: "1.0")
@@ -69,7 +70,8 @@ Represents a command to be sent to a machine.
 ```python
 command = CommandRequest(
     name="attach_tip",
-    params={"slot": "A3", "well": "G8"},
+    machine_id="first",
+    params={"deck_slot": "A3", "well_name": "G8"},
     step_number=2,
     version="1.0"
 )
@@ -97,7 +99,7 @@ response = CommandResponse(
 error_response = CommandResponse(
     status=CommandResponseStatus.ERROR,
     code="EXECUTION_ERROR",
-    message="Failed to attach tip: slot A3 not found",
+    message="Failed to attach tip: deck_slot A3 not found",
     completed_at="2026-01-20T02:00:46Z"
 )
 ```
@@ -154,8 +156,8 @@ Complete NATS message structure combining header with optional command or respon
   "command": {
     "name": "attach_tip",
     "params": {
-      "slot": "A3",
-      "well": "G8"
+      "deck_slot": "A3",
+      "well_name": "G8"
     },
     "step_number": 2,
     "version": "1.0"
@@ -218,10 +220,9 @@ Queue commands are regular commands that are executed in sequence. Use `send_que
 `send_queue_command()`, `send_queue_commands()`, and `send_immediate_command()` all accept an optional `timeout` parameter (default: 120 seconds):
 ```python
-# Single command
+# Single command (machine_id must be in CommandRequest)
 reply = await service.send_queue_command(
-    request=request,
-    machine_id="first",
+    request=request,  # request.machine_id must be set
     run_id=run_id,
     user_id="user123",
     username="John Doe",
@@ -229,9 +230,9 @@ reply = await service.send_queue_command(
 )
 # Multiple commands (timeout applies to each command)
+# Each command in the list must have machine_id set
 reply = await service.send_queue_commands(
-    requests=commands,
-    machine_id="first",
+    requests=commands,  # Each CommandRequest must have machine_id
     run_id=run_id,
     user_id="user123",
     username="John Doe",
@@ -270,8 +271,7 @@ Always check the response status and handle errors appropriately:
 ```python
 reply: NATSMessage = await service.send_queue_command(
-    request=request,
-    machine_id="first",
+    request=request,  # request.machine_id must be set
     run_id=run_id,
     user_id="user123",
     username="John Doe"

{puda_comms-0.0.5 → puda_comms-0.0.6}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "puda-comms"
-version = "0.0.5"
+version = "0.0.6"
 description = "Communication library for the PUDA platform."
 readme = "README.md"
 authors = [
@@ -9,14 +9,9 @@ authors = [
 requires-python = ">=3.14"
 dependencies = [
     "nats-py>=2.12.0",
-    # "puda-drivers>=0.0.16",
-    "puda-drivers",
     "pydantic>=2.12.5",
 ]
-[tool.uv.sources]
-puda-drivers = {workspace = true}
 [tool.ruff]
 line-length = 100

puda_comms-0.0.6/src/puda_comms/__init__.py ADDED Viewed

@@ -0,0 +1,9 @@
+# Import models first to ensure they're initialized before other modules that depend on them
+from . import models
+from .machine_client import MachineClient
+from .execution_state import ExecutionState
+from .command_service import CommandService
+from .stream_subscriber import StreamSubscriber
+__all__ = ["MachineClient", "ExecutionState", "CommandService", "StreamSubscriber", "models"]

{puda_comms-0.0.5 → puda_comms-0.0.6}/src/puda_comms/command_service.py RENAMED Viewed

@@ -16,7 +16,7 @@ from typing import Dict, Any, Optional
 import nats
 from nats.js.client import JetStreamContext
 from nats.aio.msg import Msg
-from puda_comms.models import (
+from .models import (
     CommandRequest,
     CommandResponseStatus,
     NATSMessage,
@@ -104,7 +104,7 @@ class ResponseHandler:
                     command, step_number, run_id, message.response.status
                 )
                 if message.response.status == CommandResponseStatus.ERROR:
-                    logger.warning("Command failed: %s", message.response.message)
+                    logger.error("Error Code: %s, Message: %s", message.response.code.name, message.response.message)
                 # Get the pending response
                 pending = self._pending_responses[key]
@@ -347,7 +347,6 @@ class CommandService:
         self,
         *,
         request: CommandRequest,
-        machine_id: str,
         run_id: str,
         user_id: str,
         username: str,
@@ -357,8 +356,7 @@ class CommandService:
         Send a queue command to the machine and wait for response.
         Args:
-            request: CommandRequest model containing command details
-            machine_id: Machine ID to send the command to
+            request: CommandRequest model containing command details (must include machine_id)
             run_id: Run ID for the command
             user_id: User ID who initiated the command
             username: Username who initiated the command
@@ -370,8 +368,8 @@ class CommandService:
         if not self._connected or not self.js:
             raise RuntimeError("Not connected to NATS. Call connect() first.")
-        # Determine subject
-        subject = f"{NAMESPACE}.{machine_id}.cmd.queue"
+        # Determine subject using machine_id from request
+        subject = f"{NAMESPACE}.{request.machine_id}.cmd.queue"
         logger.info(
             "Sending queue command: subject=%s, command=%s, run_id=%s, step_number=%s",
@@ -379,12 +377,12 @@ class CommandService:
         )
         # Get or create response handler for this machine
-        response_handler = await self._get_response_handler(machine_id)
+        response_handler = await self._get_response_handler(request.machine_id)
         # Register pending response
         response_event = response_handler.register_pending(run_id, request.step_number)
         # Build payload
-        payload = self._build_command_payload(request, machine_id, run_id, user_id, username)
+        payload = self._build_command_payload(request, request.machine_id, run_id, user_id, username)
         try:
             # Publish to JetStream
@@ -437,12 +435,12 @@ class CommandService:
         """
         request = CommandRequest(
             name="start",
+            machine_id=machine_id,
             params={},
             step_number=0
         )
         return await self.send_immediate_command(
             request=request,
-            machine_id=machine_id,
             run_id=run_id,
             user_id=user_id,
             username=username,
@@ -472,12 +470,12 @@ class CommandService:
         """
         request = CommandRequest(
             name="complete",
+            machine_id=machine_id,
             params={},
             step_number=0
         )
         return await self.send_immediate_command(
             request=request,
-            machine_id=machine_id,
             run_id=run_id,
             user_id=user_id,
             username=username,
@@ -488,7 +486,6 @@ class CommandService:
         self,
         *,
         requests: list[CommandRequest],
-        machine_id: str,
         run_id: str,
         user_id: str,
         username: str,
@@ -497,14 +494,18 @@ class CommandService:
         """
         Send multiple queue commands sequentially and wait for responses.
-        Automatically sends START command before the sequence and COMPLETE command after
-        successful completion. Sends commands one by one, waiting for each response before
-        sending the next. If any command fails or times out, stops immediately and returns
-        the error response. If all commands succeed, returns the last command's response.
+        Automatically sends START commands to all unique machine_ids before the sequence
+        and COMPLETE commands to all unique machine_ids after successful completion.
+        Sends commands one by one, waiting for each response before sending the next.
+        If any command fails or times out, stops immediately, completes all started runs,
+        and returns the error response. If all commands succeed, returns the last command's response.
+        Each command must specify its own machine_id. Commands with different machine_ids
+        will be sent to their respective machines. All machines involved will receive
+        START commands at the beginning and COMPLETE commands at the end.
         Args:
-            requests: List of CommandRequest models to send sequentially
-            machine_id: Machine ID to send the commands to
+            requests: List of CommandRequest models to send sequentially (each must include machine_id)
             run_id: Run ID for all commands
             user_id: User ID who initiated the commands
             username: Username who initiated the commands
@@ -521,28 +522,42 @@ class CommandService:
             logger.warning("No commands to send")
             return None
+        # Collect all unique machine_ids from requests
+        machine_ids = set()
+        for req in requests:
+            if isinstance(req, dict):
+                req = CommandRequest.model_validate(req)
+            elif not isinstance(req, CommandRequest):
+                raise ValueError(f"Request must be a CommandRequest or dict, got {type(req)}")
+            machine_ids.add(req.machine_id)
+        machine_ids_list = sorted(list(machine_ids))  # Sort for consistent logging
         logger.info(
-            "Sending %d queue commands sequentially: machine_id=%s, run_id=%s",
+            "Sending %d queue commands sequentially to machines: %s, run_id=%s",
             len(requests),
-            machine_id,
+            machine_ids_list,
             run_id
         )
-        # Always send START command before sequence
-        logger.info("Sending START command before sequence")
-        start_response = await self.start_run(
-            machine_id=machine_id,
-            run_id=run_id,
-            user_id=user_id,
-            username=username,
-            timeout=timeout
-        )
-        if start_response is None:
-            logger.error("START command timed out")
-            return None
-        if start_response.response and start_response.response.status == CommandResponseStatus.ERROR:
-            logger.error("START command failed: %s", start_response.response.message)
-            return start_response
+        # Send START commands to all unique machine_ids before sequence
+        logger.info("Sending START commands to all machines: %s", machine_ids_list)
+        started_machines = set()
+        for machine_id in machine_ids_list:
+            start_response = await self.start_run(
+                machine_id=machine_id,
+                run_id=run_id,
+                user_id=user_id,
+                username=username,
+                timeout=timeout
+            )
+            if start_response is None:
+                logger.error("START command timed out for machine: %s, aborting", machine_id)
+                return None
+            if start_response.response and start_response.response.status == CommandResponseStatus.ERROR:
+                logger.error("START command failed for machine %s: %s, aborting", machine_id, start_response.response.message)
+                return start_response
+            started_machines.add(machine_id)
         last_response: Optional[NATSMessage] = None
@@ -555,16 +570,16 @@ class CommandService:
                     raise ValueError(f"Request {idx} must be a CommandRequest or dict, got {type(request)}")
                 logger.info(
-                    "Sending command %d/%d: %s (step %s)",
+                    "Sending command %d/%d: %s (step %s) to machine %s",
                     idx,
                     len(requests),
                     request.name,
-                    request.step_number
+                    request.step_number,
+                    request.machine_id
                 )
                 response = await self.send_queue_command(
                     request=request,
-                    machine_id=machine_id,
                     run_id=run_id,
                     user_id=user_id,
                     username=username,
@@ -591,9 +606,22 @@ class CommandService:
                             len(requests),
                             request.name,
                             request.step_number,
-                            response.response.code,
+                            response.response.code.name,
                             response.response.message
                         )
+                        # Complete the run on all machines that were started
+                        logger.info("Completing runs on all machines due to error")
+                        for machine_id_to_complete in started_machines:
+                            try:
+                                await self.complete_run(
+                                    machine_id=machine_id_to_complete,
+                                    run_id=run_id,
+                                    user_id=user_id,
+                                    username=username,
+                                    timeout=timeout
+                                )
+                            except Exception as e:
+                                logger.error("Failed to complete run for machine %s during error cleanup: %s", machine_id_to_complete, e)
                         return response
                     # Command succeeded, store as last response
@@ -614,6 +642,19 @@ class CommandService:
                         request.name,
                         request.step_number
                     )
+                    # Complete the run on all machines that were started
+                    logger.info("Completing runs on all machines due to error")
+                    for machine_id_to_complete in started_machines:
+                        try:
+                            await self.complete_run(
+                                machine_id=machine_id_to_complete,
+                                run_id=run_id,
+                                user_id=user_id,
+                                username=username,
+                                timeout=timeout
+                            )
+                        except Exception as e:
+                            logger.error("Failed to complete run for machine %s during error cleanup: %s", machine_id_to_complete, e)
                     return response
             logger.info(
@@ -621,44 +662,46 @@ class CommandService:
                 len(requests)
             )
-            # Always send COMPLETE command after successful sequence
-            logger.info("Sending COMPLETE command after successful sequence")
-            complete_response = await self.complete_run(
-                machine_id=machine_id,
-                run_id=run_id,
-                user_id=user_id,
-                username=username,
-                timeout=timeout
-            )
-            if complete_response is None:
-                logger.error("COMPLETE command timed out")
-                return None
-            if complete_response.response and complete_response.response.status == CommandResponseStatus.ERROR:
-                logger.error("COMPLETE command failed: %s", complete_response.response.message)
-                return complete_response
-            # Return the last command response, not the COMPLETE response
-            return last_response
-        except Exception as e:
-            # If any error occurs during command execution, try to complete the run
-            # to clean up state (but don't fail if this also fails)
-            logger.warning("Error during command sequence, attempting to complete run: %s", e)
-            try:
-                await self.complete_run(
-                    machine_id=machine_id,
+            # Always send COMPLETE commands to all machines after successful sequence
+            logger.info("Sending COMPLETE commands to all machines: %s", machine_ids_list)
+            for machine_id_to_complete in machine_ids_list:
+                complete_response = await self.complete_run(
+                    machine_id=machine_id_to_complete,
                     run_id=run_id,
                     user_id=user_id,
                     username=username,
                     timeout=timeout
                 )
-            except Exception as cleanup_error:
-                logger.error("Failed to complete run during error cleanup: %s", cleanup_error)
+                if complete_response is None:
+                    logger.error("COMPLETE command timed out for machine: %s, aborting", machine_id_to_complete)
+                    return None
+                if complete_response.response and complete_response.response.status == CommandResponseStatus.ERROR:
+                    logger.error("COMPLETE command failed for machine %s: %s, aborting", machine_id_to_complete, complete_response.response.message)
+                    return complete_response
+            # Return the last command response, not the COMPLETE response
+            return last_response
+        except Exception as e:
+            # If any error occurs during command execution, try to complete the run
+            # on all machines that were started to clean up state
+            logger.warning("Error during command sequence, attempting to complete runs on all machines: %s", e)
+            for machine_id_to_complete in started_machines:
+                try:
+                    await self.complete_run(
+                        machine_id=machine_id_to_complete,
+                        run_id=run_id,
+                        user_id=user_id,
+                        username=username,
+                        timeout=timeout
+                    )
+                except Exception as cleanup_error:
+                    logger.error("Failed to complete run for machine %s during error cleanup: %s", machine_id_to_complete, cleanup_error)
             raise
     async def send_immediate_command(
         self,
         *,
         request: CommandRequest,
-        machine_id: str,
         run_id: str,
         user_id: str,
         username: str,
@@ -668,8 +711,7 @@ class CommandService:
         Send an immediate command (pause, resume, cancel) to the machine.
         Args:
-            request: CommandRequest model containing command details
-            machine_id: Machine ID to send the command to
+            request: CommandRequest model containing command details (must include machine_id)
             run_id: Run ID for the command
             user_id: User ID who initiated the command
             username: Username who initiated the command
@@ -681,23 +723,22 @@ class CommandService:
         if not self._connected or not self.js:
             raise RuntimeError("Not connected to NATS. Call connect() first.")
-        # Determine subject
-        subject = f"{NAMESPACE}.{machine_id}.cmd.immediate"
+        # Determine subject using machine_id from request
+        subject = f"{NAMESPACE}.{request.machine_id}.cmd.immediate"
         logger.info(
             "Sending immediate command: machine_id=%s, command=%s, run_id=%s, step_number=%s",
-            machine_id, request.name, run_id, request.step_number
+            request.machine_id, request.name, run_id, request.step_number
         )
         # Get or create response handler for this machine
-        response_handler = await self._get_response_handler(machine_id)
+        response_handler = await self._get_response_handler(request.machine_id)
         # Register pending response
         response_received = response_handler.register_pending(run_id, request.step_number)
         # Build payload
-        payload = self._build_command_payload(request, machine_id, run_id, user_id, username)
+        payload = self._build_command_payload(request, request.machine_id, run_id, user_id, username)
         try:
             # Publish to JetStream

{puda_comms-0.0.5 → puda_comms-0.0.6}/src/puda_comms/machine_client.py RENAMED Viewed

@@ -10,7 +10,7 @@ import logging
 from typing import Dict, Any, Optional, Callable, Awaitable
 from datetime import datetime, timezone
 import nats
-from puda_comms.models import (
+from .models import (
     CommandResponseStatus,
     CommandResponse,
     CommandResponseCode,
@@ -19,10 +19,10 @@ from puda_comms.models import (
     MessageType,
     ImmediateCommand,
 )
-from puda_comms.run_manager import RunManager
+from .run_manager import RunManager
 from nats.js.client import JetStreamContext
 from nats.js.api import StreamConfig, ConsumerConfig
-from nats.js.errors import NotFoundError
+from nats.js.errors import NotFoundError, Error as NATSError
 from nats.aio.msg import Msg
 logger = logging.getLogger(__name__)
@@ -491,6 +491,21 @@ class MachineClient:
                 )
                 return
+            # If active run_id is None, return error response
+            if self.run_manager.get_active_run_id() is None:
+                await msg.ack()
+                await self._publish_command_response(
+                    msg=msg,
+                    response=CommandResponse(
+                        status=CommandResponseStatus.ERROR,
+                        code=CommandResponseCode.RUN_ID_MISMATCH,
+                        message='Send START command to start a run before sending commands'
+                    ),
+                    subject=self.response_queue
+                )
+                return
+            # If run_id does not match active run_id, return error response
             if not await self.run_manager.validate_run_id(run_id):
                 await msg.ack()
                 await self._publish_command_response(
@@ -512,7 +527,9 @@ class MachineClient:
             # Finalize message state based on response
             if response.status == CommandResponseStatus.SUCCESS:
                 await msg.ack()
-            else:
+            elif response.status == CommandResponseStatus.ERROR:
+                # just complete the run if the command failed
+                await self.run_manager.complete_run(run_id)
                 await msg.term()
             await self._publish_command_response(
@@ -526,6 +543,7 @@ class MachineClient:
             # Handler was cancelled (e.g., via task cancellation)
             logger.info("Handler execution cancelled: run_id=%s, step_number=%s, command=%s", run_id, step_number, command)
             await msg.ack()
+            await self.run_manager.complete_run(run_id)
             await self._publish_command_response(
                 msg=msg,
                 response=CommandResponse(
@@ -540,6 +558,7 @@ class MachineClient:
         except json.JSONDecodeError as e:
             logger.error("JSON Decode Error. Terminating message.")
             await msg.term()
+            await self.run_manager.complete_run(run_id)
             await self._publish_command_response(
                 msg=msg,
                 response=CommandResponse(
@@ -557,6 +576,7 @@ class MachineClient:
             # Terminate all errors to prevent infinite redelivery loops
             logger.error("Handler failed (terminating message): %s", e)
             await msg.term()
+            await self.run_manager.complete_run(run_id)
             await self._publish_command_response(
                 msg=msg,
                 response=CommandResponse(
@@ -593,7 +613,7 @@ class MachineClient:
                             response = CommandResponse(
                                 status=CommandResponseStatus.ERROR,
                                 code=CommandResponseCode.RUN_ID_MISMATCH,
-                                message='cannot start, another run is currently running'
+                                message=f'cannot start, {self.run_manager.get_active_run_id()} is currently running'
                             )
                         else:
                             await self.publish_state({'state': 'active', 'run_id': run_id})
@@ -862,21 +882,76 @@ class MachineClient:
             retention='workqueue'
         )
+        durable_name = f"cmd_immed_{self.machine_id}"
+        # Try to unsubscribe from existing subscription if it exists
+        if self._cmd_immediate_sub:
+            try:
+                await self._cmd_immediate_sub.unsubscribe()
+                logger.info("Unsubscribed from existing immediate command subscription")
+            except Exception as e:
+                logger.debug("Error unsubscribing from existing subscription: %s", e)
+            self._cmd_immediate_sub = None
+        # Try to delete existing consumer if it's bound (from previous run)
+        try:
+            await self.js.delete_consumer(self.STREAM_COMMAND_IMMEDIATE, durable_name)
+            logger.info("Deleted existing immediate consumer: %s", durable_name)
+        except NotFoundError:
+            # Consumer doesn't exist, which is fine
+            logger.debug("Consumer %s does not exist, will be created", durable_name)
+        except Exception as e:
+            error_msg = str(e).lower()
+            if "bound" in error_msg or "in use" in error_msg:
+                # Consumer is bound but we can't delete it - try to unsubscribe first
+                logger.warning("Consumer %s is bound to a subscription. Attempting to force delete...", durable_name)
+                # Wait a moment for any pending operations to complete
+                await asyncio.sleep(0.5)
+                try:
+                    await self.js.delete_consumer(self.STREAM_COMMAND_IMMEDIATE, durable_name)
+                    logger.info("Successfully deleted bound consumer: %s", durable_name)
+                except Exception as delete_error:
+                    logger.warning("Could not delete bound consumer %s: %s. Will attempt to subscribe anyway.",
+                                 durable_name, delete_error)
+            else:
+                logger.warning("Error checking/deleting consumer %s: %s", durable_name, e)
         try:
             self._cmd_immediate_sub = await self.js.subscribe(
                 subject=self.cmd_immediate,
                 stream=self.STREAM_COMMAND_IMMEDIATE,
-                durable=f"cmd_immed_{self.machine_id}",
+                durable=durable_name,
                 cb=message_handler  # required for push consumer to handle messages
             )
+        except NATSError as e:
+            error_msg = str(e).lower()
+            if "bound" in error_msg or "already bound" in error_msg:
+                # Consumer is still bound - try to delete it and retry
+                logger.warning("Consumer %s is still bound. Attempting to delete and retry...", durable_name)
+                try:
+                    await self.js.delete_consumer(self.STREAM_COMMAND_IMMEDIATE, durable_name)
+                    await asyncio.sleep(0.5)  # Brief wait for cleanup
+                    # Retry subscription
+                    self._cmd_immediate_sub = await self.js.subscribe(
+                        subject=self.cmd_immediate,
+                        stream=self.STREAM_COMMAND_IMMEDIATE,
+                        durable=durable_name,
+                        cb=message_handler
+                    )
+                    logger.info("Successfully subscribed after deleting bound consumer")
+                except Exception as retry_error:
+                    logger.error("Failed to subscribe after deleting bound consumer: %s", retry_error)
+                    raise
+            else:
+                raise
         except NotFoundError:
             # Stream still not found after ensuring it exists - this shouldn't happen
             # but handle it gracefully
-            logger.error("Stream %s not found even after creation attempt. Check NATS server configuration.",
+            logger.error("Stream %s not found even after creation attempt. Check NATS server configuration.",
                        self.STREAM_COMMAND_IMMEDIATE)
             raise
-        logger.info("Subscribed to immediate commands: %s (durable: cmd_immed_%s, stream: %s)",
+        logger.info("Subscribed to immediate commands: %s (durable: cmd_immed_%s, stream: %s)",
                    self.cmd_immediate, self.machine_id, self.STREAM_COMMAND_IMMEDIATE)

{puda_comms-0.0.5 → puda_comms-0.0.6}/src/puda_comms/models.py RENAMED Viewed

@@ -57,8 +57,10 @@ class CommandRequest(BaseModel):
     """Command request data for NATS messages."""
     name: str = Field(description="The command name (string) to send to the machine.")
     params: Dict[str, Any] = Field(default_factory=dict, description="The parameters to send to the machine.")
+    kwargs: Dict[str, Any] = Field(default_factory=dict, description="Additional keyword arguments (e.g., channels in Biologic).")
     step_number: int = Field(description="Execution step number (integer). Used to track the progress of a command.")
     version: str = Field(default="1.0", description="Command version.")
+    machine_id: str = Field(description="Machine ID to send the command to.")
 class CommandResponse(BaseModel):
@@ -67,7 +69,7 @@ class CommandResponse(BaseModel):
     completed_at: str = Field(default_factory=_get_current_timestamp, description="ISO format timestamp (auto-set on creation)")
     code: Optional[CommandResponseCode] = Field(default=None, description="Error code")
     message: Optional[str] = Field(default=None, description="Error message (human-readable description)")
-    data: Optional[Dict[str, Any]] = Field(default=None, description="Optional output data from the command handler")
+    data: Optional[Dict[Any, Any]] = Field(default=None, description="Optional output data from the command handler")
 class MessageHeader(BaseModel):
     """Header for NATS messages."""
@@ -78,6 +80,7 @@ class MessageHeader(BaseModel):
     machine_id: str = Field(description="Machine ID")
     run_id: Optional[str] = Field(default=None, description="Unique identifier (uuid) for the run/workflow")
     timestamp: str = Field(default_factory=_get_current_timestamp, description="ISO format timestamp (auto-set on creation)")
 class NATSMessage(BaseModel):
     """
     Complete NATS message structure.

puda_comms-0.0.6/src/puda_comms/stream_subscriber.py ADDED Viewed

@@ -0,0 +1,388 @@
+"""
+Reusable NATS JetStream subscriber for services that need to consume messages.
+Provides a base class for subscribing to NATS streams with durable consumers,
+automatic reconnection, and message handling callbacks.
+This implements a push consumer pattern where NATS JetStream automatically
+delivers messages to registered callbacks as they arrive, rather than requiring
+the client to explicitly fetch/pull messages.
+"""
+import asyncio
+import logging
+from typing import Optional, Callable, Awaitable, List, Any
+from abc import abstractmethod
+import nats
+from nats.js.client import JetStreamContext
+from nats.aio.msg import Msg
+logger = logging.getLogger(__name__)
+class StreamSubscriber:
+    """
+    Base class for subscribing to NATS JetStream streams using push consumer pattern.
+    This class implements a push consumer where NATS JetStream automatically delivers
+    messages to registered callbacks as they arrive. The server pushes messages to
+    the client rather than requiring the client to pull/fetch them.
+    Handles connection management, durable subscriptions, and message routing.
+    Services can extend this class and implement message handling logic.
+    Example:
+        ```python
+        class MyService(StreamSubscriber):
+            async def handle_message(self, msg: Msg, stream: str, subject: str):
+                # Process message
+                data = json.loads(msg.data.decode())
+                # ... your logic ...
+                await msg.ack()
+        service = MyService(servers=["nats://localhost:4222"])
+        await service.subscribe("STREAM_NAME", "puda.*.cmd.response.queue", "my_consumer")
+        await service.run()
+        ```
+    """
+    def __init__(
+        self,
+        servers: List[str],
+        connect_timeout: int = 10,
+        reconnect_time_wait: int = 2,
+        max_reconnect_attempts: int = -1
+    ):
+        """
+        Initialize the stream subscriber.
+        Args:
+            servers: List of NATS server URLs (e.g., ["nats://localhost:4222"])
+            connect_timeout: Timeout for initial connection in seconds
+            reconnect_time_wait: Wait time between reconnection attempts in seconds
+            max_reconnect_attempts: Maximum reconnection attempts (-1 for unlimited)
+        """
+        if not servers:
+            raise ValueError("servers must be a non-empty list")
+        self.servers = servers
+        self.connect_timeout = connect_timeout
+        self.reconnect_time_wait = reconnect_time_wait
+        self.max_reconnect_attempts = max_reconnect_attempts
+        self.nc: Optional[nats.NATS] = None
+        self.js: Optional[JetStreamContext] = None
+        self._subscriptions: List[Any] = []
+        self._is_connected = False
+        self._should_run = True
+    async def connect(self) -> bool:
+        """
+        Connect to NATS servers.
+        Returns:
+            True if connected successfully, False otherwise
+        """
+        if self._is_connected:
+            return True
+        try:
+            self.nc = await nats.connect(
+                servers=self.servers,
+                connect_timeout=self.connect_timeout,
+                reconnect_time_wait=self.reconnect_time_wait,
+                max_reconnect_attempts=self.max_reconnect_attempts,
+                error_cb=self._error_callback,
+                disconnected_cb=self._disconnected_callback,
+                reconnected_cb=self._reconnected_callback,
+                closed_cb=self._closed_callback
+            )
+            self.js = self.nc.jetstream()
+            self._is_connected = True
+            logger.info("Connected to NATS servers: %s", self.servers)
+            return True
+        except Exception as e:
+            logger.error("Failed to connect to NATS: %s", e)
+            self._is_connected = False
+            return False
+    async def disconnect(self):
+        """Disconnect from NATS and cleanup subscriptions."""
+        self._should_run = False
+        # Unsubscribe from all streams
+        for sub in self._subscriptions:
+            try:
+                await sub.unsubscribe()
+            except Exception as e:
+                logger.debug("Error unsubscribing: %s", e)
+        self._subscriptions.clear()
+        # Close NATS connection
+        if self.nc:
+            await self.nc.close()
+            self.nc = None
+            self.js = None
+        self._is_connected = False
+        logger.info("Disconnected from NATS")
+    async def subscribe(
+        self,
+        stream: str,
+        subject: str,
+        durable: Optional[str] = None,
+        callback: Optional[Callable[[Msg, str, str], Awaitable[None]]] = None
+    ):
+        """
+        Subscribe to a NATS JetStream stream using push consumer pattern.
+        This creates a push subscription where NATS JetStream automatically delivers
+        messages to the callback as they arrive. Messages are pushed to the client
+        rather than requiring explicit fetch/pull operations.
+        Args:
+            stream: Name of the JetStream stream
+            subject: Subject pattern to subscribe to (supports wildcards)
+            durable: Optional durable consumer name (for persistent subscriptions)
+            callback: Optional async callback function(msg, stream, subject) -> None
+                     If not provided, calls handle_message() method
+        Raises:
+            RuntimeError: If not connected to NATS
+        """
+        if not self._is_connected or not self.js:
+            raise RuntimeError("Not connected to NATS. Call connect() first.")
+        # Use provided callback or default to handle_message method
+        if callback is None:
+            callback = self.handle_message
+        # Create callback wrapper
+        async def message_wrapper(msg: Msg):
+            try:
+                await callback(msg, stream, subject)
+            except Exception as e:
+                logger.error(
+                    "Error in message callback for stream=%s, subject=%s: %s",
+                    stream, subject, e, exc_info=True
+                )
+                # Don't ack on error - let the caller decide
+                # This allows for retry logic in the handler
+        try:
+            # Subscribe with durable consumer if specified
+            if durable:
+                sub = await self.js.subscribe(
+                    subject,
+                    stream=stream,
+                    durable=durable,
+                    cb=lambda msg: asyncio.create_task(message_wrapper(msg))
+                )
+            else:
+                # Ephemeral subscription
+                sub = await self.js.subscribe(
+                    subject,
+                    stream=stream,
+                    cb=lambda msg: asyncio.create_task(message_wrapper(msg))
+                )
+            self._subscriptions.append(sub)
+            logger.info(
+                "Subscribed to stream=%s, subject=%s, durable=%s",
+                stream, subject, durable or "ephemeral"
+            )
+        except Exception as e:
+            error_msg = str(e)
+            # Handle the specific case where consumer is already bound
+            if "consumer is already bound" in error_msg.lower():
+                logger.warning(
+                    "Consumer '%s' for stream '%s' is already bound. "
+                    "This usually happens when the service didn't shut down cleanly. "
+                    "Attempting to delete the consumer and retry...",
+                    durable, stream
+                )
+                if durable:
+                    try:
+                        # Try to delete the consumer (may fail if actively bound)
+                        await self.js.delete_consumer(stream, durable)
+                        logger.info("Deleted consumer '%s' for stream '%s'", durable, stream)
+                        # Retry subscription after deletion
+                        sub = await self.js.subscribe(
+                            subject,
+                            stream=stream,
+                            durable=durable,
+                            cb=lambda msg: asyncio.create_task(message_wrapper(msg))
+                        )
+                        self._subscriptions.append(sub)
+                        logger.info(
+                            "Successfully subscribed after consumer cleanup: stream=%s, subject=%s, durable=%s",
+                            stream, subject, durable
+                        )
+                    except Exception as retry_error:
+                        retry_error_msg = str(retry_error)
+                        if "bound" in retry_error_msg.lower() or "in use" in retry_error_msg.lower():
+                            logger.error(
+                                "Consumer '%s' for stream '%s' cannot be deleted because it's still bound. "
+                                "This typically means the previous service instance is still running or "
+                                "the subscription hasn't timed out yet. Solutions:\n"
+                                "  1. Wait a few seconds and restart the service\n"
+                                "  2. Manually delete the consumer: nats consumer rm %s %s\n"
+                                "  3. Restart the NATS server\n"
+                                "  4. Use a different durable consumer name",
+                                durable, stream, stream, durable
+                            )
+                        else:
+                            logger.error(
+                                "Failed to delete consumer '%s' for stream '%s': %s",
+                                durable, stream, retry_error
+                            )
+                        raise
+                else:
+                    raise
+            else:
+                logger.error(
+                    "Failed to subscribe to stream=%s, subject=%s: %s",
+                    stream, subject, e
+                )
+                raise
+    @abstractmethod
+    async def handle_message(self, msg: Msg, stream: str, subject: str):
+        """
+        Handle an incoming message pushed by NATS JetStream. Override this method in subclasses.
+        This method is called automatically when NATS JetStream pushes a message
+        to this subscriber. The push consumer pattern means messages arrive
+        asynchronously via callbacks rather than being explicitly fetched.
+        Default implementation logs and acks the message.
+        Subclasses should implement their own message processing logic.
+        Args:
+            msg: NATS message object
+            stream: Name of the stream the message came from
+            subject: Subject pattern that matched this message
+        """
+        logger.debug(
+            "Received message from stream=%s, subject=%s, data_size=%d",
+            stream, subject, len(msg.data)
+        )
+        # Default: ack the message
+        await msg.ack()
+    async def _error_callback(self, error: Exception):
+        """Callback for NATS errors."""
+        if error:
+            logger.error("NATS error: %s", error, exc_info=True)
+        else:
+            logger.error("NATS error: Unknown error (error object is None)")
+    async def _disconnected_callback(self):
+        """Callback when disconnected from NATS."""
+        logger.warning("Disconnected from NATS servers")
+        self._is_connected = False
+    async def _reconnected_callback(self):
+        """Callback when reconnected to NATS."""
+        logger.info("Reconnected to NATS servers")
+        self._is_connected = True
+        if self.nc:
+            self.js = self.nc.jetstream()
+            # Clear old subscriptions as they're no longer valid after reconnection
+            self._subscriptions.clear()
+            # Re-subscribe to all streams
+            await self._resubscribe_all()
+    async def _closed_callback(self):
+        """Callback when connection is closed."""
+        logger.info("NATS connection closed")
+        self._is_connected = False
+    async def _resubscribe_all(self):
+        """
+        Re-subscribe to all streams after reconnection.
+        Override this method in subclasses to restore subscriptions.
+        The default implementation does nothing - subclasses should track
+        their subscriptions and re-subscribe here.
+        """
+        logger.debug("Reconnection detected, but no subscriptions to restore")
+    async def run(self, health_check_interval: float = 1.0):
+        """
+        Run the subscriber service with connection health monitoring.
+        This method will:
+        1. Connect to NATS (with retry logic)
+        2. Call on_start() hook for subclasses to set up subscriptions
+        3. Monitor connection health and reconnect if needed
+        4. Call on_stop() hook on shutdown
+        Args:
+            health_check_interval: Interval in seconds to check connection health
+        """
+        # Connect to NATS with retry logic
+        while self._should_run:
+            if await self.connect():
+                break
+            logger.warning("Failed to connect to NATS, retrying in 5 seconds...")
+            await asyncio.sleep(5)
+        # Call on_start hook for subclasses to set up subscriptions
+        await self.on_start()
+        logger.info("Stream subscriber service started")
+        # Main loop with health monitoring
+        try:
+            while self._should_run:
+                await asyncio.sleep(health_check_interval)
+                # Check connection health
+                if not self._is_connected:
+                    logger.warning("Connection lost, attempting to reconnect...")
+                    if await self.connect():
+                        # Clear old subscriptions as they're no longer valid after reconnection
+                        self._subscriptions.clear()
+                        await self._resubscribe_all()
+        except KeyboardInterrupt:
+            logger.info("Received KeyboardInterrupt, shutting down...")
+        except Exception as e:
+            logger.error("Unexpected error in main loop: %s", e, exc_info=True)
+        finally:
+            await self.on_stop()
+            await self.disconnect()
+    @abstractmethod
+    async def on_start(self):
+        """
+        Hook called when the service starts. Override in subclasses to set up subscriptions.
+        Example:
+            ```python
+            async def on_start(self):
+                await self.subscribe("STREAM_NAME", "puda.*.cmd.response.queue", "my_consumer")
+                await self.subscribe("STREAM_NAME", "puda.*.cmd.response.immediate", "my_consumer2")
+            ```
+        """
+        pass
+    @abstractmethod
+    async def on_stop(self):
+        """
+        Hook called when the service stops. Override in subclasses for cleanup.
+        """
+        pass
+    # ==================== Context Manager ====================
+    async def __aenter__(self):
+        """Async context manager entry."""
+        await self.connect()
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        """Async context manager exit."""
+        await self.disconnect()
+        return False  # Don't suppress exceptions

puda_comms-0.0.5/src/puda_comms/__init__.py DELETED Viewed

@@ -1,5 +0,0 @@
-from .machine_client import MachineClient
-from .execution_state import ExecutionState
-from .command_service import CommandService
-__all__ = ["MachineClient", "ExecutionState", "CommandService"]

{puda_comms-0.0.5 → puda_comms-0.0.6}/src/puda_comms/execution_state.py RENAMED Viewed

File without changes

{puda_comms-0.0.5 → puda_comms-0.0.6}/src/puda_comms/run_manager.py RENAMED Viewed

File without changes

puda-comms 0.0.5__tar.gz → 0.0.6__tar.gz

puda-comms 0.0.5tar.gz → 0.0.6tar.gz