PyPI - digitalkin - Versions diffs - 0.3.0rc1__py3-none-any.whl → 0.3.1__py3-none-any.whl - Mend

digitalkin 0.3.0rc1__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

digitalkin/__version__.py +1 -1
digitalkin/core/common/__init__.py +9 -0
digitalkin/core/common/factories.py +156 -0
digitalkin/core/job_manager/base_job_manager.py +128 -28
digitalkin/core/job_manager/single_job_manager.py +80 -25
digitalkin/core/job_manager/taskiq_broker.py +114 -19
digitalkin/core/job_manager/taskiq_job_manager.py +291 -39
digitalkin/core/task_manager/base_task_manager.py +539 -0
digitalkin/core/task_manager/local_task_manager.py +108 -0
digitalkin/core/task_manager/remote_task_manager.py +87 -0
digitalkin/core/task_manager/surrealdb_repository.py +43 -4
digitalkin/core/task_manager/task_executor.py +249 -0
digitalkin/core/task_manager/task_session.py +107 -19
digitalkin/grpc_servers/module_server.py +2 -2
digitalkin/grpc_servers/module_servicer.py +21 -12
digitalkin/grpc_servers/registry_server.py +1 -1
digitalkin/grpc_servers/registry_servicer.py +4 -4
digitalkin/grpc_servers/utils/grpc_error_handler.py +53 -0
digitalkin/models/core/task_monitor.py +17 -0
digitalkin/models/grpc_servers/models.py +4 -4
digitalkin/models/module/module_context.py +5 -0
digitalkin/models/module/module_types.py +304 -16
digitalkin/modules/_base_module.py +66 -28
digitalkin/services/cost/grpc_cost.py +8 -41
digitalkin/services/filesystem/grpc_filesystem.py +9 -38
digitalkin/services/services_config.py +11 -0
digitalkin/services/services_models.py +3 -1
digitalkin/services/setup/default_setup.py +5 -6
digitalkin/services/setup/grpc_setup.py +51 -14
digitalkin/services/storage/grpc_storage.py +2 -2
digitalkin/services/user_profile/__init__.py +12 -0
digitalkin/services/user_profile/default_user_profile.py +55 -0
digitalkin/services/user_profile/grpc_user_profile.py +69 -0
digitalkin/services/user_profile/user_profile_strategy.py +40 -0
digitalkin/utils/__init__.py +28 -0
digitalkin/utils/dynamic_schema.py +483 -0
{digitalkin-0.3.0rc1.dist-info → digitalkin-0.3.1.dist-info}/METADATA +9 -29
{digitalkin-0.3.0rc1.dist-info → digitalkin-0.3.1.dist-info}/RECORD +42 -30
modules/dynamic_setup_module.py +362 -0
digitalkin/core/task_manager/task_manager.py +0 -439
{digitalkin-0.3.0rc1.dist-info → digitalkin-0.3.1.dist-info}/WHEEL +0 -0
{digitalkin-0.3.0rc1.dist-info → digitalkin-0.3.1.dist-info}/licenses/LICENSE +0 -0
{digitalkin-0.3.0rc1.dist-info → digitalkin-0.3.1.dist-info}/top_level.txt +0 -0

digitalkin/core/job_manager/taskiq_broker.py CHANGED Viewed

@@ -1,10 +1,12 @@
 """Taskiq broker & RSTREAM producer for the job manager."""
 import asyncio
+import datetime
 import json
 import logging
 import os
 import pickle  # noqa: S403
+from typing import Any
 from rstream import Producer
 from rstream.exceptions import PreconditionFailed
@@ -14,7 +16,10 @@ from taskiq.compat import model_validate
 from taskiq.message import BrokerMessage
 from taskiq_aio_pika import AioPikaBroker
+from digitalkin.core.common import ConnectionFactory, ModuleFactory
 from digitalkin.core.job_manager.base_job_manager import BaseJobManager
+from digitalkin.core.task_manager.task_executor import TaskExecutor
+from digitalkin.core.task_manager.task_session import TaskSession
 from digitalkin.logger import logger
 from digitalkin.models.core.job_manager_models import StreamCodeModel
 from digitalkin.models.module.module_types import OutputModelT
@@ -118,6 +123,24 @@ RSTREAM_PRODUCER = define_producer()
 TASKIQ_BROKER = define_broker()
+async def cleanup_global_resources() -> None:
+    """Clean up global resources (producer and broker connections).
+    This should be called during shutdown to prevent connection leaks.
+    """
+    try:
+        await RSTREAM_PRODUCER.close()
+        logger.info("RStream producer closed successfully")
+    except Exception as e:
+        logger.warning("Failed to close RStream producer: %s", e)
+    try:
+        await TASKIQ_BROKER.shutdown()
+        logger.info("Taskiq broker shut down successfully")
+    except Exception as e:
+        logger.warning("Failed to shutdown Taskiq broker: %s", e)
 async def send_message_to_stream(job_id: str, output_data: OutputModelT) -> None:  # type: ignore
     """Callback define to add a message frame to the Rstream.
@@ -152,27 +175,70 @@ async def run_start_module(
         setup_data: dict,
         context: Allow TaskIQ context access
     """
-    logger.warning("%s", services_mode)
+    logger.info("Starting module with services_mode: %s", services_mode)
     services_config = ServicesConfig(
         services_config_strategies=module_class.services_config_strategies,
         services_config_params=module_class.services_config_params,
         mode=services_mode,
     )
     setattr(module_class, "services_config", services_config)
-    logger.warning("%s | %s", services_config, module_class.services_config)
+    logger.debug("Services config: %s | Module config: %s", services_config, module_class.services_config)
+    module_class.discover()
     job_id = context.message.task_id
     callback = await BaseJobManager.job_specific_callback(send_message_to_stream, job_id)
-    module = module_class(job_id, mission_id=mission_id, setup_id=setup_id, setup_version_id=setup_version_id)
-    await module.start(
-        input_data,
-        setup_data,
-        callback,
-        # ensure that the callback is called when the task is done + allow asyncio to run
-        # TODO: should define a BaseModel for stream code / error
-        done_callback=lambda _: asyncio.create_task(callback(StreamCodeModel(code="__END_OF_STREAM__"))),
-    )
+    module = ModuleFactory.create_module_instance(module_class, job_id, mission_id, setup_id, setup_version_id)
+    channel = None
+    try:
+        # Create TaskExecutor and supporting components for worker execution
+        executor = TaskExecutor()
+        # SurrealDB env vars are expected to be set in env.
+        channel = await ConnectionFactory.create_surreal_connection("taskiq_worker", datetime.timedelta(seconds=5))
+        session = TaskSession(job_id, mission_id, channel, module, datetime.timedelta(seconds=2))
+        # Execute the task using TaskExecutor
+        # Create a proper done callback that handles errors
+        async def send_end_of_stream(_: Any) -> None:  # noqa: ANN401
+            try:
+                await callback(StreamCodeModel(code="__END_OF_STREAM__"))
+            except Exception as e:
+                logger.error("Error sending end of stream: %s", e, exc_info=True)
+        # Reconstruct Pydantic models from dicts for type safety
+        try:
+            input_model = module_class.create_input_model(input_data)
+            setup_model = await module_class.create_setup_model(setup_data)
+        except Exception as e:
+            logger.error("Failed to reconstruct models for job %s: %s", job_id, e, exc_info=True)
+            raise
+        supervisor_task = await executor.execute_task(
+            task_id=job_id,
+            mission_id=mission_id,
+            coro=module.start(
+                input_model,
+                setup_model,
+                callback,
+                done_callback=lambda result: asyncio.ensure_future(send_end_of_stream(result)),
+            ),
+            session=session,
+            channel=channel,
+        )
+        # Wait for the supervisor task to complete
+        await supervisor_task
+        logger.info("Module task %s completed", job_id)
+    except Exception:
+        logger.exception("Error running module %s", job_id)
+        raise
+    finally:
+        # Cleanup channel
+        if channel is not None:
+            try:
+                await channel.close()
+            except Exception:
+                logger.exception("Error closing channel for job %s", job_id)
 @TASKIQ_BROKER.task
@@ -196,20 +262,49 @@ async def run_config_module(
         config_setup_data: dict,
         context: Allow TaskIQ context access
     """
-    logger.warning("%s", services_mode)
+    logger.info("Starting config module with services_mode: %s", services_mode)
     services_config = ServicesConfig(
         services_config_strategies=module_class.services_config_strategies,
         services_config_params=module_class.services_config_params,
         mode=services_mode,
     )
     setattr(module_class, "services_config", services_config)
-    logger.warning("%s | %s", services_config, module_class.services_config)
+    logger.debug("Services config: %s | Module config: %s", services_config, module_class.services_config)
     job_id = context.message.task_id
     callback = await BaseJobManager.job_specific_callback(send_message_to_stream, job_id)
-    module = module_class(job_id, mission_id=mission_id, setup_id=setup_id, setup_version_id=setup_version_id)
+    module = ModuleFactory.create_module_instance(module_class, job_id, mission_id, setup_id, setup_version_id)
-    await module.start_config_setup(
-        module_class.create_config_setup_model(config_setup_data),
-        callback,
-    )
+    # Override environment variables temporarily to use manager's SurrealDB
+    channel = None
+    try:
+        # Create TaskExecutor and supporting components for worker execution
+        executor = TaskExecutor()
+        # SurrealDB env vars are expected to be set in env.
+        channel = await ConnectionFactory.create_surreal_connection("taskiq_worker", datetime.timedelta(seconds=5))
+        session = TaskSession(job_id, mission_id, channel, module, datetime.timedelta(seconds=2))
+        # Create and run the config setup task with TaskExecutor
+        setup_model = module_class.create_config_setup_model(config_setup_data)
+        supervisor_task = await executor.execute_task(
+            task_id=job_id,
+            mission_id=mission_id,
+            coro=module.start_config_setup(setup_model, callback),
+            session=session,
+            channel=channel,
+        )
+        # Wait for the supervisor task to complete
+        await supervisor_task
+        logger.info("Config module task %s completed", job_id)
+    except Exception:
+        logger.exception("Error running config module %s", job_id)
+        raise
+    finally:
+        # Cleanup channel
+        if channel is not None:
+            try:
+                await channel.close()
+            except Exception:
+                logger.exception("Error closing channel for job %s", job_id)

digitalkin/core/job_manager/taskiq_job_manager.py CHANGED Viewed

@@ -9,19 +9,22 @@ except ImportError:
 import asyncio
 import contextlib
+import datetime
 import json
 import os
 from collections.abc import AsyncGenerator, AsyncIterator
 from contextlib import asynccontextmanager
-from typing import TYPE_CHECKING, Any, Generic
+from typing import TYPE_CHECKING, Any
 from rstream import Consumer, ConsumerOffsetSpecification, MessageContext, OffsetType
+from digitalkin.core.common import ConnectionFactory, QueueFactory
 from digitalkin.core.job_manager.base_job_manager import BaseJobManager
-from digitalkin.core.job_manager.taskiq_broker import STREAM, STREAM_RETENTION, TASKIQ_BROKER
+from digitalkin.core.job_manager.taskiq_broker import STREAM, STREAM_RETENTION, TASKIQ_BROKER, cleanup_global_resources
+from digitalkin.core.task_manager.remote_task_manager import RemoteTaskManager
 from digitalkin.logger import logger
 from digitalkin.models.core.task_monitor import TaskStatus
-from digitalkin.models.module import InputModelT, SetupModelT
+from digitalkin.models.module import InputModelT, OutputModelT, SetupModelT
 from digitalkin.modules._base_module import BaseModule
 from digitalkin.services.services_models import ServicesMode
@@ -29,7 +32,7 @@ if TYPE_CHECKING:
     from taskiq.task import AsyncTaskiqTask
-class TaskiqJobManager(BaseJobManager, Generic[InputModelT, SetupModelT]):
+class TaskiqJobManager(BaseJobManager[InputModelT, OutputModelT, SetupModelT]):
     """Taskiq job manager for running modules in Taskiq tasks."""
     services_mode: ServicesMode
@@ -62,6 +65,13 @@ class TaskiqJobManager(BaseJobManager, Generic[InputModelT, SetupModelT]):
         if queue:
             await queue.put(data.get("output_data"))
+    async def start(self) -> None:
+        """Start the TaskiqJobManager and initialize SurrealDB connection."""
+        await self._start()
+        self.channel = await ConnectionFactory.create_surreal_connection(
+            database="taskiq_job_manager", timeout=datetime.timedelta(seconds=5)
+        )
     async def _start(self) -> None:
         await TASKIQ_BROKER.startup()
@@ -82,12 +92,34 @@ class TaskiqJobManager(BaseJobManager, Generic[InputModelT, SetupModelT]):
             callback=self._on_message,  # type: ignore
             offset_specification=start_spec,
         )
+        # Wrap the consumer task with error handling
+        async def run_consumer_with_error_handling() -> None:
+            try:
+                await self.stream_consumer.run()
+            except asyncio.CancelledError:
+                logger.debug("Stream consumer task cancelled")
+                raise
+            except Exception as e:
+                logger.error("Stream consumer task failed: %s", e, exc_info=True, extra={"error": str(e)})
+                # Re-raise to ensure the error is not silently ignored
+                raise
         self.stream_consumer_task = asyncio.create_task(
-            self.stream_consumer.run(),
+            run_consumer_with_error_handling(),
             name="stream_consumer_task",
         )
     async def _stop(self) -> None:
+        """Stop the TaskiqJobManager and clean up all resources."""
+        # Close SurrealDB connection
+        if hasattr(self, "channel"):
+            try:
+                await self.channel.close()
+                logger.info("TaskiqJobManager: SurrealDB connection closed")
+            except Exception as e:
+                logger.warning("Failed to close SurrealDB connection: %s", e)
         # Signal the consumer to stop
         await self.stream_consumer.close()
         # Cancel the background task
@@ -95,18 +127,40 @@ class TaskiqJobManager(BaseJobManager, Generic[InputModelT, SetupModelT]):
         with contextlib.suppress(asyncio.CancelledError):
             await self.stream_consumer_task
+        # Clean up job queues
+        self.job_queues.clear()
+        logger.info("TaskiqJobManager: Cleared %d job queues", len(self.job_queues))
+        # Call global cleanup for producer and broker
+        await cleanup_global_resources()
     def __init__(
         self,
         module_class: type[BaseModule],
         services_mode: ServicesMode,
+        default_timeout: float = 10.0,
+        max_concurrent_tasks: int = 100,
+        stream_timeout: float = 30.0,
     ) -> None:
-        """Initialize the Taskiq job manager."""
-        super().__init__(module_class, services_mode)
+        """Initialize the Taskiq job manager.
+        Args:
+            module_class: The class of the module to be managed
+            services_mode: The mode of operation for the services
+            default_timeout: Default timeout for task operations
+            max_concurrent_tasks: Maximum number of concurrent tasks
+            stream_timeout: Timeout for stream consumer operations (default: 15.0s for distributed systems)
+        """
+        # Create remote task manager for distributed execution
+        task_manager = RemoteTaskManager(default_timeout, max_concurrent_tasks)
+        # Initialize base job manager with task manager
+        super().__init__(module_class, services_mode, task_manager)
         logger.warning("TaskiqJobManager initialized with app: %s", TASKIQ_BROKER)
-        self.services_mode = services_mode
         self.job_queues: dict[str, asyncio.Queue] = {}
         self.max_queue_size = 1000
+        self.stream_timeout = stream_timeout
     async def generate_config_setup_module_response(self, job_id: str) -> SetupModelT:
         """Generate a stream consumer for a module's output data.
@@ -120,12 +174,20 @@ class TaskiqJobManager(BaseJobManager, Generic[InputModelT, SetupModelT]):
         Returns:
             SetupModelT: the SetupModelT object fully processed.
+        Raises:
+            asyncio.TimeoutError: If waiting for the setup response times out.
         """
-        queue: asyncio.Queue = asyncio.Queue(maxsize=self.max_queue_size)
+        queue = QueueFactory.create_bounded_queue(maxsize=self.max_queue_size)
         self.job_queues[job_id] = queue
         try:
-            item = await queue.get()
+            # Add timeout to prevent indefinite blocking
+            item = await asyncio.wait_for(queue.get(), timeout=30.0)
+        except asyncio.TimeoutError:
+            logger.error("Timeout waiting for config setup response for job %s", job_id)
+            raise
+        else:
             queue.task_done()
             return item
         finally:
@@ -157,7 +219,7 @@ class TaskiqJobManager(BaseJobManager, Generic[InputModelT, SetupModelT]):
             TypeError: If the function is called with bad data type.
             ValueError: If the module fails to start.
         """
-        task = TASKIQ_BROKER.find_task("digitalkin.core.taskiq_broker:run_config_module")
+        task = TASKIQ_BROKER.find_task("digitalkin.core.job_manager.taskiq_broker:run_config_module")
         if task is None:
             msg = "Task not found"
@@ -167,6 +229,7 @@ class TaskiqJobManager(BaseJobManager, Generic[InputModelT, SetupModelT]):
             msg = "config_setup_data must be a valid model with model_dump method"
             raise TypeError(msg)
+        # Submit task to Taskiq
         running_task: AsyncTaskiqTask[Any] = await task.kiq(
             mission_id,
             setup_id,
@@ -177,6 +240,27 @@ class TaskiqJobManager(BaseJobManager, Generic[InputModelT, SetupModelT]):
         )
         job_id = running_task.task_id
+        # Create module instance for metadata
+        module = self.module_class(
+            job_id,
+            mission_id=mission_id,
+            setup_id=setup_id,
+            setup_version_id=setup_version_id,
+        )
+        # Register task in TaskManager (remote mode)
+        async def _dummy_coro() -> None:
+            """Dummy coroutine - actual execution happens in worker."""
+        await self.create_task(
+            job_id,
+            mission_id,
+            module,
+            _dummy_coro(),
+        )
+        logger.info("Registered config task: %s, waiting for initial result", job_id)
         result = await running_task.wait_result(timeout=10)
         logger.info("Job %s with data %s", job_id, result)
         return job_id
@@ -191,28 +275,75 @@ class TaskiqJobManager(BaseJobManager, Generic[InputModelT, SetupModelT]):
         Yields:
             messages: The stream messages from the associated module.
         """
-        queue: asyncio.Queue = asyncio.Queue(maxsize=self.max_queue_size)
+        queue = QueueFactory.create_bounded_queue(maxsize=self.max_queue_size)
         self.job_queues[job_id] = queue
         async def _stream() -> AsyncGenerator[dict[str, Any], Any]:
-            """Generate the stream allowing flowless communication.
+            """Generate the stream with batch-drain optimization.
+            This implementation uses a micro-batching pattern optimized for distributed
+            message streams from RabbitMQ:
+            1. Block waiting for the first item (with timeout for termination checks)
+            2. Drain all immediately available items without blocking (micro-batch)
+            3. Yield control back to event loop
+            This pattern provides:
+            - Better throughput for bursty message streams
+            - Reduced gRPC streaming overhead
+            - Lower latency when multiple messages arrive simultaneously
             Yields:
                 dict: generated object from the module
             """
             while True:
-                item = await queue.get()
-                queue.task_done()
-                yield item
-                while True:
-                    try:
-                        item = queue.get_nowait()
-                    except asyncio.QueueEmpty:
-                        break
+                try:
+                    # Block for first item with timeout to allow termination checks
+                    item = await asyncio.wait_for(queue.get(), timeout=self.stream_timeout)
                     queue.task_done()
                     yield item
+                    # Drain all immediately available items (micro-batch optimization)
+                    # This reduces latency when messages arrive in bursts from RabbitMQ
+                    batch_count = 0
+                    max_batch_size = 100  # Safety limit to prevent memory spikes
+                    while batch_count < max_batch_size:
+                        try:
+                            item = queue.get_nowait()
+                            queue.task_done()
+                            yield item
+                            batch_count += 1
+                        except asyncio.QueueEmpty:  # noqa: PERF203
+                            # No more items immediately available, break to next blocking wait
+                            break
+                except asyncio.TimeoutError:
+                    logger.warning("Stream consumer timeout for job %s, checking if job is still active", job_id)
+                    # Check if job is registered
+                    if job_id not in self.tasks_sessions:
+                        logger.info("Job %s no longer registered, ending stream", job_id)
+                        break
+                    # Check job status to detect cancelled/failed jobs
+                    status = await self.get_module_status(job_id)
+                    if status in {TaskStatus.CANCELLED, TaskStatus.FAILED}:
+                        logger.info("Job %s has terminal status %s, draining queue and ending stream", job_id, status)
+                        # Drain remaining queue items before stopping
+                        while not queue.empty():
+                            try:
+                                item = queue.get_nowait()
+                                queue.task_done()
+                                yield item
+                            except asyncio.QueueEmpty:  # noqa: PERF203
+                                break
+                        break
+                    # Continue waiting for active/completed jobs
+                    continue
         try:
             yield _stream()
         finally:
@@ -241,12 +372,13 @@ class TaskiqJobManager(BaseJobManager, Generic[InputModelT, SetupModelT]):
         Raises:
             ValueError: If the task is not found.
         """
-        task = TASKIQ_BROKER.find_task("digitalkin.core.taskiq_broker:run_start_module")
+        task = TASKIQ_BROKER.find_task("digitalkin.core.job_manager.taskiq_broker:run_start_module")
         if task is None:
             msg = "Task not found"
             raise ValueError(msg)
+        # Submit task to Taskiq
         running_task: AsyncTaskiqTask[Any] = await task.kiq(
             mission_id,
             setup_id,
@@ -257,33 +389,153 @@ class TaskiqJobManager(BaseJobManager, Generic[InputModelT, SetupModelT]):
             setup_data.model_dump(),
         )
         job_id = running_task.task_id
+        # Create module instance for metadata
+        module = self.module_class(
+            job_id,
+            mission_id=mission_id,
+            setup_id=setup_id,
+            setup_version_id=setup_version_id,
+        )
+        # Register task in TaskManager (remote mode)
+        # Dummy coroutine will be closed by TaskManager since execution_mode="remote"
+        async def _dummy_coro() -> None:
+            """Dummy coroutine - actual execution happens in worker."""
+        await self.create_task(
+            job_id,
+            mission_id,
+            module,
+            _dummy_coro(),  # Will be closed immediately by TaskManager in remote mode
+        )
+        logger.info("Registered remote task: %s, waiting for initial result", job_id)
         result = await running_task.wait_result(timeout=10)
         logger.debug("Job %s with data %s", job_id, result)
         return job_id
+    async def get_module_status(self, job_id: str) -> TaskStatus:
+        """Query a module status from SurrealDB.
+        Args:
+            job_id: The unique identifier of the job.
+        Returns:
+            TaskStatus: The status of the module task.
+        """
+        if job_id not in self.tasks_sessions:
+            logger.warning("Job %s not found in registry", job_id)
+            return TaskStatus.FAILED
+        # Safety check: if channel not initialized (start() wasn't called), return FAILED
+        if not hasattr(self, "channel") or self.channel is None:
+            logger.warning("Job %s status check failed - channel not initialized", job_id)
+            return TaskStatus.FAILED
+        try:
+            # Query the tasks table for the task status
+            task_record = await self.channel.select_by_task_id("tasks", job_id)
+            if task_record and "status" in task_record:
+                status_str = task_record["status"]
+                return TaskStatus(status_str) if isinstance(status_str, str) else status_str
+            # If no record found in tasks, check heartbeats to see if task exists
+            heartbeat_record = await self.channel.select_by_task_id("heartbeats", job_id)
+            if heartbeat_record:
+                return TaskStatus.RUNNING
+            # No task or heartbeat record found - task may still be initializing
+            logger.debug("No task or heartbeat record found for job %s - task may still be initializing", job_id)
+        except Exception:
+            logger.exception("Error getting status for job %s", job_id)
+            return TaskStatus.FAILED
+        else:
+            return TaskStatus.FAILED
+    async def wait_for_completion(self, job_id: str) -> None:
+        """Wait for a task to complete by polling its status from SurrealDB.
+        This method polls the task status until it reaches a terminal state.
+        Uses a 0.5 second polling interval to balance responsiveness and resource usage.
+        Args:
+            job_id: The unique identifier of the job to wait for.
+        Raises:
+            KeyError: If the job_id is not found in tasks_sessions.
+        """
+        if job_id not in self.tasks_sessions:
+            msg = f"Job {job_id} not found"
+            raise KeyError(msg)
+        # Poll task status until terminal state
+        terminal_states = {TaskStatus.COMPLETED, TaskStatus.FAILED, TaskStatus.CANCELLED}
+        while True:
+            status = await self.get_module_status(job_id)
+            if status in terminal_states:
+                logger.debug("Job %s reached terminal state: %s", job_id, status)
+                break
+            await asyncio.sleep(0.5)  # Poll interval
     async def stop_module(self, job_id: str) -> bool:
-        """Revoke (terminate) the Taskiq task with id.
+        """Stop a running module using TaskManager.
         Args:
             job_id: The Taskiq task id to stop.
-        Raises:
-            bool: True if the task was successfully revoked, False otherwise.
+        Returns:
+            bool: True if the signal was successfully sent, False otherwise.
         """
-        msg = "stop_module not implemented in TaskiqJobManager"
-        raise NotImplementedError(msg)
+        if job_id not in self.tasks_sessions:
+            logger.warning("Job %s not found in registry", job_id)
+            return False
-    async def stop_all_modules(self) -> None:
-        """Stop all running modules."""
-        msg = "stop_all_modules not implemented in TaskiqJobManager"
-        raise NotImplementedError(msg)
+        try:
+            session = self.tasks_sessions[job_id]
+            # Use TaskManager's cancel_task method which handles signal sending
+            await self.cancel_task(job_id, session.mission_id)
+            logger.info("Cancel signal sent for job %s via TaskManager", job_id)
-    async def get_module_status(self, job_id: str) -> TaskStatus:
-        """Query a module status."""
-        msg = "get_module_status not implemented in TaskiqJobManager"
-        raise NotImplementedError(msg)
+            # Clean up queue after cancellation
+            self.job_queues.pop(job_id, None)
+            logger.debug("Cleaned up queue for job %s", job_id)
+        except Exception:
+            logger.exception("Error stopping job %s", job_id)
+            return False
+        return True
+    async def stop_all_modules(self) -> None:
+        """Stop all running modules tracked in the registry."""
+        stop_tasks = [self.stop_module(job_id) for job_id in list(self.tasks_sessions.keys())]
+        if stop_tasks:
+            results = await asyncio.gather(*stop_tasks, return_exceptions=True)
+            logger.info("Stopped %d modules, results: %s", len(results), results)
     async def list_modules(self) -> dict[str, dict[str, Any]]:
-        """List all modules."""
-        msg = "list_modules not implemented in TaskiqJobManager"
-        raise NotImplementedError(msg)
+        """List all modules tracked in the registry with their statuses.
+        Returns:
+            dict[str, dict[str, Any]]: A dictionary containing information about all tracked modules.
+        """
+        modules_info: dict[str, dict[str, Any]] = {}
+        for job_id in self.tasks_sessions:
+            try:
+                status = await self.get_module_status(job_id)
+                task_record = await self.channel.select_by_task_id("tasks", job_id)
+                modules_info[job_id] = {
+                    "name": self.module_class.__name__,
+                    "status": status,
+                    "class": self.module_class.__name__,
+                    "mission_id": task_record.get("mission_id") if task_record else "unknown",
+                }
+            except Exception:  # noqa: PERF203
+                logger.exception("Error getting info for job %s", job_id)
+                modules_info[job_id] = {
+                    "name": self.module_class.__name__,
+                    "status": TaskStatus.FAILED,
+                    "class": self.module_class.__name__,
+                    "error": "Failed to retrieve status",
+                }
+        return modules_info

digitalkin 0.3.0rc1__py3-none-any.whl → 0.3.1__py3-none-any.whl

digitalkin 0.3.0rc1__py3-none-any.whl → 0.3.1__py3-none-any.whl