digitalkin 0.2.12__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. digitalkin/__version__.py +1 -1
  2. digitalkin/grpc_servers/_base_server.py +15 -17
  3. digitalkin/grpc_servers/module_server.py +9 -10
  4. digitalkin/grpc_servers/module_servicer.py +199 -85
  5. digitalkin/grpc_servers/registry_server.py +3 -6
  6. digitalkin/grpc_servers/registry_servicer.py +18 -19
  7. digitalkin/grpc_servers/utils/exceptions.py +4 -0
  8. digitalkin/grpc_servers/utils/grpc_client_wrapper.py +3 -5
  9. digitalkin/logger.py +45 -1
  10. digitalkin/models/module/__init__.py +2 -1
  11. digitalkin/models/module/module.py +1 -0
  12. digitalkin/models/module/module_types.py +1 -0
  13. digitalkin/modules/_base_module.py +124 -7
  14. digitalkin/modules/archetype_module.py +11 -1
  15. digitalkin/modules/job_manager/base_job_manager.py +181 -0
  16. digitalkin/modules/job_manager/job_manager_models.py +44 -0
  17. digitalkin/modules/job_manager/single_job_manager.py +285 -0
  18. digitalkin/modules/job_manager/taskiq_broker.py +214 -0
  19. digitalkin/modules/job_manager/taskiq_job_manager.py +286 -0
  20. digitalkin/modules/tool_module.py +2 -1
  21. digitalkin/modules/trigger_module.py +3 -1
  22. digitalkin/services/cost/default_cost.py +8 -4
  23. digitalkin/services/cost/grpc_cost.py +15 -7
  24. digitalkin/services/filesystem/default_filesystem.py +2 -4
  25. digitalkin/services/filesystem/grpc_filesystem.py +8 -5
  26. digitalkin/services/setup/__init__.py +1 -0
  27. digitalkin/services/setup/default_setup.py +10 -12
  28. digitalkin/services/setup/grpc_setup.py +8 -10
  29. digitalkin/services/storage/default_storage.py +11 -5
  30. digitalkin/services/storage/grpc_storage.py +23 -8
  31. digitalkin/utils/arg_parser.py +5 -48
  32. digitalkin/utils/development_mode_action.py +51 -0
  33. {digitalkin-0.2.12.dist-info → digitalkin-0.2.14.dist-info}/METADATA +46 -15
  34. {digitalkin-0.2.12.dist-info → digitalkin-0.2.14.dist-info}/RECORD +41 -34
  35. {digitalkin-0.2.12.dist-info → digitalkin-0.2.14.dist-info}/WHEEL +1 -1
  36. modules/cpu_intensive_module.py +281 -0
  37. modules/minimal_llm_module.py +240 -58
  38. modules/storage_module.py +5 -6
  39. modules/text_transform_module.py +1 -1
  40. digitalkin/modules/job_manager.py +0 -177
  41. {digitalkin-0.2.12.dist-info → digitalkin-0.2.14.dist-info}/licenses/LICENSE +0 -0
  42. {digitalkin-0.2.12.dist-info → digitalkin-0.2.14.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,181 @@
1
+ """Background module manager."""
2
+
3
+ import abc
4
+ from collections.abc import AsyncGenerator, AsyncIterator, Callable, Coroutine
5
+ from contextlib import asynccontextmanager
6
+ from typing import Any, Generic
7
+
8
+ from digitalkin.models import ModuleStatus
9
+ from digitalkin.models.module import InputModelT, OutputModelT, SetupModelT
10
+ from digitalkin.models.module.module_types import ConfigSetupModelT
11
+ from digitalkin.modules._base_module import BaseModule
12
+ from digitalkin.services.services_config import ServicesConfig
13
+ from digitalkin.services.services_models import ServicesMode
14
+
15
+
16
class BaseJobManager(abc.ABC, Generic[InputModelT, SetupModelT, ConfigSetupModelT]):
    """Abstract base class for managing background module jobs."""

    async def _start(self) -> None:
        """Start the job manager.

        This method initializes any necessary resources or configurations
        required for the job manager to function. The default implementation
        is a no-op; subclasses override it when startup work is required.
        """

    @staticmethod
    async def job_specific_callback(
        callback: Callable[[str, OutputModelT], Coroutine[Any, Any, None]], job_id: str
    ) -> Callable[[OutputModelT], Coroutine[Any, Any, None]]:
        """Generate a job-specific callback function.

        Args:
            callback: The callback function to be executed when the job completes.
            job_id: The unique identifier of the job.

        Returns:
            Callable: A wrapped callback function that includes the job ID.
        """

        def callback_wrapper(output_data: OutputModelT) -> Coroutine[Any, Any, None]:
            """Bind the job ID as the first argument of the original callback.

            Args:
                output_data: The output data produced by the job.

            Returns:
                Coroutine: The coroutine created by the original callback.
            """
            return callback(job_id, output_data)

        return callback_wrapper

    def __init__(
        self,
        module_class: type[BaseModule],
        services_mode: ServicesMode,
    ) -> None:
        """Initialize the job manager.

        Args:
            module_class: The class of the module to be managed.
            services_mode: The mode of operation for the services (e.g., ASYNC or SYNC).
        """
        self.module_class = module_class

        services_config = ServicesConfig(
            services_config_strategies=self.module_class.services_config_strategies,
            services_config_params=self.module_class.services_config_params,
            mode=services_mode,
        )
        # Direct attribute assignment instead of setattr() with a constant
        # name (ruff B010). Note this mutates the *class*, so the services
        # config is shared by every instance created from module_class.
        self.module_class.services_config = services_config

    @abc.abstractmethod  # type: ignore
    @asynccontextmanager  # type: ignore
    async def generate_stream_consumer(self, job_id: str) -> AsyncIterator[AsyncGenerator[dict[str, Any], None]]:
        """Generate a stream consumer for the job's message stream.

        Args:
            job_id: The unique identifier of the job to filter messages for.

        Yields:
            dict[str, Any]: The messages from the associated module's stream.
        """

    @abc.abstractmethod
    async def create_module_instance_job(
        self,
        input_data: InputModelT,
        setup_data: SetupModelT,
        mission_id: str,
        setup_version_id: str,
    ) -> str:
        """Create and start a new job for the module's instance.

        Args:
            input_data: The input data required to start the job.
            setup_data: The setup configuration for the module.
            mission_id: The mission ID associated with the job.
            setup_version_id: The setup ID associated with the module.

        Returns:
            str: The unique identifier (job ID) of the created job.
        """

    @abc.abstractmethod
    async def generate_config_setup_module_response(self, job_id: str) -> SetupModelT:
        """Wait for and return the processed setup produced by a config-setup job.

        Args:
            job_id: The unique identifier of the job.

        Returns:
            SetupModelT: the SetupModelT object fully processed.
        """

    @abc.abstractmethod
    async def create_config_setup_instance_job(
        self,
        config_setup_data: ConfigSetupModelT,
        setup_data: SetupModelT,
        mission_id: str,
        setup_version_id: str,
    ) -> str:
        """Create and start a new module job.

        This method initializes a new module job, assigns it a unique job ID,
        and starts it in the background.

        Args:
            config_setup_data: The input data required to start the job.
            setup_data: The setup configuration for the module.
            mission_id: The mission ID associated with the job.
            setup_version_id: The setup ID.

        Returns:
            str: The unique identifier (job ID) of the created job.

        Raises:
            Exception: If the module fails to start.
        """

    @abc.abstractmethod
    async def stop_module(self, job_id: str) -> bool:
        """Stop a running module job.

        Args:
            job_id: The unique identifier of the job to stop.

        Returns:
            bool: True if the job was successfully stopped, False if it does not exist.
        """

    @abc.abstractmethod
    async def get_module_status(self, job_id: str) -> ModuleStatus | None:
        """Retrieve the status of a module job.

        Args:
            job_id: The unique identifier of the job.

        Returns:
            ModuleStatus | None: The status of the job, or None if the job does not exist.
        """

    @abc.abstractmethod
    async def stop_all_modules(self) -> None:
        """Stop all currently running module jobs.

        This method ensures that all active jobs are gracefully terminated.
        """

    @abc.abstractmethod
    async def list_modules(self) -> dict[str, dict[str, Any]]:
        """List all modules along with their statuses.

        Returns:
            dict[str, dict[str, Any]]: A dictionary containing information about all modules and their statuses.
        """
@@ -0,0 +1,44 @@
1
+ """Job manager models."""
2
+
3
+ from enum import Enum
4
+
5
+ from pydantic import BaseModel
6
+
7
+ from digitalkin.modules.job_manager.base_job_manager import BaseJobManager
8
+
9
+
10
class StreamCodeModel(BaseModel):
    """Typed error/code model."""

    # Error/status code string; exact semantics depend on the producer — TODO confirm.
    code: str
14
+
15
+
16
class JobManagerMode(Enum):
    """Job manager mode."""

    SINGLE = "single"
    TASKIQ = "taskiq"

    def __str__(self) -> str:
        """Render the mode as its underlying string value.

        Returns:
            str: job manager mode name.
        """
        return self.value

    def get_manager_class(self) -> type[BaseJobManager]:
        """Resolve the concrete job manager class for this mode.

        Imports are deferred so a backend's dependencies are only loaded
        when its mode is actually selected.

        Returns:
            type: The job manager class.
        """
        if self is JobManagerMode.SINGLE:
            from digitalkin.modules.job_manager.single_job_manager import SingleJobManager  # noqa: PLC0415

            return SingleJobManager
        if self is JobManagerMode.TASKIQ:
            from digitalkin.modules.job_manager.taskiq_job_manager import TaskiqJobManager  # noqa: PLC0415

            return TaskiqJobManager
@@ -0,0 +1,285 @@
1
+ """Background module manager with single instance."""
2
+
3
+ import asyncio
4
+ import uuid
5
+ from collections.abc import AsyncGenerator, AsyncIterator
6
+ from contextlib import asynccontextmanager
7
+ from typing import Any, Generic
8
+
9
+ import grpc
10
+
11
+ from digitalkin.logger import logger
12
+ from digitalkin.models import ModuleStatus
13
+ from digitalkin.models.module import ConfigSetupModelT, InputModelT, OutputModelT, SetupModelT
14
+ from digitalkin.modules._base_module import BaseModule
15
+ from digitalkin.modules.job_manager.base_job_manager import BaseJobManager
16
+ from digitalkin.services.services_models import ServicesMode
17
+
18
+
19
class SingleJobManager(BaseJobManager, Generic[InputModelT, SetupModelT, ConfigSetupModelT]):
    """Manages a single instance of a module job.

    This class ensures that only one instance of a module job is active at a time.
    It provides functionality to create, stop, and monitor module jobs, as well as
    to handle their output data.
    """

    modules: dict[str, BaseModule]
    # Fixed: was declared as `queue`, but every access site uses `self.queues`.
    queues: dict[str, asyncio.Queue]

    def __init__(
        self,
        module_class: type[BaseModule],
        services_mode: ServicesMode,
    ) -> None:
        """Initialize the job manager.

        Args:
            module_class: The class of the module to be managed.
            services_mode: The mode of operation for the services (e.g., ASYNC or SYNC).
        """
        super().__init__(module_class, services_mode)

        self._lock = asyncio.Lock()
        self.modules: dict[str, BaseModule] = {}
        self.queues: dict[str, asyncio.Queue] = {}

    async def generate_config_setup_module_response(self, job_id: str) -> SetupModelT:
        """Wait for and return the processed setup produced by a config-setup job.

        Blocks until the job pushes its result onto the queue, then removes
        the queue (a config-setup job produces a single response).

        Args:
            job_id: The unique identifier of the job.

        Returns:
            SetupModelT: the SetupModelT object fully processed.
        """
        module = self.modules.get(job_id, None)
        logger.debug("Module %s found: %s", job_id, module)

        try:
            return await self.queues[job_id].get()
        finally:
            logger.info(f"{job_id=}: {self.queues[job_id].empty()}")
            del self.queues[job_id]

    async def create_config_setup_instance_job(
        self,
        config_setup_data: ConfigSetupModelT,
        setup_data: SetupModelT,
        mission_id: str,
        setup_version_id: str,
    ) -> str:
        """Create and start a new module setup configuration job.

        This method initializes a new module job, assigns it a unique job ID,
        and starts the config setup it in the background.

        Args:
            config_setup_data: The input data required to start the job.
            setup_data: The setup configuration for the module.
            mission_id: The mission ID associated with the job.
            setup_version_id: The setup ID.

        Returns:
            str: The unique identifier (job ID) of the created job.

        Raises:
            Exception: If the module fails to start.
        """
        job_id = str(uuid.uuid4())
        # TODO: Ensure the job_id is unique.
        module = self.module_class(job_id, mission_id=mission_id, setup_version_id=setup_version_id)
        self.modules[job_id] = module
        self.queues[job_id] = asyncio.Queue()

        try:
            await module.start_config_setup(
                config_setup_data,
                setup_data,
                await self.job_specific_callback(self.add_to_queue, job_id),
            )
            logger.debug("Module %s (%s) started successfully", job_id, module.name)
        except Exception:
            # Remove the module AND its queue in case of an error (the queue
            # previously leaked), and fix the format string: it had two
            # placeholders but only one argument, which breaks log rendering.
            del self.modules[job_id]
            self.queues.pop(job_id, None)
            logger.exception("Failed to start module %s", job_id)
            raise
        else:
            return job_id

    async def add_to_queue(self, job_id: str, output_data: OutputModelT) -> None:  # type: ignore
        """Add output data to the queue for a specific job.

        This method is used as a callback to handle output data generated by a module job.

        Args:
            job_id: The unique identifier of the job.
            output_data: The output data produced by the job.
        """
        await self.queues[job_id].put(output_data.model_dump())

    @asynccontextmanager  # type: ignore
    async def generate_stream_consumer(self, job_id: str) -> AsyncIterator[AsyncGenerator[dict[str, Any], None]]:  # type: ignore
        """Generate a stream consumer for a module's output data.

        This method creates an asynchronous generator that streams output data
        from a specific module job. If the module does not exist, it generates
        an error message.

        Args:
            job_id: The unique identifier of the job.

        Yields:
            AsyncGenerator: A stream of output data or error messages.
        """
        module = self.modules.get(job_id, None)

        logger.debug("Module %s found: %s", job_id, module)

        async def _stream() -> AsyncGenerator[dict[str, Any], Any]:
            """Stream output data from the module.

            Yields:
                dict: Output data generated by the module.
            """
            if module is None:
                yield {
                    "error": {
                        "error_message": f"Module {job_id} not found",
                        "code": grpc.StatusCode.NOT_FOUND,
                    }
                }
                return

            try:
                # Keep draining while the module runs, and flush any queued
                # messages left over once it is stopping/stopped.
                while module.status == ModuleStatus.RUNNING or (
                    not self.queues[job_id].empty()
                    and module.status
                    in {
                        ModuleStatus.STOPPED,
                        ModuleStatus.STOPPING,
                    }
                ):
                    logger.info(f"{job_id=}: {module.status=}")
                    yield await self.queues[job_id].get()
                    logger.info(f"{job_id=}: {module.status=} | {self.queues[job_id].empty()}")

            finally:
                del self.queues[job_id]

        yield _stream()

    async def create_module_instance_job(
        self,
        input_data: InputModelT,
        setup_data: SetupModelT,
        mission_id: str,
        setup_version_id: str,
    ) -> str:
        """Create and start a new module job.

        This method initializes a new module job, assigns it a unique job ID,
        and starts it in the background.

        Args:
            input_data: The input data required to start the job.
            setup_data: The setup configuration for the module.
            mission_id: The mission ID associated with the job.
            setup_version_id: The setup ID associated with the module.

        Returns:
            str: The unique identifier (job ID) of the created job.

        Raises:
            Exception: If the module fails to start.
        """
        job_id = str(uuid.uuid4())
        # TODO: Ensure the job_id is unique.
        module = self.module_class(job_id, mission_id=mission_id, setup_version_id=setup_version_id)
        self.modules[job_id] = module
        self.queues[job_id] = asyncio.Queue()

        try:
            await module.start(
                input_data,
                setup_data,
                await self.job_specific_callback(self.add_to_queue, job_id),
            )
            logger.debug("Module %s (%s) started successfully", job_id, module.name)
        except Exception:
            # Remove the module AND its queue in case of an error (the queue
            # previously leaked); format string fixed as above (one placeholder
            # for the one argument).
            del self.modules[job_id]
            self.queues.pop(job_id, None)
            logger.exception("Failed to start module %s", job_id)
            raise
        else:
            return job_id

    async def stop_module(self, job_id: str) -> bool:
        """Stop a running module job.

        Args:
            job_id: The unique identifier of the job to stop.

        Returns:
            bool: True if the module was successfully stopped, False if it does not exist.

        Raises:
            Exception: If an error occurs while stopping the module.
        """
        async with self._lock:
            module = self.modules.get(job_id)
            if not module:
                logger.warning(f"Module {job_id} not found")
                return False
            try:
                await module.stop()
                # should maybe be added in finally
                del self.queues[job_id]
                del self.modules[job_id]
                logger.debug(f"Module {job_id} ({module.name}) stopped successfully")
            except Exception as e:
                logger.error(f"Error while stopping module {job_id}: {e}")
                raise
            else:
                return True

    async def get_module_status(self, job_id: str) -> ModuleStatus | None:
        """Retrieve the status of a module job.

        Args:
            job_id: The unique identifier of the job.

        Returns:
            ModuleStatus | None: The status of the module, or None if it does not exist.
        """
        module = self.modules.get(job_id)
        return module.status if module else None

    async def stop_all_modules(self) -> None:
        """Stop all currently running module jobs.

        This method ensures that all active jobs are gracefully terminated.
        """
        # Deadlock fix: do NOT hold self._lock here. stop_module() acquires the
        # same lock per job, and asyncio.Lock is not reentrant, so the previous
        # outer `async with self._lock` deadlocked on the first job.
        stop_tasks = [self.stop_module(job_id) for job_id in list(self.modules.keys())]
        if stop_tasks:
            await asyncio.gather(*stop_tasks, return_exceptions=True)

    async def list_modules(self) -> dict[str, dict[str, Any]]:
        """List all modules along with their statuses.

        Returns:
            dict[str, dict[str, Any]]: A dictionary containing information about all modules and their statuses.
        """
        return {
            job_id: {
                "name": module.name,
                "status": module.status,
                "class": module.__class__.__name__,
            }
            for job_id, module in self.modules.items()
        }