digitalkin 0.2.25rc0__py3-none-any.whl → 0.3.2.dev14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (122) hide show
  1. base_server/server_async_insecure.py +6 -5
  2. base_server/server_async_secure.py +6 -5
  3. base_server/server_sync_insecure.py +5 -4
  4. base_server/server_sync_secure.py +5 -4
  5. digitalkin/__version__.py +1 -1
  6. digitalkin/core/__init__.py +1 -0
  7. digitalkin/core/common/__init__.py +9 -0
  8. digitalkin/core/common/factories.py +156 -0
  9. digitalkin/core/job_manager/__init__.py +1 -0
  10. digitalkin/{modules → core}/job_manager/base_job_manager.py +138 -32
  11. digitalkin/core/job_manager/single_job_manager.py +373 -0
  12. digitalkin/{modules → core}/job_manager/taskiq_broker.py +121 -26
  13. digitalkin/core/job_manager/taskiq_job_manager.py +541 -0
  14. digitalkin/core/task_manager/__init__.py +1 -0
  15. digitalkin/core/task_manager/base_task_manager.py +539 -0
  16. digitalkin/core/task_manager/local_task_manager.py +108 -0
  17. digitalkin/core/task_manager/remote_task_manager.py +87 -0
  18. digitalkin/core/task_manager/surrealdb_repository.py +266 -0
  19. digitalkin/core/task_manager/task_executor.py +249 -0
  20. digitalkin/core/task_manager/task_session.py +368 -0
  21. digitalkin/grpc_servers/__init__.py +1 -19
  22. digitalkin/grpc_servers/_base_server.py +3 -3
  23. digitalkin/grpc_servers/module_server.py +120 -195
  24. digitalkin/grpc_servers/module_servicer.py +81 -44
  25. digitalkin/grpc_servers/utils/__init__.py +1 -0
  26. digitalkin/grpc_servers/utils/exceptions.py +0 -8
  27. digitalkin/grpc_servers/utils/grpc_client_wrapper.py +25 -9
  28. digitalkin/grpc_servers/utils/grpc_error_handler.py +53 -0
  29. digitalkin/grpc_servers/utils/utility_schema_extender.py +100 -0
  30. digitalkin/logger.py +64 -27
  31. digitalkin/mixins/__init__.py +19 -0
  32. digitalkin/mixins/base_mixin.py +10 -0
  33. digitalkin/mixins/callback_mixin.py +24 -0
  34. digitalkin/mixins/chat_history_mixin.py +110 -0
  35. digitalkin/mixins/cost_mixin.py +76 -0
  36. digitalkin/mixins/file_history_mixin.py +93 -0
  37. digitalkin/mixins/filesystem_mixin.py +46 -0
  38. digitalkin/mixins/logger_mixin.py +51 -0
  39. digitalkin/mixins/storage_mixin.py +79 -0
  40. digitalkin/models/__init__.py +1 -1
  41. digitalkin/models/core/__init__.py +1 -0
  42. digitalkin/{modules/job_manager → models/core}/job_manager_models.py +3 -11
  43. digitalkin/models/core/task_monitor.py +74 -0
  44. digitalkin/models/grpc_servers/__init__.py +1 -0
  45. digitalkin/{grpc_servers/utils → models/grpc_servers}/models.py +92 -7
  46. digitalkin/models/module/__init__.py +18 -11
  47. digitalkin/models/module/base_types.py +61 -0
  48. digitalkin/models/module/module.py +9 -1
  49. digitalkin/models/module/module_context.py +282 -6
  50. digitalkin/models/module/module_types.py +29 -105
  51. digitalkin/models/module/setup_types.py +490 -0
  52. digitalkin/models/module/tool_cache.py +68 -0
  53. digitalkin/models/module/tool_reference.py +117 -0
  54. digitalkin/models/module/utility.py +167 -0
  55. digitalkin/models/services/__init__.py +9 -0
  56. digitalkin/models/services/cost.py +1 -0
  57. digitalkin/models/services/registry.py +35 -0
  58. digitalkin/models/services/storage.py +39 -5
  59. digitalkin/modules/__init__.py +5 -1
  60. digitalkin/modules/_base_module.py +265 -167
  61. digitalkin/modules/archetype_module.py +6 -1
  62. digitalkin/modules/tool_module.py +16 -3
  63. digitalkin/modules/trigger_handler.py +7 -6
  64. digitalkin/modules/triggers/__init__.py +8 -0
  65. digitalkin/modules/triggers/healthcheck_ping_trigger.py +45 -0
  66. digitalkin/modules/triggers/healthcheck_services_trigger.py +63 -0
  67. digitalkin/modules/triggers/healthcheck_status_trigger.py +52 -0
  68. digitalkin/services/__init__.py +4 -0
  69. digitalkin/services/communication/__init__.py +7 -0
  70. digitalkin/services/communication/communication_strategy.py +76 -0
  71. digitalkin/services/communication/default_communication.py +101 -0
  72. digitalkin/services/communication/grpc_communication.py +234 -0
  73. digitalkin/services/cost/__init__.py +9 -2
  74. digitalkin/services/cost/grpc_cost.py +9 -42
  75. digitalkin/services/filesystem/default_filesystem.py +0 -2
  76. digitalkin/services/filesystem/grpc_filesystem.py +10 -39
  77. digitalkin/services/registry/__init__.py +22 -1
  78. digitalkin/services/registry/default_registry.py +135 -4
  79. digitalkin/services/registry/exceptions.py +47 -0
  80. digitalkin/services/registry/grpc_registry.py +306 -0
  81. digitalkin/services/registry/registry_models.py +15 -0
  82. digitalkin/services/registry/registry_strategy.py +88 -4
  83. digitalkin/services/services_config.py +25 -3
  84. digitalkin/services/services_models.py +5 -1
  85. digitalkin/services/setup/default_setup.py +6 -7
  86. digitalkin/services/setup/grpc_setup.py +52 -15
  87. digitalkin/services/storage/grpc_storage.py +4 -4
  88. digitalkin/services/user_profile/__init__.py +12 -0
  89. digitalkin/services/user_profile/default_user_profile.py +55 -0
  90. digitalkin/services/user_profile/grpc_user_profile.py +69 -0
  91. digitalkin/services/user_profile/user_profile_strategy.py +25 -0
  92. digitalkin/utils/__init__.py +28 -0
  93. digitalkin/utils/arg_parser.py +1 -1
  94. digitalkin/utils/development_mode_action.py +2 -2
  95. digitalkin/utils/dynamic_schema.py +483 -0
  96. digitalkin/utils/package_discover.py +1 -2
  97. digitalkin/utils/schema_splitter.py +207 -0
  98. {digitalkin-0.2.25rc0.dist-info → digitalkin-0.3.2.dev14.dist-info}/METADATA +11 -30
  99. digitalkin-0.3.2.dev14.dist-info/RECORD +143 -0
  100. {digitalkin-0.2.25rc0.dist-info → digitalkin-0.3.2.dev14.dist-info}/top_level.txt +1 -0
  101. modules/archetype_with_tools_module.py +244 -0
  102. modules/cpu_intensive_module.py +1 -1
  103. modules/dynamic_setup_module.py +338 -0
  104. modules/minimal_llm_module.py +1 -1
  105. modules/text_transform_module.py +1 -1
  106. monitoring/digitalkin_observability/__init__.py +46 -0
  107. monitoring/digitalkin_observability/http_server.py +150 -0
  108. monitoring/digitalkin_observability/interceptors.py +176 -0
  109. monitoring/digitalkin_observability/metrics.py +201 -0
  110. monitoring/digitalkin_observability/prometheus.py +137 -0
  111. monitoring/tests/test_metrics.py +172 -0
  112. services/filesystem_module.py +7 -5
  113. services/storage_module.py +4 -2
  114. digitalkin/grpc_servers/registry_server.py +0 -65
  115. digitalkin/grpc_servers/registry_servicer.py +0 -456
  116. digitalkin/grpc_servers/utils/factory.py +0 -180
  117. digitalkin/modules/job_manager/single_job_manager.py +0 -294
  118. digitalkin/modules/job_manager/taskiq_job_manager.py +0 -290
  119. digitalkin-0.2.25rc0.dist-info/RECORD +0 -89
  120. /digitalkin/{grpc_servers/utils → models/grpc_servers}/types.py +0 -0
  121. {digitalkin-0.2.25rc0.dist-info → digitalkin-0.3.2.dev14.dist-info}/WHEEL +0 -0
  122. {digitalkin-0.2.25rc0.dist-info → digitalkin-0.3.2.dev14.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,368 @@
1
+ """Task session easing task lifecycle management."""
2
+
3
+ import asyncio
4
+ import datetime
5
+ from collections.abc import AsyncGenerator
6
+
7
+ from digitalkin.core.task_manager.surrealdb_repository import SurrealDBConnection
8
+ from digitalkin.logger import logger
9
+ from digitalkin.models.core.task_monitor import (
10
+ CancellationReason,
11
+ HeartbeatMessage,
12
+ SignalMessage,
13
+ SignalType,
14
+ TaskStatus,
15
+ )
16
+ from digitalkin.modules._base_module import BaseModule
17
+
18
+
19
class TaskSession:
    """Task session with lifecycle management.

    The session defines the whole lifecycle of a task as an ephemeral context:
    it carries the task identifiers, emits heartbeats, listens for control
    signals (cancel / pause / resume / status) and releases its resources in
    ``cleanup``.
    """

    db: SurrealDBConnection
    module: BaseModule

    status: TaskStatus
    # NOTE(review): declared here but never assigned by this class.
    signal_queue: AsyncGenerator | None

    task_id: str
    mission_id: str
    signal_record_id: str | None
    heartbeat_record_id: str | None

    started_at: datetime.datetime | None
    completed_at: datetime.datetime | None

    is_cancelled: asyncio.Event
    cancellation_reason: CancellationReason
    # Set while the task is paused.
    _paused: asyncio.Event
    # Inverse of ``_paused``: set while the task may run; waiters block on it.
    _resumed: asyncio.Event
    _heartbeat_interval: datetime.timedelta
    _last_heartbeat: datetime.datetime

    def __init__(
        self,
        task_id: str,
        mission_id: str,
        db: SurrealDBConnection,
        module: BaseModule,
        heartbeat_interval: datetime.timedelta = datetime.timedelta(seconds=2),
        queue_maxsize: int = 1000,
    ) -> None:
        """Initialize Task Session.

        Args:
            task_id: Unique task identifier
            mission_id: Mission identifier
            db: SurrealDB connection
            module: Module instance
            heartbeat_interval: Interval between heartbeats
            queue_maxsize: Maximum size for the queue (0 = unlimited)
        """
        self.db = db
        self.module = module

        self.status = TaskStatus.PENDING
        # Bounded queue to prevent unbounded memory growth (see queue_maxsize)
        self.queue: asyncio.Queue = asyncio.Queue(maxsize=queue_maxsize)

        self.task_id = task_id
        self.mission_id = mission_id

        self.heartbeat = None
        self.started_at = None
        self.completed_at = None

        self.signal_record_id = None
        self.heartbeat_record_id = None

        self.is_cancelled = asyncio.Event()
        self.cancellation_reason = CancellationReason.UNKNOWN
        self._paused = asyncio.Event()
        # A fresh session is not paused, so the "may run" event starts set.
        self._resumed = asyncio.Event()
        self._resumed.set()
        self._heartbeat_interval = heartbeat_interval
        # Aware minimum so the rate-limit comparison in send_heartbeat is
        # always well-defined, even before the first successful heartbeat.
        self._last_heartbeat = datetime.datetime.min.replace(tzinfo=datetime.timezone.utc)

        logger.info(
            "TaskSession initialized",
            extra={
                "task_id": task_id,
                "mission_id": mission_id,
                "heartbeat_interval": str(heartbeat_interval),
            },
        )

    @property
    def cancelled(self) -> bool:
        """Task cancellation status."""
        return self.is_cancelled.is_set()

    @property
    def paused(self) -> bool:
        """Task paused status."""
        return self._paused.is_set()

    @property
    def setup_id(self) -> str:
        """Get setup_id from module context."""
        return self.module.context.session.setup_id

    @property
    def setup_version_id(self) -> str:
        """Get setup_version_id from module context."""
        return self.module.context.session.setup_version_id

    @property
    def session_ids(self) -> dict[str, str]:
        """Get all session IDs from module context for structured logging."""
        return self.module.context.session.current_ids()

    async def send_heartbeat(self) -> bool:
        """Rate-limited heartbeat with connection resilience.

        The first call creates the heartbeat record; later calls merge into it,
        unless less than ``_heartbeat_interval`` has elapsed since the last
        successful heartbeat, in which case the call is skipped.

        Returns:
            bool: True if heartbeat was successful (or skipped by rate
                limiting), False otherwise
        """
        heartbeat = HeartbeatMessage(
            task_id=self.task_id,
            mission_id=self.mission_id,
            setup_id=self.setup_id,
            setup_version_id=self.setup_version_id,
            timestamp=datetime.datetime.now(datetime.timezone.utc),
        )

        if self.heartbeat_record_id is None:
            # No record yet: create it and remember its id for later merges.
            try:
                success = await self.db.create("heartbeats", heartbeat.model_dump())
                if "code" not in success:
                    self.heartbeat_record_id = success.get("id")  # type: ignore
                    self._last_heartbeat = heartbeat.timestamp
                    return True
            except Exception as e:
                logger.error(
                    "Heartbeat exception",
                    extra={**self.session_ids, "error": str(e)},
                    exc_info=True,
                )
            logger.error("Initial heartbeat failed", extra=self.session_ids)
            return False

        elapsed = heartbeat.timestamp - self._last_heartbeat
        if elapsed < self._heartbeat_interval:
            logger.debug(
                "Heartbeat skipped due to rate limiting",
                extra={**self.session_ids, "delta": str(elapsed)},
            )
            return True

        try:
            success = await self.db.merge("heartbeats", self.heartbeat_record_id, heartbeat.model_dump())
            if "code" not in success:
                self._last_heartbeat = heartbeat.timestamp
                return True
        except Exception as e:
            logger.error(
                "Heartbeat exception",
                extra={**self.session_ids, "error": str(e)},
                exc_info=True,
            )
        logger.warning("Heartbeat failed", extra=self.session_ids)
        return False

    async def generate_heartbeats(self) -> None:
        """Periodic heartbeat generator with cancellation support.

        Loops until the session is cancelled. A failed heartbeat cancels the
        task with ``CancellationReason.HEARTBEAT_FAILURE`` and ends the loop.
        """
        logger.debug("Heartbeat generator started", extra=self.session_ids)
        while not self.cancelled:
            logger.debug(
                "Heartbeat tick",
                extra={**self.session_ids, "cancelled": self.cancelled},
            )
            success = await self.send_heartbeat()
            if not success:
                logger.error("Heartbeat failed, cancelling task", extra=self.session_ids)
                await self._handle_cancel(CancellationReason.HEARTBEAT_FAILURE)
                break
            await asyncio.sleep(self._heartbeat_interval.total_seconds())

    async def wait_if_paused(self) -> None:
        """Block execution while the task is paused.

        Returns immediately when the task is not paused. Otherwise waits until
        the task is resumed or cancelled (cancellation also releases waiters).
        """
        if self._paused.is_set():
            logger.info("Task paused, waiting for resume", extra=self.session_ids)
            # Waiting on ``_paused`` itself would return at once because it is
            # already set; block on the inverse event instead.
            await self._resumed.wait()

    async def listen_signals(self) -> None:  # noqa: C901
        """Enhanced signal listener with comprehensive handling.

        Subscribes to live updates on the ``tasks`` table and dispatches
        ``cancel`` / ``pause`` / ``resume`` / ``status`` actions to their
        handlers. The live query is always stopped on exit.

        Raises:
            CancelledError: Asyncio when task cancelling
        """
        logger.info("Signal listener started", extra=self.session_ids)
        if self.signal_record_id is None:
            self.signal_record_id = (await self.db.select_by_task_id("tasks", self.task_id)).get("id")

        live_id, live_signals = await self.db.start_live("tasks")
        try:
            async for signal in live_signals:
                logger.debug("Signal received", extra={**self.session_ids, "signal": signal})
                if self.cancelled:
                    break

                if signal is None or signal["id"] == self.signal_record_id or "payload" not in signal:
                    continue

                action = signal["action"]
                if action == "cancel":
                    await self._handle_cancel(CancellationReason.SIGNAL)
                elif action == "pause":
                    await self._handle_pause()
                elif action == "resume":
                    await self._handle_resume()
                elif action == "status":
                    await self._handle_status_request()

        except asyncio.CancelledError:
            logger.debug("Signal listener cancelled", extra=self.session_ids)
            raise
        except Exception as e:
            logger.error(
                "Signal listener fatal error",
                extra={**self.session_ids, "error": str(e)},
                exc_info=True,
            )
        finally:
            await self.db.stop_live(live_id)
            logger.info("Signal listener stopped", extra=self.session_ids)

    async def _send_ack(self, action: SignalType) -> None:
        """Write an acknowledgment for ``action`` to this task's signal record.

        Args:
            action: Acknowledgment type to report alongside the current status
        """
        await self.db.update(
            "tasks",
            self.signal_record_id,  # type: ignore
            SignalMessage(
                task_id=self.task_id,
                mission_id=self.mission_id,
                setup_id=self.setup_id,
                setup_version_id=self.setup_version_id,
                action=action,
                status=self.status,
            ).model_dump(),
        )

    async def _handle_cancel(self, reason: CancellationReason = CancellationReason.UNKNOWN) -> None:
        """Idempotent cancellation with acknowledgment and reason tracking.

        Args:
            reason: The reason for cancellation (signal, heartbeat failure, cleanup, etc.)
        """
        if self.is_cancelled.is_set():
            logger.debug(
                "Cancel ignored - already cancelled",
                extra={
                    **self.session_ids,
                    "existing_reason": self.cancellation_reason.value,
                    "new_reason": reason.value,
                },
            )
            return

        self.cancellation_reason = reason
        self.status = TaskStatus.CANCELLED
        self.is_cancelled.set()

        # Log with appropriate level based on reason
        if reason in {CancellationReason.SUCCESS_CLEANUP, CancellationReason.FAILURE_CLEANUP}:
            logger.debug(
                "Task cancelled (cleanup)",
                extra={**self.session_ids, "cancellation_reason": reason.value},
            )
        else:
            logger.info(
                "Task cancelled",
                extra={**self.session_ids, "cancellation_reason": reason.value},
            )

        # Resume if paused so cancellation can proceed: release anything
        # blocked in wait_if_paused so it can observe the cancelled flag.
        if self._paused.is_set():
            self._paused.clear()
        self._resumed.set()

        await self._send_ack(SignalType.ACK_CANCEL)

    async def _handle_pause(self) -> None:
        """Pause task execution and acknowledge the pause."""
        if not self._paused.is_set():
            logger.info("Task paused", extra=self.session_ids)
            self._paused.set()
            self._resumed.clear()

        await self._send_ack(SignalType.ACK_PAUSE)

    async def _handle_resume(self) -> None:
        """Resume paused task and acknowledge the resume."""
        if self._paused.is_set():
            logger.info("Task resumed", extra=self.session_ids)
            self._paused.clear()
            self._resumed.set()

        await self._send_ack(SignalType.ACK_RESUME)

    async def _handle_status_request(self) -> None:
        """Send current task status."""
        await self._send_ack(SignalType.ACK_STATUS)

        logger.debug("Status report sent", extra=self.session_ids)

    async def cleanup(self) -> None:
        """Clean up task session resources.

        This includes:
            - Clearing queue to free memory
            - Stopping module
            - Closing database connection
            - Clearing module reference
        """
        # Clear queue to free memory
        try:
            while not self.queue.empty():
                self.queue.get_nowait()
        except asyncio.QueueEmpty:
            pass

        # Stop module
        try:
            await self.module.stop()
        except Exception:
            logger.exception(
                "Error stopping module during cleanup",
                extra={"mission_id": self.mission_id, "task_id": self.task_id},
            )

        try:
            # Close DB connection (kills all live queries)
            await self.db.close()
        finally:
            # Clear module reference to allow garbage collection, even when
            # closing the connection raised.
            self.module = None  # type: ignore
@@ -1,19 +1 @@
1
- """This package contains the gRPC server and client implementations.
2
-
3
- ```shell
4
- digitalkin/grpc/
5
- ├── __init__.py
6
- ├── base_server.py # Base server implementation with common functionality
7
- ├── module_server.py # Module-specific server implementation
8
- ├── registry_server.py # Registry-specific server implementation
9
- ├── module_servicer.py # gRPC servicer for Module service
10
- ├── registry_servicer.py # gRPC servicer for Registry service
11
- ├── client/ # Client libraries for connecting to servers
12
- │ ├── __init__.py
13
- │ ├── module_client.py
14
- │ └── registry_client.py
15
- └── utils/ # Utility functions
16
- ├── __init__.py
17
- └── server_utils.py # Common server utilities
18
- ```
19
- """
1
+ """This package contains the gRPC server and client implementations."""
@@ -17,9 +17,9 @@ from digitalkin.grpc_servers.utils.exceptions import (
17
17
  ServerStateError,
18
18
  ServicerError,
19
19
  )
20
- from digitalkin.grpc_servers.utils.models import SecurityMode, ServerConfig, ServerMode
21
- from digitalkin.grpc_servers.utils.types import GrpcServer, ServiceDescriptor, T
22
20
  from digitalkin.logger import logger
21
+ from digitalkin.models.grpc_servers.models import SecurityMode, ServerConfig, ServerMode
22
+ from digitalkin.models.grpc_servers.types import GrpcServer, ServiceDescriptor, T
23
23
 
24
24
 
25
25
  class BaseServer(abc.ABC):
@@ -299,7 +299,7 @@ class BaseServer(abc.ABC):
299
299
  self._add_reflection()
300
300
 
301
301
  # Start the server
302
- logger.debug("Starting gRPC server on %s", self.config.address)
302
+ logger.debug("Starting gRPC server on %s", self.config.address, extra={"config": self.config})
303
303
  try:
304
304
  if self.config.mode == ServerMode.ASYNC:
305
305
  # For async server, use the event loop