MemoryOS 0.2.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (62)
  1. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/METADATA +6 -1
  2. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/RECORD +61 -55
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +6 -8
  5. memos/api/context/context.py +1 -1
  6. memos/api/context/dependencies.py +11 -0
  7. memos/configs/internet_retriever.py +13 -0
  8. memos/configs/mem_scheduler.py +38 -16
  9. memos/graph_dbs/base.py +30 -3
  10. memos/graph_dbs/nebular.py +442 -194
  11. memos/graph_dbs/neo4j.py +14 -5
  12. memos/log.py +5 -0
  13. memos/mem_os/core.py +19 -9
  14. memos/mem_os/main.py +1 -1
  15. memos/mem_os/product.py +6 -69
  16. memos/mem_os/utils/default_config.py +1 -1
  17. memos/mem_os/utils/format_utils.py +11 -47
  18. memos/mem_os/utils/reference_utils.py +133 -0
  19. memos/mem_scheduler/base_scheduler.py +58 -55
  20. memos/mem_scheduler/{modules → general_modules}/base.py +1 -2
  21. memos/mem_scheduler/{modules → general_modules}/dispatcher.py +54 -15
  22. memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +4 -4
  23. memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
  24. memos/mem_scheduler/{modules → general_modules}/retriever.py +19 -5
  25. memos/mem_scheduler/{modules → general_modules}/scheduler_logger.py +10 -4
  26. memos/mem_scheduler/general_scheduler.py +110 -67
  27. memos/mem_scheduler/monitors/__init__.py +0 -0
  28. memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
  29. memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +57 -19
  30. memos/mem_scheduler/mos_for_test_scheduler.py +7 -1
  31. memos/mem_scheduler/schemas/general_schemas.py +3 -2
  32. memos/mem_scheduler/schemas/message_schemas.py +2 -1
  33. memos/mem_scheduler/schemas/monitor_schemas.py +10 -2
  34. memos/mem_scheduler/utils/misc_utils.py +43 -2
  35. memos/memories/activation/item.py +1 -1
  36. memos/memories/activation/kv.py +20 -8
  37. memos/memories/textual/base.py +1 -1
  38. memos/memories/textual/general.py +1 -1
  39. memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +30 -48
  40. memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
  41. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +2 -0
  42. memos/memories/textual/tree_text_memory/organize/reorganizer.py +102 -140
  43. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +229 -0
  44. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +9 -0
  45. memos/memories/textual/tree_text_memory/retrieve/recall.py +15 -8
  46. memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
  47. memos/memories/textual/tree_text_memory/retrieve/searcher.py +177 -125
  48. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +7 -2
  49. memos/memories/textual/tree_text_memory/retrieve/utils.py +1 -1
  50. memos/memos_tools/lockfree_dict.py +120 -0
  51. memos/memos_tools/thread_safe_dict.py +288 -0
  52. memos/templates/mem_reader_prompts.py +2 -0
  53. memos/templates/mem_scheduler_prompts.py +23 -10
  54. memos/templates/mos_prompts.py +40 -11
  55. memos/templates/tree_reorganize_prompts.py +24 -17
  56. memos/utils.py +19 -0
  57. memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
  58. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/LICENSE +0 -0
  59. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/WHEEL +0 -0
  60. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/entry_points.txt +0 -0
  61. /memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
  62. /memos/mem_scheduler/{modules → general_modules}/misc.py +0 -0
@@ -9,11 +9,13 @@ from memos.mem_scheduler.schemas.general_schemas import (
9
9
  ANSWER_LABEL,
10
10
  DEFAULT_MAX_QUERY_KEY_WORDS,
11
11
  QUERY_LABEL,
12
+ WORKING_MEMORY_TYPE,
12
13
  MemCubeID,
13
14
  UserID,
14
15
  )
15
16
  from memos.mem_scheduler.schemas.message_schemas import ScheduleMessageItem
16
17
  from memos.mem_scheduler.schemas.monitor_schemas import QueryMonitorItem
18
+ from memos.mem_scheduler.utils.filter_utils import is_all_chinese, is_all_english
17
19
  from memos.memories.textual.tree import TextualMemoryItem, TreeTextMemory
18
20
 
19
21
 
@@ -35,11 +37,12 @@ class GeneralScheduler(BaseScheduler):
35
37
 
36
38
  # for evaluation
37
39
  def search_for_eval(
38
- self,
39
- query: str,
40
- user_id: UserID | str,
41
- top_k: int,
42
- ) -> list[str]:
40
+ self, query: str, user_id: UserID | str, top_k: int, scheduler_flag: bool = True
41
+ ) -> (list[str], bool):
42
+ self.monitor.register_query_monitor_if_not_exists(
43
+ user_id=user_id, mem_cube_id=self.current_mem_cube_id
44
+ )
45
+
43
46
  query_keywords = self.monitor.extract_query_keywords(query=query)
44
47
  logger.info(f'Extract keywords "{query_keywords}" from query "{query}"')
45
48
 
@@ -48,35 +51,61 @@ class GeneralScheduler(BaseScheduler):
48
51
  keywords=query_keywords,
49
52
  max_keywords=DEFAULT_MAX_QUERY_KEY_WORDS,
50
53
  )
51
- self.monitor.query_monitors.put(item=item)
52
- logger.debug(
53
- f"Queries in monitor are {self.monitor.query_monitors.get_queries_with_timesort()}."
54
- )
54
+ query_monitor = self.monitor.query_monitors[user_id][self.current_mem_cube_id]
55
+ query_monitor.put(item=item)
56
+ logger.debug(f"Queries in monitor are {query_monitor.get_queries_with_timesort()}.")
55
57
 
56
58
  queries = [query]
57
59
 
58
60
  # recall
59
- cur_working_memory, new_candidates = self.process_session_turn(
60
- queries=queries,
61
- user_id=user_id,
62
- mem_cube_id=self.current_mem_cube_id,
63
- mem_cube=self.current_mem_cube,
64
- top_k=self.top_k,
65
- )
66
- logger.info(f"Processed {queries} and get {len(new_candidates)} new candidate memories.")
67
-
68
- # rerank
69
- new_order_working_memory = self.replace_working_memory(
70
- user_id=user_id,
71
- mem_cube_id=self.current_mem_cube_id,
72
- mem_cube=self.current_mem_cube,
73
- original_memory=cur_working_memory,
74
- new_memory=new_candidates,
61
+ mem_cube = self.current_mem_cube
62
+ text_mem_base = mem_cube.text_mem
63
+
64
+ cur_working_memory: list[TextualMemoryItem] = text_mem_base.get_working_memory()
65
+ text_working_memory: list[str] = [w_m.memory for w_m in cur_working_memory]
66
+ intent_result = self.monitor.detect_intent(
67
+ q_list=queries, text_working_memory=text_working_memory
75
68
  )
76
- new_order_working_memory = new_order_working_memory[:top_k]
77
- logger.info(f"size of new_order_working_memory: {len(new_order_working_memory)}")
78
69
 
79
- return [m.memory for m in new_order_working_memory]
70
+ if not scheduler_flag:
71
+ return text_working_memory, intent_result["trigger_retrieval"]
72
+ else:
73
+ if intent_result["trigger_retrieval"]:
74
+ missing_evidences = intent_result["missing_evidences"]
75
+ num_evidence = len(missing_evidences)
76
+ k_per_evidence = max(1, top_k // max(1, num_evidence))
77
+ new_candidates = []
78
+ for item in missing_evidences:
79
+ logger.info(f"missing_evidences: {item}")
80
+ results: list[TextualMemoryItem] = self.retriever.search(
81
+ query=item,
82
+ mem_cube=mem_cube,
83
+ top_k=k_per_evidence,
84
+ method=self.search_method,
85
+ )
86
+ logger.info(
87
+ f"search results for {missing_evidences}: {[one.memory for one in results]}"
88
+ )
89
+ new_candidates.extend(results)
90
+ print(
91
+ f"missing_evidences: {missing_evidences} and get {len(new_candidates)} new candidate memories."
92
+ )
93
+ else:
94
+ new_candidates = []
95
+ print(f"intent_result: {intent_result}. not triggered")
96
+
97
+ # rerank
98
+ new_order_working_memory = self.replace_working_memory(
99
+ user_id=user_id,
100
+ mem_cube_id=self.current_mem_cube_id,
101
+ mem_cube=self.current_mem_cube,
102
+ original_memory=cur_working_memory,
103
+ new_memory=new_candidates,
104
+ )
105
+ new_order_working_memory = new_order_working_memory[:top_k]
106
+ logger.info(f"size of new_order_working_memory: {len(new_order_working_memory)}")
107
+
108
+ return [m.memory for m in new_order_working_memory], intent_result["trigger_retrieval"]
80
109
 
81
110
  def _query_message_consumer(self, messages: list[ScheduleMessageItem]) -> None:
82
111
  """
@@ -105,18 +134,42 @@ class GeneralScheduler(BaseScheduler):
105
134
 
106
135
  # update query monitors
107
136
  for msg in messages:
137
+ self.monitor.register_query_monitor_if_not_exists(
138
+ user_id=user_id, mem_cube_id=mem_cube_id
139
+ )
140
+
108
141
  query = msg.content
109
142
  query_keywords = self.monitor.extract_query_keywords(query=query)
110
143
  logger.info(f'Extract keywords "{query_keywords}" from query "{query}"')
111
144
 
145
+ if len(query_keywords) == 0:
146
+ stripped_query = query.strip()
147
+ # Determine measurement method based on language
148
+ if is_all_english(stripped_query):
149
+ words = stripped_query.split() # Word count for English
150
+ elif is_all_chinese(stripped_query):
151
+ words = stripped_query # Character count for Chinese
152
+ else:
153
+ logger.debug(
154
+ f"Mixed-language memory, using character count: {stripped_query[:50]}..."
155
+ )
156
+ words = stripped_query # Default to character count
157
+
158
+ query_keywords = list(set(words[:20]))
159
+ logger.error(
160
+ f"Keyword extraction failed for query. Using fallback keywords: {query_keywords[:10]}... (truncated)"
161
+ )
162
+
112
163
  item = QueryMonitorItem(
113
164
  query_text=query,
114
165
  keywords=query_keywords,
115
166
  max_keywords=DEFAULT_MAX_QUERY_KEY_WORDS,
116
167
  )
117
- self.monitor.query_monitors.put(item=item)
168
+
169
+ self.monitor.query_monitors[user_id][mem_cube_id].put(item=item)
118
170
  logger.debug(
119
- f"Queries in monitor are {self.monitor.query_monitors.get_queries_with_timesort()}."
171
+ f"Queries in monitor are "
172
+ f"{self.monitor.query_monitors[user_id][mem_cube_id].get_queries_with_timesort()}."
120
173
  )
121
174
 
122
175
  queries = [msg.content for msg in messages]
@@ -143,6 +196,20 @@ class GeneralScheduler(BaseScheduler):
143
196
  )
144
197
  logger.info(f"size of new_order_working_memory: {len(new_order_working_memory)}")
145
198
 
199
+ # update activation memories
200
+ logger.info(
201
+ f"Activation memory update {'enabled' if self.enable_activation_memory else 'disabled'} "
202
+ f"(interval: {self.monitor.act_mem_update_interval}s)"
203
+ )
204
+ if self.enable_activation_memory:
205
+ self.update_activation_memory_periodically(
206
+ interval_seconds=self.monitor.act_mem_update_interval,
207
+ label=QUERY_LABEL,
208
+ user_id=user_id,
209
+ mem_cube_id=mem_cube_id,
210
+ mem_cube=messages[0].mem_cube,
211
+ )
212
+
146
213
  def _answer_message_consumer(self, messages: list[ScheduleMessageItem]) -> None:
147
214
  """
148
215
  Process and handle answer trigger messages from the queue.
@@ -165,26 +232,6 @@ class GeneralScheduler(BaseScheduler):
165
232
  # for status update
166
233
  self._set_current_context_from_message(msg=messages[0])
167
234
 
168
- # update activation memories
169
- if self.enable_act_memory_update:
170
- if (
171
- len(self.monitor.working_memory_monitors[user_id][mem_cube_id].memories)
172
- == 0
173
- ):
174
- self.initialize_working_memory_monitors(
175
- user_id=user_id,
176
- mem_cube_id=mem_cube_id,
177
- mem_cube=messages[0].mem_cube,
178
- )
179
-
180
- self.update_activation_memory_periodically(
181
- interval_seconds=self.monitor.act_mem_update_interval,
182
- label=ANSWER_LABEL,
183
- user_id=user_id,
184
- mem_cube_id=mem_cube_id,
185
- mem_cube=messages[0].mem_cube,
186
- )
187
-
188
235
  def _add_message_consumer(self, messages: list[ScheduleMessageItem]) -> None:
189
236
  logger.info(f"Messages {messages} assigned to {ADD_LABEL} handler.")
190
237
  # Process the query in a session turn
@@ -215,6 +262,9 @@ class GeneralScheduler(BaseScheduler):
215
262
  mem_type = mem_item.metadata.memory_type
216
263
  mem_content = mem_item.memory
217
264
 
265
+ if mem_type == WORKING_MEMORY_TYPE:
266
+ continue
267
+
218
268
  self.log_adding_memory(
219
269
  memory=mem_content,
220
270
  memory_type=mem_type,
@@ -224,15 +274,6 @@ class GeneralScheduler(BaseScheduler):
224
274
  log_func_callback=self._submit_web_logs,
225
275
  )
226
276
 
227
- # update activation memories
228
- if self.enable_act_memory_update:
229
- self.update_activation_memory_periodically(
230
- interval_seconds=self.monitor.act_mem_update_interval,
231
- label=ADD_LABEL,
232
- user_id=user_id,
233
- mem_cube_id=mem_cube_id,
234
- mem_cube=messages[0].mem_cube,
235
- )
236
277
  except Exception as e:
237
278
  logger.error(f"Error: {e}", exc_info=True)
238
279
 
@@ -289,18 +330,20 @@ class GeneralScheduler(BaseScheduler):
289
330
  new_candidates = []
290
331
  for item in missing_evidences:
291
332
  logger.info(f"missing_evidences: {item}")
333
+ info = {
334
+ "user_id": user_id,
335
+ "session_id": "",
336
+ }
337
+
292
338
  results: list[TextualMemoryItem] = self.retriever.search(
293
- query=item, mem_cube=mem_cube, top_k=k_per_evidence, method=self.search_method
339
+ query=item,
340
+ mem_cube=mem_cube,
341
+ top_k=k_per_evidence,
342
+ method=self.search_method,
343
+ info=info,
294
344
  )
295
345
  logger.info(
296
346
  f"search results for {missing_evidences}: {[one.memory for one in results]}"
297
347
  )
298
348
  new_candidates.extend(results)
299
-
300
- if len(new_candidates) == 0:
301
- logger.warning(
302
- f"As new_candidates is empty, new_candidates is set same to working_memory.\n"
303
- f"time_trigger_flag: {time_trigger_flag}; intent_result: {intent_result}"
304
- )
305
- new_candidates = cur_working_memory
306
349
  return cur_working_memory, new_candidates
File without changes
@@ -0,0 +1,305 @@
1
+ import threading
2
+ import time
3
+
4
+ from concurrent.futures import ThreadPoolExecutor
5
+ from datetime import datetime
6
+ from time import perf_counter
7
+
8
+ from memos.configs.mem_scheduler import BaseSchedulerConfig
9
+ from memos.log import get_logger
10
+ from memos.mem_scheduler.general_modules.base import BaseSchedulerModule
11
+ from memos.mem_scheduler.general_modules.dispatcher import SchedulerDispatcher
12
+
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
+ class SchedulerDispatcherMonitor(BaseSchedulerModule):
18
+ """Monitors and manages scheduling operations with LLM integration."""
19
+
20
+ def __init__(self, config: BaseSchedulerConfig):
21
+ super().__init__()
22
+ self.config: BaseSchedulerConfig = config
23
+
24
+ self.check_interval = self.config.get("dispatcher_monitor_check_interval", 60)
25
+ self.max_failures = self.config.get("dispatcher_monitor_max_failures", 2)
26
+
27
+ # Registry of monitored thread pools
28
+ self._pools: dict[str, dict] = {}
29
+ self._pool_lock = threading.Lock()
30
+
31
+ # thread pool monitor
32
+ self._monitor_thread: threading.Thread | None = None
33
+ self._running = False
34
+ self._restart_in_progress = False
35
+
36
+ # modules with thread pool
37
+ self.dispatcher: SchedulerDispatcher | None = None
38
+ self.dispatcher_pool_name = "dispatcher"
39
+
40
+ def initialize(self, dispatcher: SchedulerDispatcher):
41
+ self.dispatcher = dispatcher
42
+ self.register_pool(
43
+ name=self.dispatcher_pool_name,
44
+ executor=self.dispatcher.dispatcher_executor,
45
+ max_workers=self.dispatcher.max_workers,
46
+ restart_on_failure=True,
47
+ )
48
+
49
+ def register_pool(
50
+ self,
51
+ name: str,
52
+ executor: ThreadPoolExecutor,
53
+ max_workers: int,
54
+ restart_on_failure: bool = True,
55
+ ) -> bool:
56
+ """
57
+ Register a thread pool for monitoring.
58
+
59
+ Args:
60
+ name: Unique identifier for the pool
61
+ executor: ThreadPoolExecutor instance to monitor
62
+ max_workers: Expected maximum worker count
63
+ restart_on_failure: Whether to restart if pool fails
64
+
65
+ Returns:
66
+ bool: True if registration succeeded, False if pool already registered
67
+ """
68
+ with self._pool_lock:
69
+ if name in self._pools:
70
+ logger.warning(f"Thread pool '{name}' is already registered")
71
+ return False
72
+
73
+ self._pools[name] = {
74
+ "executor": executor,
75
+ "max_workers": max_workers,
76
+ "restart": restart_on_failure,
77
+ "failure_count": 0,
78
+ "last_active": datetime.utcnow(),
79
+ "healthy": True,
80
+ }
81
+ logger.info(f"Registered thread pool '{name}' for monitoring")
82
+ return True
83
+
84
+ def unregister_pool(self, name: str) -> bool:
85
+ """
86
+ Remove a thread pool from monitoring.
87
+
88
+ Args:
89
+ name: Identifier of the pool to remove
90
+
91
+ Returns:
92
+ bool: True if removal succeeded, False if pool not found
93
+ """
94
+ with self._pool_lock:
95
+ if name not in self._pools:
96
+ logger.warning(f"Thread pool '{name}' not found in registry")
97
+ return False
98
+
99
+ del self._pools[name]
100
+ logger.info(f"Unregistered thread pool '{name}'")
101
+ return True
102
+
103
+ def _monitor_loop(self) -> None:
104
+ """Main monitoring loop that periodically checks all registered pools."""
105
+ logger.info(f"Starting monitor loop with {self.check_interval} second interval")
106
+
107
+ while self._running:
108
+ time.sleep(self.check_interval)
109
+ try:
110
+ self._check_pools_health()
111
+ except Exception as e:
112
+ logger.error(f"Error during health check: {e!s}", exc_info=True)
113
+
114
+ logger.debug("Monitor loop exiting")
115
+
116
+ def start(self) -> bool:
117
+ """
118
+ Start the monitoring thread.
119
+
120
+ Returns:
121
+ bool: True if monitor started successfully, False if already running
122
+ """
123
+ if self._running:
124
+ logger.warning("Dispatcher Monitor is already running")
125
+ return False
126
+
127
+ self._running = True
128
+ self._monitor_thread = threading.Thread(
129
+ target=self._monitor_loop, name="threadpool_monitor", daemon=True
130
+ )
131
+ self._monitor_thread.start()
132
+ logger.info("Dispatcher Monitor monitor started")
133
+ return True
134
+
135
+ def stop(self) -> None:
136
+ """
137
+ Stop the monitoring thread and clean up all managed thread pools.
138
+ Ensures proper shutdown of all monitored executors.
139
+ """
140
+ if not self._running:
141
+ return
142
+
143
+ # Stop the monitoring loop
144
+ self._running = False
145
+ if self._monitor_thread and self._monitor_thread.is_alive():
146
+ self._monitor_thread.join(timeout=5)
147
+
148
+ # Shutdown all registered pools
149
+ with self._pool_lock:
150
+ for name, pool_info in self._pools.items():
151
+ executor = pool_info["executor"]
152
+ if not executor._shutdown: # pylint: disable=protected-access
153
+ try:
154
+ logger.info(f"Shutting down thread pool '{name}'")
155
+ executor.shutdown(wait=True, cancel_futures=True)
156
+ logger.info(f"Successfully shut down thread pool '{name}'")
157
+ except Exception as e:
158
+ logger.error(f"Error shutting down pool '{name}': {e!s}", exc_info=True)
159
+
160
+ # Clear the pool registry
161
+ self._pools.clear()
162
+ logger.info("Thread pool monitor and all pools stopped")
163
+
164
+ def _check_pools_health(self) -> None:
165
+ """Check health of all registered thread pools."""
166
+ for name, pool_info in list(self._pools.items()):
167
+ is_healthy, reason = self._check_pool_health(
168
+ pool_info=pool_info,
169
+ stuck_max_interval=4,
170
+ )
171
+ logger.info(f"Pool '{name}'. is_healthy: {is_healthy}. pool_info: {pool_info}")
172
+ with self._pool_lock:
173
+ if is_healthy:
174
+ pool_info["failure_count"] = 0
175
+ pool_info["healthy"] = True
176
+ return
177
+ else:
178
+ pool_info["failure_count"] += 1
179
+ pool_info["healthy"] = False
180
+ logger.warning(
181
+ f"Pool '{name}' unhealthy ({pool_info['failure_count']}/{self.max_failures}): {reason}"
182
+ )
183
+
184
+ if (
185
+ pool_info["failure_count"] >= self.max_failures
186
+ and pool_info["restart"]
187
+ and not self._restart_in_progress
188
+ ):
189
+ self._restart_pool(name, pool_info)
190
+
191
+ def _check_pool_health(self, pool_info: dict, stuck_max_interval=4) -> tuple[bool, str]:
192
+ """
193
+ Check health of a single thread pool.
194
+
195
+ Args:
196
+ pool_info: Dictionary containing pool configuration
197
+
198
+ Returns:
199
+ Tuple: (is_healthy, reason) where reason explains failure if not healthy
200
+ """
201
+ executor = pool_info["executor"]
202
+
203
+ # Check if executor is shutdown
204
+ if executor._shutdown: # pylint: disable=protected-access
205
+ return False, "Executor is shutdown"
206
+
207
+ # Check thread activity
208
+ active_threads = sum(
209
+ 1
210
+ for t in threading.enumerate()
211
+ if t.name.startswith(executor._thread_name_prefix) # pylint: disable=protected-access
212
+ )
213
+
214
+ # Check if no threads are active but should be
215
+ if active_threads == 0 and pool_info["max_workers"] > 0:
216
+ return False, "No active worker threads"
217
+
218
+ # Check if threads are stuck (no activity for 2 intervals)
219
+ time_delta = (datetime.utcnow() - pool_info["last_active"]).total_seconds()
220
+ if time_delta >= self.check_interval * stuck_max_interval:
221
+ return False, "No recent activity"
222
+
223
+ # If we got here, pool appears healthy
224
+ pool_info["last_active"] = datetime.utcnow()
225
+ return True, ""
226
+
227
+ def _restart_pool(self, name: str, pool_info: dict) -> None:
228
+ """
229
+ Attempt to restart a failed thread pool.
230
+
231
+ Args:
232
+ name: Name of the pool to restart
233
+ pool_info: Dictionary containing pool configuration
234
+ """
235
+ if self._restart_in_progress:
236
+ return
237
+
238
+ self._restart_in_progress = True
239
+ logger.warning(f"Attempting to restart thread pool '{name}'")
240
+
241
+ try:
242
+ old_executor = pool_info["executor"]
243
+ self.dispatcher.shutdown()
244
+
245
+ # Create new executor with same parameters
246
+ new_executor = ThreadPoolExecutor(
247
+ max_workers=pool_info["max_workers"],
248
+ thread_name_prefix=self.dispatcher.thread_name_prefix, # pylint: disable=protected-access
249
+ )
250
+ self.unregister_pool(name=self.dispatcher_pool_name)
251
+ self.dispatcher.dispatcher_executor = new_executor
252
+ self.register_pool(
253
+ name=self.dispatcher_pool_name,
254
+ executor=self.dispatcher.dispatcher_executor,
255
+ max_workers=self.dispatcher.max_workers,
256
+ restart_on_failure=True,
257
+ )
258
+
259
+ # Replace in registry
260
+ start_time = perf_counter()
261
+ with self._pool_lock:
262
+ pool_info["executor"] = new_executor
263
+ pool_info["failure_count"] = 0
264
+ pool_info["healthy"] = True
265
+ pool_info["last_active"] = datetime.utcnow()
266
+
267
+ elapsed_time = perf_counter() - start_time
268
+ if elapsed_time > 1:
269
+ logger.warning(f"Long lock wait: {elapsed_time:.3f}s")
270
+
271
+ # Shutdown old executor
272
+ try:
273
+ old_executor.shutdown(wait=False)
274
+ except Exception as e:
275
+ logger.error(f"Error shutting down old executor: {e!s}", exc_info=True)
276
+
277
+ logger.info(f"Successfully restarted thread pool '{name}'")
278
+ except Exception as e:
279
+ logger.error(f"Failed to restart pool '{name}': {e!s}", exc_info=True)
280
+ finally:
281
+ self._restart_in_progress = False
282
+
283
+ def get_status(self, name: str | None = None) -> dict:
284
+ """
285
+ Get status of monitored pools.
286
+
287
+ Args:
288
+ name: Optional specific pool name to check
289
+
290
+ Returns:
291
+ Dictionary of status information
292
+ """
293
+ with self._pool_lock:
294
+ if name:
295
+ return {name: self._pools.get(name, {}).copy()}
296
+ return {k: v.copy() for k, v in self._pools.items()}
297
+
298
+ def __enter__(self):
299
+ """Context manager entry point."""
300
+ self.start()
301
+ return self
302
+
303
+ def __exit__(self, exc_type, exc_val, exc_tb):
304
+ """Context manager exit point."""
305
+ self.stop()