MemoryOS 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
@@ -0,0 +1,366 @@
1
+ import threading
2
+ import time
3
+
4
+ from time import perf_counter
5
+
6
+ from memos.configs.mem_scheduler import BaseSchedulerConfig
7
+ from memos.context.context import ContextThread, ContextThreadPoolExecutor
8
+ from memos.log import get_logger
9
+ from memos.mem_scheduler.general_modules.base import BaseSchedulerModule
10
+ from memos.mem_scheduler.schemas.general_schemas import (
11
+ DEFAULT_DISPATCHER_MONITOR_CHECK_INTERVAL,
12
+ DEFAULT_DISPATCHER_MONITOR_MAX_FAILURES,
13
+ DEFAULT_STOP_WAIT,
14
+ DEFAULT_STUCK_THREAD_TOLERANCE,
15
+ )
16
+ from memos.mem_scheduler.task_schedule_modules.dispatcher import SchedulerDispatcher
17
+ from memos.mem_scheduler.utils.db_utils import get_utc_now
18
+
19
+
20
+ logger = get_logger(__name__)
21
+
22
+
23
class SchedulerDispatcherMonitor(BaseSchedulerModule):
    """Background watchdog for the scheduler's thread pools.

    Pools are kept in a name -> info-dict registry guarded by ``_pool_lock``.
    A daemon thread wakes every ``check_interval`` seconds, health-checks each
    registered pool, counts consecutive failures and, once ``max_failures`` is
    reached for a pool registered with ``restart_on_failure=True``, rebuilds
    the dispatcher's executor.

    The monitor can be used as a context manager: ``__enter__`` calls
    :meth:`start` and ``__exit__`` calls :meth:`stop`.
    """

    def __init__(self, config: BaseSchedulerConfig):
        """
        Read monitor settings from ``config`` and set up an empty registry.

        Args:
            config: Scheduler configuration. Settings are read with
                ``config.get(key, default)``; when ``config`` is falsy the
                module-level defaults are used for every setting.
        """
        super().__init__()
        self.config: BaseSchedulerConfig = config

        def _setting(key: str, default):
            # Guard every lookup the same way so a missing config falls back
            # to defaults instead of failing on the first ``.get`` call.
            return self.config.get(key, default) if self.config else default

        self.check_interval = _setting(
            "dispatcher_monitor_check_interval", DEFAULT_DISPATCHER_MONITOR_CHECK_INTERVAL
        )
        self.max_failures = _setting(
            "dispatcher_monitor_max_failures", DEFAULT_DISPATCHER_MONITOR_MAX_FAILURES
        )

        # Registry of monitored thread pools
        self._pools: dict[str, dict] = {}
        # NOTE: plain (non-reentrant) lock - never call a method that takes it
        # while already holding it.
        self._pool_lock = threading.Lock()

        # thread pool monitor
        self._monitor_thread: threading.Thread | None = None
        self._running = False
        self._restart_in_progress = False

        # modules with thread pool
        self.dispatcher: SchedulerDispatcher | None = None
        self.dispatcher_pool_name = "dispatcher"

        # Configure shutdown wait behavior from config or default
        self.stop_wait = _setting("stop_wait", DEFAULT_STOP_WAIT)

    def initialize(self, dispatcher: SchedulerDispatcher):
        """
        Attach the dispatcher and register its executor for monitoring.

        Args:
            dispatcher: Dispatcher whose ``dispatcher_executor`` is registered
                under ``self.dispatcher_pool_name`` with restart enabled.
        """
        self.dispatcher = dispatcher
        self.register_pool(
            name=self.dispatcher_pool_name,
            executor=self.dispatcher.dispatcher_executor,
            max_workers=self.dispatcher.max_workers,
            restart_on_failure=True,
        )

    def register_pool(
        self,
        name: str,
        executor: ContextThreadPoolExecutor,
        max_workers: int,
        restart_on_failure: bool = True,
    ) -> bool:
        """
        Register a thread pool for monitoring.

        Args:
            name: Unique identifier for the pool
            executor: ThreadPoolExecutor instance to monitor
            max_workers: Expected maximum worker count
            restart_on_failure: Whether to restart if pool fails

        Returns:
            bool: True if registration succeeded, False if pool already registered
        """
        with self._pool_lock:
            if name in self._pools:
                logger.warning(f"Thread pool '{name}' is already registered")
                return False

            self._pools[name] = {
                "executor": executor,
                "max_workers": max_workers,
                "restart": restart_on_failure,
                "failure_count": 0,
                "last_active": get_utc_now(),
                "healthy": True,
            }
            logger.info(f"Registered thread pool '{name}' for monitoring")
            return True

    def unregister_pool(self, name: str) -> bool:
        """
        Remove a thread pool from monitoring.

        The executor itself is not shut down here; only the registry entry is
        deleted.

        Args:
            name: Identifier of the pool to remove

        Returns:
            bool: True if removal succeeded, False if pool not found
        """
        with self._pool_lock:
            if name not in self._pools:
                logger.warning(f"Thread pool '{name}' not found in registry")
                return False

            del self._pools[name]
            logger.info(f"Unregistered thread pool '{name}'")
            return True

    def _monitor_loop(self) -> None:
        """Main monitoring loop that periodically checks all registered pools."""
        logger.info(f"Starting monitor loop with {self.check_interval} second interval")

        while self._running:
            time.sleep(self.check_interval)
            if not self._running:
                # stop() was requested while we slept. Skip the check: the
                # pools may already be shut down, and counting that as a
                # failure could trigger a restart after shutdown.
                break
            try:
                self._check_pools_health()
            except Exception as e:
                # Keep the watchdog alive no matter what a single check raises.
                logger.error(f"Error during health check: {e!s}", exc_info=True)

        logger.debug("Monitor loop exiting")

    def _check_pools_health(self) -> None:
        """
        Check health of all registered thread pools.

        Updates each pool's ``failure_count`` / ``healthy`` fields and restarts
        a pool once it has failed ``max_failures`` consecutive checks and was
        registered with restart enabled.
        """
        # Iterate over a snapshot: a restart unregisters/re-registers entries.
        for name, pool_info in list(self._pools.items()):
            is_healthy, reason = self._check_pool_health(
                pool_info=pool_info,
                stuck_max_interval=4,
            )
            if not is_healthy:
                logger.info(f"Pool '{name}'. is_healthy: {is_healthy}. pool_info: {pool_info}")

            needs_restart = False
            with self._pool_lock:
                if is_healthy:
                    pool_info["failure_count"] = 0
                    pool_info["healthy"] = True
                else:
                    pool_info["failure_count"] += 1
                    pool_info["healthy"] = False
                    logger.info(
                        f"Pool '{name}' unhealthy ({pool_info['failure_count']}/{self.max_failures}): {reason}."
                        f" Note: This status does not necessarily indicate a problem with the pool itself - "
                        f"it may also be considered unhealthy if no tasks have been scheduled for an extended period"
                    )
                    needs_restart = (
                        pool_info["failure_count"] >= self.max_failures
                        and pool_info["restart"]
                        and not self._restart_in_progress
                    )

            # Restart only after releasing the lock: _restart_pool acquires
            # _pool_lock itself (directly and via unregister/register), and
            # threading.Lock is not reentrant.
            if needs_restart:
                self._restart_pool(name, pool_info)

    def _check_pool_health(
        self, pool_info: dict, stuck_max_interval=4, stuck_thread_tolerance=None
    ) -> tuple[bool, str]:
        """
        Check health of a single thread pool with enhanced task tracking.

        A pool is reported unhealthy when its executor is shut down, when the
        number of long-running dispatcher tasks reaches the tolerance, or when
        ``last_active`` is older than ``check_interval * stuck_max_interval``
        seconds. On success ``pool_info["last_active"]`` is refreshed.

        Args:
            pool_info: Dictionary containing pool configuration
            stuck_max_interval: Maximum intervals before considering pool stuck
            stuck_thread_tolerance: Maximum number of stuck threads to tolerate before
                restarting pool; defaults to DEFAULT_STUCK_THREAD_TOLERANCE

        Returns:
            Tuple: (is_healthy, reason) where reason explains failure if not healthy
        """
        if stuck_thread_tolerance is None:
            stuck_thread_tolerance = DEFAULT_STUCK_THREAD_TOLERANCE

        executor = pool_info["executor"]
        stuck_threshold_seconds = self.check_interval * stuck_max_interval

        # Check if executor is shutdown
        if executor._shutdown:  # pylint: disable=protected-access
            return False, "Executor is shutdown"

        # Enhanced health check using dispatcher task tracking
        stuck_tasks = []
        if self.dispatcher:
            running_tasks = self.dispatcher.get_running_tasks()
            running_count = self.dispatcher.get_running_task_count()

            # Log detailed task information
            if running_tasks:
                logger.debug(f"Currently running {running_count} tasks:")
                for _task_id, task in running_tasks.items():
                    logger.debug(f" - {task.get_execution_info()}")
            else:
                logger.debug("No tasks currently running")

            # Check for stuck tasks (running longer than expected)
            stuck_tasks = [
                task
                for task in running_tasks.values()
                if task.duration_seconds and task.duration_seconds > stuck_threshold_seconds
            ]

        # Always log stuck tasks if any exist
        if stuck_tasks:
            logger.warning(f"Found {len(stuck_tasks)} potentially stuck tasks:")
            for task in stuck_tasks:
                task_info = task.get_execution_info()
                messages_info = ""
                if task.messages:
                    messages_info = f", Messages: {len(task.messages)} items - {[str(msg) for msg in task.messages[:3]]}"
                    if len(task.messages) > 3:
                        messages_info += f" ... and {len(task.messages) - 3} more"
                logger.warning(f" - Stuck task: {task_info}{messages_info}")

            # Check if stuck task count exceeds tolerance
            # If thread pool size is smaller, use the smaller value as threshold
            max_workers = pool_info.get("max_workers", 0)
            effective_tolerance = (
                min(stuck_thread_tolerance, max_workers)
                if max_workers > 0
                else stuck_thread_tolerance
            )

            # Only evaluated when at least one task is stuck, so a tolerance
            # of 0 cannot flag a pool with zero stuck tasks.
            if len(stuck_tasks) >= effective_tolerance:
                return (
                    False,
                    f"Found {len(stuck_tasks)} stuck tasks (tolerance: {effective_tolerance})",
                )

        # Only check for stuck threads, not inactive threads
        # Check if threads are stuck (no activity for specified intervals)
        time_delta = (get_utc_now() - pool_info["last_active"]).total_seconds()
        if time_delta >= stuck_threshold_seconds:
            return False, f"No recent activity for {time_delta:.1f} seconds"

        # If we got here, pool appears healthy
        pool_info["last_active"] = get_utc_now()

        return True, ""

    def _restart_pool(self, name: str, pool_info: dict) -> None:
        """
        Attempt to restart a failed thread pool.

        Must be called WITHOUT ``_pool_lock`` held, because it acquires the
        lock itself. Any exception is logged, not raised.

        NOTE(review): the restart always targets ``self.dispatcher`` and the
        registry entry named ``self.dispatcher_pool_name``, whatever ``name``
        is - confirm before registering other pools with restart enabled.

        Args:
            name: Name of the pool to restart
            pool_info: Dictionary containing pool configuration
        """
        if self._restart_in_progress:
            return

        self._restart_in_progress = True
        logger.info(f"Attempting to restart thread pool '{name}'")

        try:
            old_executor = pool_info["executor"]
            self.dispatcher.shutdown()

            # Create new executor with same parameters
            new_executor = ContextThreadPoolExecutor(
                max_workers=pool_info["max_workers"],
                thread_name_prefix=self.dispatcher.thread_name_prefix,  # pylint: disable=protected-access
            )
            # Swap the dispatcher's executor and give it a fresh registry entry.
            self.unregister_pool(name=self.dispatcher_pool_name)
            self.dispatcher.dispatcher_executor = new_executor
            self.register_pool(
                name=self.dispatcher_pool_name,
                executor=self.dispatcher.dispatcher_executor,
                max_workers=self.dispatcher.max_workers,
                restart_on_failure=True,
            )

            # Also refresh the dict handed in by the caller so it no longer
            # carries the failed executor / failure state.
            start_time = perf_counter()
            with self._pool_lock:
                pool_info["executor"] = new_executor
                pool_info["failure_count"] = 0
                pool_info["healthy"] = True
                pool_info["last_active"] = get_utc_now()

            elapsed_time = perf_counter() - start_time
            if elapsed_time > 1:
                logger.warning(f"Long lock wait: {elapsed_time:.3f}s")

            # Shutdown old executor
            try:
                old_executor.shutdown(wait=False)
            except Exception as e:
                logger.error(f"Error shutting down old executor: {e!s}", exc_info=True)

            logger.info(f"Successfully restarted thread pool '{name}'")
        except Exception as e:
            logger.error(f"Failed to restart pool '{name}': {e!s}", exc_info=True)
        finally:
            self._restart_in_progress = False

    def get_status(self, name: str | None = None) -> dict:
        """
        Get status of monitored pools.

        Args:
            name: Optional specific pool name to check

        Returns:
            Dictionary mapping pool name to a shallow copy of its info dict.
            When ``name`` is given and unknown, its value is an empty dict.
        """
        with self._pool_lock:
            if name:
                return {name: self._pools.get(name, {}).copy()}
            return {k: v.copy() for k, v in self._pools.items()}

    def __enter__(self):
        """Context manager entry point."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit point."""
        self.stop()

    def start(self) -> bool:
        """
        Start the monitoring thread.

        Returns:
            bool: True if monitor started successfully, False if already running
        """
        if self._running:
            logger.warning("Dispatcher Monitor is already running")
            return False

        self._running = True
        self._monitor_thread = ContextThread(
            target=self._monitor_loop, name="threadpool_monitor", daemon=True
        )
        self._monitor_thread.start()
        logger.info("Dispatcher Monitor monitor started")
        return True

    def stop(self) -> None:
        """
        Stop the monitoring thread and clean up all managed thread pools.
        Ensures proper shutdown of all monitored executors.

        The monitor thread is joined for at most 5 seconds; executors are shut
        down with ``wait=self.stop_wait`` and pending futures cancelled.
        Does nothing if the monitor is not running.
        """
        if not self._running:
            return

        # Stop the monitoring loop
        self._running = False
        if self._monitor_thread and self._monitor_thread.is_alive():
            self._monitor_thread.join(timeout=5)

        # Shutdown all registered pools
        with self._pool_lock:
            for name, pool_info in self._pools.items():
                executor = pool_info["executor"]
                if not executor._shutdown:  # pylint: disable=protected-access
                    try:
                        logger.info(f"Shutting down thread pool '{name}'")
                        executor.shutdown(wait=self.stop_wait, cancel_futures=True)
                        logger.info(f"Successfully shut down thread pool '{name}'")
                    except Exception as e:
                        logger.error(f"Error shutting down pool '{name}': {e!s}", exc_info=True)

        logger.info("Thread pool monitor and all pools stopped")