MemoryOS 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
import threading
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
from time import perf_counter
|
|
5
|
+
|
|
6
|
+
from memos.configs.mem_scheduler import BaseSchedulerConfig
|
|
7
|
+
from memos.context.context import ContextThread, ContextThreadPoolExecutor
|
|
8
|
+
from memos.log import get_logger
|
|
9
|
+
from memos.mem_scheduler.general_modules.base import BaseSchedulerModule
|
|
10
|
+
from memos.mem_scheduler.schemas.general_schemas import (
|
|
11
|
+
DEFAULT_DISPATCHER_MONITOR_CHECK_INTERVAL,
|
|
12
|
+
DEFAULT_DISPATCHER_MONITOR_MAX_FAILURES,
|
|
13
|
+
DEFAULT_STOP_WAIT,
|
|
14
|
+
DEFAULT_STUCK_THREAD_TOLERANCE,
|
|
15
|
+
)
|
|
16
|
+
from memos.mem_scheduler.task_schedule_modules.dispatcher import SchedulerDispatcher
|
|
17
|
+
from memos.mem_scheduler.utils.db_utils import get_utc_now
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Module-level logger, obtained from the project's logging helper and named after this module.
logger = get_logger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SchedulerDispatcherMonitor(BaseSchedulerModule):
    """Watch registered thread pools and restart the dispatcher pool when it looks stuck.

    A background thread wakes every ``check_interval`` seconds, evaluates every
    registered pool with :meth:`_check_pool_health`, and, after ``max_failures``
    consecutive unhealthy checks of a pool registered with restart enabled,
    replaces the dispatcher's executor via :meth:`_restart_pool`.

    Locking: ``_pool_lock`` is a plain (non-reentrant) ``threading.Lock``. Any
    method that takes it must not call another method that takes it as well.
    """

    def __init__(self, config: BaseSchedulerConfig):
        """Read monitor settings from ``config`` and initialise empty state.

        Args:
            config: Scheduler configuration. The keys read are
                ``dispatcher_monitor_check_interval``,
                ``dispatcher_monitor_max_failures`` and ``stop_wait``; each
                falls back to its module default when absent or when
                ``config`` is falsy.
        """
        super().__init__()
        self.config: BaseSchedulerConfig = config

        self.check_interval = self._monitor_config_value(
            "dispatcher_monitor_check_interval", DEFAULT_DISPATCHER_MONITOR_CHECK_INTERVAL
        )
        self.max_failures = self._monitor_config_value(
            "dispatcher_monitor_max_failures", DEFAULT_DISPATCHER_MONITOR_MAX_FAILURES
        )

        # Registry of monitored thread pools, keyed by pool name.
        self._pools: dict[str, dict] = {}
        self._pool_lock = threading.Lock()

        # Background monitor thread and its control flags.
        self._monitor_thread: threading.Thread | None = None
        self._running = False
        self._restart_in_progress = False
        # Set by stop() so the loop's wait returns immediately instead of
        # sleeping out the remainder of the interval.
        self._stop_event = threading.Event()

        # Module owning the monitored thread pool; attached by initialize().
        self.dispatcher: SchedulerDispatcher | None = None
        self.dispatcher_pool_name = "dispatcher"

        # Passed as ``wait=`` to executor.shutdown() in stop().
        self.stop_wait = self._monitor_config_value("stop_wait", DEFAULT_STOP_WAIT)

    def _monitor_config_value(self, key: str, default):
        """Return ``self.config.get(key, default)``, or ``default`` if config is falsy."""
        if not self.config:
            return default
        return self.config.get(key, default)

    def initialize(self, dispatcher: SchedulerDispatcher):
        """Attach the dispatcher and register its executor for monitoring.

        Args:
            dispatcher: Dispatcher whose ``dispatcher_executor`` is monitored.
        """
        self.dispatcher = dispatcher
        self.register_pool(
            name=self.dispatcher_pool_name,
            executor=self.dispatcher.dispatcher_executor,
            max_workers=self.dispatcher.max_workers,
            restart_on_failure=True,
        )

    def register_pool(
        self,
        name: str,
        executor: ContextThreadPoolExecutor,
        max_workers: int,
        restart_on_failure: bool = True,
    ) -> bool:
        """
        Register a thread pool for monitoring.

        Args:
            name: Unique identifier for the pool
            executor: ThreadPoolExecutor instance to monitor
            max_workers: Expected maximum worker count
            restart_on_failure: Whether to restart if pool fails

        Returns:
            bool: True if registration succeeded, False if pool already registered
        """
        with self._pool_lock:
            if name in self._pools:
                logger.warning(f"Thread pool '{name}' is already registered")
                return False

            self._pools[name] = {
                "executor": executor,
                "max_workers": max_workers,
                "restart": restart_on_failure,
                "failure_count": 0,
                "last_active": get_utc_now(),
                "healthy": True,
            }
            logger.info(f"Registered thread pool '{name}' for monitoring")
            return True

    def unregister_pool(self, name: str) -> bool:
        """
        Remove a thread pool from monitoring.

        Args:
            name: Identifier of the pool to remove

        Returns:
            bool: True if removal succeeded, False if pool not found
        """
        with self._pool_lock:
            if name not in self._pools:
                logger.warning(f"Thread pool '{name}' not found in registry")
                return False

            del self._pools[name]
            logger.info(f"Unregistered thread pool '{name}'")
            return True

    def _monitor_loop(self) -> None:
        """Main monitoring loop that periodically checks all registered pools.

        Runs until ``_running`` is cleared. The wait between checks is
        interruptible, and the flag is re-checked after waking, so no health
        check (and therefore no pool restart) happens once stop() was called.
        """
        logger.info(f"Starting monitor loop with {self.check_interval} second interval")

        while self._running:
            # Event.wait returns True as soon as stop() sets the event.
            if self._stop_event.wait(self.check_interval):
                break
            if not self._running:
                break
            try:
                self._check_pools_health()
            except Exception as e:
                # Keep the monitor alive; a failed check is retried next interval.
                logger.error(f"Error during health check: {e!s}", exc_info=True)

        logger.debug("Monitor loop exiting")

    def _check_pools_health(self) -> None:
        """Check health of all registered thread pools and restart failed ones.

        The registry is snapshotted under the lock, and the restart itself is
        performed after the lock is released, because ``_restart_pool`` calls
        ``unregister_pool`` / ``register_pool`` which take the same
        non-reentrant lock.
        """
        with self._pool_lock:
            pools_snapshot = list(self._pools.items())

        for name, pool_info in pools_snapshot:
            is_healthy, reason = self._check_pool_health(
                pool_info=pool_info,
                stuck_max_interval=4,
            )
            if not is_healthy:
                logger.info(f"Pool '{name}'. is_healthy: {is_healthy}. pool_info: {pool_info}")

            with self._pool_lock:
                if is_healthy:
                    pool_info["failure_count"] = 0
                    pool_info["healthy"] = True
                else:
                    pool_info["failure_count"] += 1
                    pool_info["healthy"] = False
                    logger.info(
                        f"Pool '{name}' unhealthy ({pool_info['failure_count']}/{self.max_failures}): {reason}."
                        f" Note: This status does not necessarily indicate a problem with the pool itself - "
                        f"it may also be considered unhealthy if no tasks have been scheduled for an extended period"
                    )
                # Decide under the lock, act outside it.
                needs_restart = (
                    pool_info["failure_count"] >= self.max_failures
                    and pool_info["restart"]
                    and not self._restart_in_progress
                )

            if needs_restart:
                self._restart_pool(name, pool_info)

    def _check_pool_health(
        self, pool_info: dict, stuck_max_interval=4, stuck_thread_tolerance=None
    ) -> tuple[bool, str]:
        """
        Check health of a single thread pool with enhanced task tracking.

        A pool is reported unhealthy when its executor is shut down, when the
        number of tasks running longer than ``check_interval * stuck_max_interval``
        seconds reaches the tolerance, or when the pool's ``last_active``
        timestamp is at least that old. On a healthy result ``last_active`` is
        refreshed to the current time.

        Args:
            pool_info: Dictionary containing pool configuration
            stuck_max_interval: Maximum intervals before considering pool stuck
            stuck_thread_tolerance: Maximum number of stuck threads to tolerate before
                restarting pool; defaults to DEFAULT_STUCK_THREAD_TOLERANCE

        Returns:
            Tuple: (is_healthy, reason) where reason explains failure if not healthy
        """
        if stuck_thread_tolerance is None:
            stuck_thread_tolerance = DEFAULT_STUCK_THREAD_TOLERANCE

        executor = pool_info["executor"]

        # Check if executor is shutdown
        if executor._shutdown:  # pylint: disable=protected-access
            return False, "Executor is shutdown"

        stuck_threshold_seconds = self.check_interval * stuck_max_interval

        # Enhanced health check using dispatcher task tracking
        stuck_tasks = []
        if self.dispatcher:
            running_tasks = self.dispatcher.get_running_tasks()
            running_count = self.dispatcher.get_running_task_count()

            # Log detailed task information
            if running_tasks:
                logger.debug(f"Currently running {running_count} tasks:")
                for _task_id, task in running_tasks.items():
                    logger.debug(f"  - {task.get_execution_info()}")
            else:
                logger.debug("No tasks currently running")

            # Check for stuck tasks (running longer than expected)
            for task in running_tasks.values():
                if task.duration_seconds and task.duration_seconds > stuck_threshold_seconds:
                    stuck_tasks.append(task)

        # Always log stuck tasks if any exist
        if stuck_tasks:
            logger.warning(f"Found {len(stuck_tasks)} potentially stuck tasks:")
            for task in stuck_tasks:
                task_info = task.get_execution_info()
                messages_info = ""
                if task.messages:
                    messages_info = f", Messages: {len(task.messages)} items - {[str(msg) for msg in task.messages[:3]]}"
                    if len(task.messages) > 3:
                        messages_info += f" ... and {len(task.messages) - 3} more"
                logger.warning(f"  - Stuck task: {task_info}{messages_info}")

            # Check if stuck task count exceeds tolerance
            # If thread pool size is smaller, use the smaller value as threshold
            max_workers = pool_info.get("max_workers", 0)
            effective_tolerance = (
                min(stuck_thread_tolerance, max_workers)
                if max_workers > 0
                else stuck_thread_tolerance
            )

            if len(stuck_tasks) >= effective_tolerance:
                return (
                    False,
                    f"Found {len(stuck_tasks)} stuck tasks (tolerance: {effective_tolerance})",
                )

        # Only check for stuck threads, not inactive threads
        # Check if threads are stuck (no activity for specified intervals)
        time_delta = (get_utc_now() - pool_info["last_active"]).total_seconds()
        if time_delta >= stuck_threshold_seconds:
            return False, f"No recent activity for {time_delta:.1f} seconds"

        # If we got here, pool appears healthy
        pool_info["last_active"] = get_utc_now()

        return True, ""

    def _restart_pool(self, name: str, pool_info: dict) -> None:
        """
        Attempt to restart a failed thread pool.

        Shuts the dispatcher down, builds a new executor, installs it on the
        dispatcher and re-registers it under ``dispatcher_pool_name``. Any
        failure is logged and swallowed so the monitor loop keeps running.

        Must be called WITHOUT ``_pool_lock`` held: it calls ``unregister_pool``
        and ``register_pool``, which take that non-reentrant lock themselves.

        Args:
            name: Name of the pool to restart
            pool_info: Dictionary containing pool configuration
        """
        if self._restart_in_progress:
            return

        self._restart_in_progress = True
        logger.info(f"Attempting to restart thread pool '{name}'")

        try:
            if self.dispatcher is None:
                # Pools registered directly via register_pool() have no
                # dispatcher to rebuild the executor for.
                raise RuntimeError("No dispatcher attached; call initialize() before restarting")

            old_executor = pool_info["executor"]
            self.dispatcher.shutdown()

            # Create new executor with same parameters
            new_executor = ContextThreadPoolExecutor(
                max_workers=pool_info["max_workers"],
                thread_name_prefix=self.dispatcher.thread_name_prefix,  # pylint: disable=protected-access
            )
            # NOTE(review): the registry swap always targets the dispatcher
            # pool, regardless of ``name`` - confirm no other pool is ever
            # registered with restart enabled.
            self.unregister_pool(name=self.dispatcher_pool_name)
            self.dispatcher.dispatcher_executor = new_executor
            self.register_pool(
                name=self.dispatcher_pool_name,
                executor=self.dispatcher.dispatcher_executor,
                max_workers=self.dispatcher.max_workers,
                restart_on_failure=True,
            )

            # Refresh the caller's dict as well. When ``name`` is the
            # dispatcher pool this dict was just detached from the registry
            # (register_pool created a fresh entry), so this only keeps the
            # caller's reference consistent.
            start_time = perf_counter()
            with self._pool_lock:
                pool_info["executor"] = new_executor
                pool_info["failure_count"] = 0
                pool_info["healthy"] = True
                pool_info["last_active"] = get_utc_now()

            elapsed_time = perf_counter() - start_time
            if elapsed_time > 1:
                logger.warning(f"Long lock wait: {elapsed_time:.3f}s")

            # Shutdown old executor
            try:
                old_executor.shutdown(wait=False)
            except Exception as e:
                logger.error(f"Error shutting down old executor: {e!s}", exc_info=True)

            logger.info(f"Successfully restarted thread pool '{name}'")
        except Exception as e:
            logger.error(f"Failed to restart pool '{name}': {e!s}", exc_info=True)
        finally:
            self._restart_in_progress = False

    def get_status(self, name: str | None = None) -> dict:
        """
        Get status of monitored pools.

        Args:
            name: Optional specific pool name to check

        Returns:
            Dictionary mapping pool name to a shallow copy of its registry
            entry. For an unknown ``name`` the value is an empty dict.
        """
        with self._pool_lock:
            if name:
                return {name: self._pools.get(name, {}).copy()}
            return {k: v.copy() for k, v in self._pools.items()}

    def __enter__(self):
        """Context manager entry point: start monitoring and return self."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit point: stop monitoring; exceptions are not suppressed."""
        self.stop()

    def start(self) -> bool:
        """
        Start the monitoring thread.

        Returns:
            bool: True if monitor started successfully, False if already running
        """
        if self._running:
            logger.warning("Dispatcher Monitor is already running")
            return False

        self._running = True
        # Clear a stop request left over from a previous stop().
        self._stop_event.clear()
        self._monitor_thread = ContextThread(
            target=self._monitor_loop, name="threadpool_monitor", daemon=True
        )
        self._monitor_thread.start()
        logger.info("Dispatcher Monitor monitor started")
        return True

    def stop(self) -> None:
        """
        Stop the monitoring thread and clean up all managed thread pools.

        Wakes the monitor thread, waits up to 5 seconds for it to exit, then
        shuts down every registered executor that is not already shut down.
        Does nothing if the monitor is not running.
        """
        if not self._running:
            return

        # Stop the monitoring loop and interrupt its wait.
        self._running = False
        self._stop_event.set()
        if self._monitor_thread and self._monitor_thread.is_alive():
            self._monitor_thread.join(timeout=5)

        # Snapshot under the lock, shut down outside it: shutdown() may block
        # when stop_wait is truthy, and must not stall other registry users.
        with self._pool_lock:
            pools_snapshot = list(self._pools.items())

        for name, pool_info in pools_snapshot:
            executor = pool_info["executor"]
            if executor._shutdown:  # pylint: disable=protected-access
                continue
            try:
                logger.info(f"Shutting down thread pool '{name}'")
                executor.shutdown(wait=self.stop_wait, cancel_futures=True)
                logger.info(f"Successfully shut down thread pool '{name}'")
            except Exception as e:
                logger.error(f"Error shutting down pool '{name}': {e!s}", exc_info=True)

        logger.info("Thread pool monitor and all pools stopped")
|