MemoryOS 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
@@ -0,0 +1,740 @@
1
+ import concurrent
2
+ import threading
3
+ import time
4
+
5
+ from collections import defaultdict
6
+ from collections.abc import Callable
7
+ from datetime import datetime, timezone
8
+ from typing import Any
9
+
10
+ from memos.context.context import (
11
+ ContextThreadPoolExecutor,
12
+ RequestContext,
13
+ generate_trace_id,
14
+ set_request_context,
15
+ )
16
+ from memos.log import get_logger
17
+ from memos.mem_scheduler.general_modules.base import BaseSchedulerModule
18
+ from memos.mem_scheduler.general_modules.task_threads import ThreadManager
19
+ from memos.mem_scheduler.schemas.general_schemas import (
20
+ DEFAULT_STOP_WAIT,
21
+ )
22
+ from memos.mem_scheduler.schemas.message_schemas import ScheduleLogForWebItem, ScheduleMessageItem
23
+ from memos.mem_scheduler.schemas.task_schemas import RunningTaskItem
24
+ from memos.mem_scheduler.task_schedule_modules.orchestrator import SchedulerOrchestrator
25
+ from memos.mem_scheduler.task_schedule_modules.redis_queue import SchedulerRedisQueue
26
+ from memos.mem_scheduler.task_schedule_modules.task_queue import ScheduleTaskQueue
27
+ from memos.mem_scheduler.utils.misc_utils import group_messages_by_user_and_mem_cube, is_cloud_env
28
+ from memos.mem_scheduler.utils.monitor_event_utils import emit_monitor_event, to_iso
29
+ from memos.mem_scheduler.utils.status_tracker import TaskStatusTracker
30
+
31
+
32
+ logger = get_logger(__name__)
33
+
34
+
35
+ class SchedulerDispatcher(BaseSchedulerModule):
36
+ """
37
+ Thread pool-based message dispatcher that routes messages to dedicated handlers
38
+ based on their labels.
39
+
40
+ Features:
41
+ - Dedicated thread pool per message label
42
+ - Batch message processing
43
+ - Graceful shutdown
44
+ - Bulk handler registration
45
+ - Thread race competition for parallel task execution
46
+ """
47
+
48
def __init__(
    self,
    max_workers: int = 30,
    memos_message_queue: ScheduleTaskQueue | None = None,
    enable_parallel_dispatch: bool = True,
    config=None,
    status_tracker: TaskStatusTracker | None = None,
    metrics: Any | None = None,
    submit_web_logs: Callable | None = None,  # ADDED
    orchestrator: SchedulerOrchestrator | None = None,
):
    """
    Build the dispatcher state: queue handle, optional worker pool and task bookkeeping.

    Args:
        max_workers: Worker count handed to the thread pool (pool is only built in parallel mode)
        memos_message_queue: Either a wrapper exposing a ``memos_message_queue`` attribute
            or the concrete queue object itself
        enable_parallel_dispatch: When falsy, no thread pool is created
        config: Optional object read through ``.get`` for ``multi_task_running_timeout``
            and ``stop_wait``
        status_tracker: Optional tracker, stored as-is
        metrics: Optional metrics object, stored as-is
        submit_web_logs: Optional callable, stored as-is
        orchestrator: Orchestrator to use; a new ``SchedulerOrchestrator`` is created when None
    """
    super().__init__()
    self.config = config

    # Main dispatcher thread pool size
    self.max_workers = max_workers

    # Unwrap a queue wrapper when one is given; otherwise keep the object as passed
    if hasattr(memos_message_queue, "memos_message_queue"):
        self.memos_message_queue = memos_message_queue.memos_message_queue
    else:
        self.memos_message_queue = memos_message_queue

    if orchestrator is None:
        orchestrator = SchedulerOrchestrator()
    self.orchestrator = orchestrator

    # Multi-task timeout comes from config when a (truthy) config is present
    if self.config:
        self.multi_task_running_timeout = self.config.get("multi_task_running_timeout")
    else:
        self.multi_task_running_timeout = None

    # The executor exists only in parallel mode
    self.enable_parallel_dispatch = enable_parallel_dispatch
    self.thread_name_prefix = "dispatcher"
    if not self.enable_parallel_dispatch:
        self.dispatcher_executor = None
        logger.info(f"enable_parallel_dispatch is set to {self.enable_parallel_dispatch}")
    else:
        self.dispatcher_executor = ContextThreadPoolExecutor(
            max_workers=self.max_workers, thread_name_prefix=self.thread_name_prefix
        )
        logger.info(f"Max works of dispatcher is set to {self.max_workers}")

    # Label -> handler registry
    self.handlers: dict[str, Callable] = {}

    # Dispatcher running flag
    self._running = False

    # Futures tracked for monitoring purposes
    self._futures = set()

    # Thread race helper; receives the executor, which is None in non-parallel mode
    self.thread_manager = ThreadManager(thread_pool_executor=self.dispatcher_executor)

    # Running-task table and the lock protecting it
    self._running_tasks: dict[str, RunningTaskItem] = {}
    self._task_lock = threading.Lock()

    # Shutdown wait behaviour: config value when available, otherwise the default
    if self.config:
        self.stop_wait = self.config.get("stop_wait", DEFAULT_STOP_WAIT)
    else:
        self.stop_wait = DEFAULT_STOP_WAIT

    self.metrics = metrics
    self.status_tracker = status_tracker
    self.submit_web_logs = submit_web_logs  # ADDED
113
+
114
def on_messages_enqueued(self, msgs: list[ScheduleMessageItem]) -> None:
    """
    Hook invoked with newly enqueued messages; performs no work here.

    Args:
        msgs: The enqueued messages (may be empty)
    """
    if not msgs:
        return None
    # Nothing to do for non-empty batches either: this is handled in BaseScheduler now
    return None
118
+
119
def _create_task_wrapper(self, handler: Callable, task_item: RunningTaskItem):
    """
    Create a wrapper around the handler to track task execution and capture results.

    The returned callable:
      * notifies the optional status tracker and the optional metrics object,
      * installs a RequestContext carrying the trace id of the first message,
      * emits "start" and "finish" monitor events,
      * removes ``task_item`` from the running-task table on success or failure,
      * acknowledges every message when the queue is a SchedulerRedisQueue,
        regardless of whether the handler succeeded.

    ``metrics`` is optional in ``__init__``, so every metrics call is guarded;
    a missing metrics object must not turn each task into a failure.

    Args:
        handler: The original handler function
        task_item: The RunningTaskItem to track

    Returns:
        Wrapped handler function that captures results and logs completion
    """

    def _utc_iso(epoch_seconds: float) -> str:
        # Render an epoch timestamp as an ISO-8601 string in UTC.
        return datetime.fromtimestamp(epoch_seconds, tz=timezone.utc).isoformat()

    def _ack_messages(messages) -> None:
        # Acknowledge each message on its own so that one failing ack does not
        # prevent the remaining messages of the batch from being acknowledged.
        for msg in messages:
            try:
                self.memos_message_queue.ack_message(
                    user_id=msg.user_id,
                    mem_cube_id=msg.mem_cube_id,
                    task_label=msg.label,
                    redis_message_id=msg.redis_message_id,
                    message=msg,
                )
            except Exception as ack_err:
                logger.warning(f"Ack in finally failed: {ack_err}")

    def wrapped_handler(messages: list[ScheduleMessageItem]):
        start_time = time.time()
        start_iso = _utc_iso(start_time)
        if self.status_tracker:
            for msg in messages:
                self.status_tracker.task_started(task_id=msg.item_id, user_id=msg.user_id)
        try:
            # All messages in this batch have same user and type, so the first
            # one supplies the shared fields.
            first_msg = messages[0]
            trace_id = getattr(first_msg, "trace_id", None) or generate_trace_id()
            # Propagate trace_id and user info to logging context for this handler execution
            ctx = RequestContext(
                trace_id=trace_id,
                user_name=getattr(first_msg, "user_name", None),
                user_type=None,
            )
            set_request_context(ctx)

            # --- mark start: record queuing time (now - enqueue_ts) ---
            now = time.time()
            enq_ts = getattr(first_msg, "timestamp", None)

            if isinstance(enq_ts, int | float):
                # Path 1: epoch seconds (preferred)
                enq_epoch = float(enq_ts)
            elif hasattr(enq_ts, "timestamp"):
                # Path 2: datetime -> normalize to UTC epoch
                dt = enq_ts
                if dt.tzinfo is None:
                    # treat naive as UTC to neutralize +8h skew
                    dt = dt.replace(tzinfo=timezone.utc)
                enq_epoch = dt.timestamp()
            else:
                # fallback: treat as "just now"
                enq_epoch = now

            wait_sec = max(0.0, now - enq_epoch)
            if self.metrics is not None:
                self.metrics.observe_task_wait_duration(
                    wait_sec, first_msg.user_id, first_msg.label
                )

            dequeue_ts = getattr(first_msg, "_dequeue_ts", None)
            has_dequeue_ts = isinstance(dequeue_ts, int | float)
            start_delay_ms = None
            if has_dequeue_ts:
                start_delay_ms = max(0.0, start_time - dequeue_ts) * 1000

            emit_monitor_event(
                "start",
                first_msg,
                {
                    "start_ts": start_iso,
                    "start_delay_ms": start_delay_ms,
                    "enqueue_ts": to_iso(enq_ts),
                    "dequeue_ts": to_iso(
                        datetime.fromtimestamp(dequeue_ts, tz=timezone.utc)
                        if has_dequeue_ts
                        else None
                    ),
                    "event_duration_ms": start_delay_ms,
                    "total_duration_ms": self._calc_total_duration_ms(start_time, enq_ts),
                },
            )

            # Execute the original handler
            result = handler(messages)

            # --- mark done ---
            finish_time = time.time()
            duration = finish_time - start_time
            if self.metrics is not None:
                self.metrics.observe_task_duration(duration, first_msg.user_id, first_msg.label)
            if self.status_tracker:
                for msg in messages:
                    self.status_tracker.task_completed(task_id=msg.item_id, user_id=msg.user_id)
                self._maybe_emit_task_completion(messages)
            if self.metrics is not None:
                self.metrics.task_completed(user_id=first_msg.user_id, task_type=first_msg.label)

            emit_monitor_event(
                "finish",
                first_msg,
                {
                    "status": "ok",
                    "start_ts": start_iso,
                    "finish_ts": _utc_iso(finish_time),
                    "exec_duration_ms": duration * 1000,
                    "event_duration_ms": duration * 1000,
                    "total_duration_ms": self._calc_total_duration_ms(
                        finish_time, getattr(first_msg, "timestamp", None)
                    ),
                },
            )
            # Redis ack is handled in finally to cover failure cases

            # Mark task as completed and remove from tracking
            with self._task_lock:
                if task_item.item_id in self._running_tasks:
                    task_item.mark_completed(result)
                    del self._running_tasks[task_item.item_id]
            logger.info(f"Task completed: {task_item.get_execution_info()}")
            return result

        except Exception as e:
            m = messages[0]
            finish_time = time.time()
            elapsed_ms = (finish_time - start_time) * 1000
            if self.metrics is not None:
                self.metrics.task_failed(m.user_id, m.label, type(e).__name__)
            if self.status_tracker:
                for msg in messages:
                    self.status_tracker.task_failed(
                        task_id=msg.item_id, user_id=msg.user_id, error_message=str(e)
                    )
                self._maybe_emit_task_completion(messages, error=e)
            emit_monitor_event(
                "finish",
                m,
                {
                    "status": "fail",
                    "start_ts": start_iso,
                    "finish_ts": _utc_iso(finish_time),
                    "exec_duration_ms": elapsed_ms,
                    "event_duration_ms": elapsed_ms,
                    "error_type": type(e).__name__,
                    "error_msg": str(e),
                    "total_duration_ms": self._calc_total_duration_ms(
                        finish_time, getattr(m, "timestamp", None)
                    ),
                },
            )
            # Mark task as failed and remove from tracking
            with self._task_lock:
                if task_item.item_id in self._running_tasks:
                    task_item.mark_failed(str(e))
                    del self._running_tasks[task_item.item_id]
            logger.error(f"Task failed: {task_item.get_execution_info()}, Error: {e}")

            # Re-raise so the caller (e.g. the future) still observes the failure
            raise
        finally:
            # Ensure Redis messages are acknowledged even if handler fails.
            # isinstance() is already False for None, so no extra None check is needed.
            if isinstance(self.memos_message_queue, SchedulerRedisQueue):
                _ack_messages(messages)

    return wrapped_handler
288
+
289
def _maybe_emit_task_completion(
    self, messages: list[ScheduleMessageItem], error: Exception | None = None
) -> None:
    """
    Emit one web log per business task whose tracked status is "completed" or "failed".

    Nothing is emitted when ``submit_web_logs`` or ``status_tracker`` is unset, when no
    message carries a ``task_id``, or when ``is_cloud_env()`` is falsy. Any exception
    raised while querying status or submitting a log is logged as a warning and swallowed.

    Args:
        messages: The batch that was just processed
        error: Exception raised by the handler, if any; used as the failure reason
    """
    if not (self.submit_web_logs and self.status_tracker):
        return

    # messages in one batch can belong to different business task_ids; check each
    business_ids = set()
    doc_id_by_task = {}
    for item in messages:
        business_id = getattr(item, "task_id", None)
        if not business_id:
            continue
        business_ids.add(business_id)
        # Keep the first non-empty source_doc_id seen for each business task
        if business_id in doc_id_by_task:
            continue
        doc_id = (item.info or {}).get("source_doc_id")
        if doc_id:
            doc_id_by_task[business_id] = doc_id

    if not business_ids:
        return

    # Use the first message only for shared fields; mem_cube_id is same within a batch
    head = messages[0]
    user_id = head.user_id
    mem_cube_id = head.mem_cube_id

    try:
        if not is_cloud_env():
            return

        for task_id in business_ids:
            source_doc_id = doc_id_by_task.get(task_id)
            status_data = self.status_tracker.get_task_status_by_business_id(
                business_task_id=task_id, user_id=user_id
            )
            if not status_data:
                continue

            status = status_data.get("status")
            if status == "completed":
                # The tracker's aggregated status is trusted here, even if a local
                # error was passed in.
                final_status = "completed"
                log_content = f"Task {task_id} completed"
            elif status == "failed":
                final_status = "failed"
                # Prefer the local exception text, then the tracker's aggregated errors
                error_msg = str(error) if error else None
                if not error_msg:
                    errors = status_data.get("errors", [])
                    if errors:
                        error_msg = "; ".join(errors)
                    else:
                        error_msg = "Unknown error (check system logs)"
                log_content = f"Task {task_id} failed: {error_msg}"
            else:
                # Any other status produces no log
                continue

            self.submit_web_logs(
                ScheduleLogForWebItem(
                    task_id=task_id,
                    user_id=user_id,
                    mem_cube_id=mem_cube_id,
                    label="taskStatus",
                    from_memory_type="status",
                    to_memory_type="status",
                    log_content=log_content,
                    status=final_status,
                    source_doc_id=source_doc_id,
                )
            )
    except Exception:
        logger.warning(
            "Failed to emit task completion log. user_id=%s mem_cube_id=%s task_ids=%s",
            user_id,
            mem_cube_id,
            list(business_ids),
            exc_info=True,
        )
382
+
383
def get_running_tasks(
    self, filter_func: Callable[[RunningTaskItem], bool] | None = None
) -> dict[str, RunningTaskItem]:
    """
    Return a new dict of the currently running tasks, optionally narrowed by a predicate.

    The running-task table is read while holding the task lock; the predicate, when
    given, is evaluated under that lock as well.

    Args:
        filter_func: Optional predicate receiving a RunningTaskItem; only items for which
            it returns a truthy value are kept. When None, every task is returned.

    Returns:
        Dictionary of running tasks keyed by task ID

    Examples:
        # Every running task
        all_tasks = dispatcher.get_running_tasks()

        # Tasks of one user
        user_tasks = dispatcher.get_running_tasks(lambda task: task.user_id == "user123")

        # Tasks of one handler
        handler_tasks = dispatcher.get_running_tasks(lambda task: task.task_name == "test_handler")

        # Several conditions at once
        filtered_tasks = dispatcher.get_running_tasks(
            lambda task: task.user_id == "user123" and task.status == "running"
        )
    """
    selected = {}
    with self._task_lock:
        for key, item in self._running_tasks.items():
            if filter_func is None or filter_func(item):
                selected[key] = item
    return selected
420
+
421
def get_running_task_count(self) -> int:
    """
    Report how many tasks are in the running-task table right now.

    The table is measured while holding the task lock.

    Returns:
        Number of running tasks
    """
    with self._task_lock:
        count = len(self._running_tasks)
    return count
430
+
431
def register_handler(self, label: str, handler: Callable[[list[ScheduleMessageItem]], None]):
    """
    Bind a handler callable to a message label.

    A handler already stored under the same label is replaced.

    Args:
        label: Message label the handler is responsible for.
        handler: Callable invoked with the list of messages carrying this label.
    """
    registry = self.handlers
    registry[label] = handler
440
+
441
def register_handlers(
    self, handlers: dict[str, Callable[[list[ScheduleMessageItem]], None]]
) -> None:
    """
    Bulk register multiple handlers from a dictionary.

    Entries whose label is not a string, or whose handler is not callable,
    are logged as errors and skipped; the remaining entries are registered
    through register_handler.

    Args:
        handlers: Dictionary mapping labels to handler functions.
            Format: {label: handler_callable}
    """
    registered = 0
    for label, handler in handlers.items():
        if not isinstance(label, str):
            logger.error(f"Invalid label type: {type(label)}. Expected str.")
            continue
        if not callable(handler):
            logger.error(f"Handler for label '{label}' is not callable.")
            continue
        self.register_handler(label=label, handler=handler)
        registered += 1
    # Report what was actually registered, not the size of the input,
    # so skipped entries do not inflate the count.
    logger.info(f"Registered {registered} handlers in bulk")
460
+
461
def unregister_handler(self, label: str) -> bool:
    """
    Remove the handler registered under a label, if there is one.

    Args:
        label: Label whose handler should be removed.

    Returns:
        bool: True when a handler existed and was removed, False when the
        label had no registered handler.
    """
    # Guard clause: nothing to remove for an unknown label.
    if label not in self.handlers:
        logger.warning(f"No handler found for label: {label}")
        return False

    del self.handlers[label]
    logger.info(f"Unregistered handler for label: {label}")
    return True
478
+
479
def unregister_handlers(self, labels: list[str]) -> dict[str, bool]:
    """
    Remove the handlers for several labels in one call.

    Each label is passed to unregister_handler in the order given. If a
    label appears more than once, the result of its last attempt is kept.

    Args:
        labels: Labels whose handlers should be removed.

    Returns:
        dict[str, bool]: For each label, whether its handler was removed.
    """
    outcome = {label: self.unregister_handler(label) for label in labels}

    logger.info(f"Unregistered handlers for {len(labels)} labels")
    return outcome
495
+
496
def stats(self) -> dict[str, int]:
    """
    Collect lightweight runtime counters for monitoring.

    Each counter is gathered independently and best-effort: if reading one
    of them raises, that counter is reported as 0 and the others are still
    collected.

    Returns:
        {
            'running': <number of running tasks>,
            'inflight': <number of futures tracked>,
            'handlers': <registered handler count>,
        }
    """
    report = {"running": 0, "inflight": 0, "handlers": 0}

    try:
        report["running"] = self.get_running_task_count()
    except Exception:
        pass  # keep the 0 default for this counter

    try:
        with self._task_lock:
            report["inflight"] = len(self._futures)
    except Exception:
        pass  # keep the 0 default for this counter

    try:
        report["handlers"] = len(self.handlers)
    except Exception:
        pass  # keep the 0 default for this counter

    return report
521
+
522
def _default_message_handler(self, messages: list[ScheduleMessageItem]) -> None:
    """Fallback handler: record the messages at debug level and do nothing else."""
    logger.debug(f"Using _default_message_handler to deal with messages: {messages}")
    return None
524
+
525
def _handle_future_result(self, future):
    """
    Done-callback for a dispatched future.

    Stops tracking the future, then retrieves its result so that any
    exception raised by the handler is logged instead of being lost.

    Args:
        future: The finished future to untrack and inspect.
    """
    with self._task_lock:
        self._futures.discard(future)

    try:
        # result() re-raises whatever the handler raised.
        future.result()
    except Exception as exc:
        logger.error(f"Handler execution failed: {exc!s}", exc_info=True)
532
+
533
+ @staticmethod
534
+ def _calc_total_duration_ms(finish_epoch: float, enqueue_ts) -> float | None:
535
+ """
536
+ Calculate total duration from enqueue timestamp to finish time in milliseconds.
537
+ """
538
+ try:
539
+ enq_epoch = None
540
+
541
+ if isinstance(enqueue_ts, int | float):
542
+ enq_epoch = float(enqueue_ts)
543
+ elif hasattr(enqueue_ts, "timestamp"):
544
+ dt = enqueue_ts
545
+ if dt.tzinfo is None:
546
+ dt = dt.replace(tzinfo=timezone.utc)
547
+ enq_epoch = dt.timestamp()
548
+
549
+ if enq_epoch is None:
550
+ return None
551
+
552
+ total_ms = max(0.0, finish_epoch - enq_epoch) * 1000
553
+ return total_ms
554
+ except Exception:
555
+ return None
556
+
557
def execute_task(
    self,
    user_id: str,
    mem_cube_id: str,
    task_label: str,
    msgs: list[ScheduleMessageItem],
    handler_call_back: Callable[[list[ScheduleMessageItem]], Any],
):
    """
    Track and run one handler invocation for a batch of messages.

    A RunningTaskItem describing the batch is stored in the running-task
    registry, the handler is wrapped via _create_task_wrapper, and the
    wrapped handler is then either submitted to the dispatcher executor
    (parallel dispatch enabled and an executor present) or called inline.

    Args:
        user_id: User the messages belong to.
        mem_cube_id: Memory cube the messages belong to.
        task_label: Label shared by the messages; also used to name the task.
        msgs: Messages to process. A single ScheduleMessageItem is accepted
            and wrapped into a one-element list.
        handler_call_back: Handler to invoke with the messages.
    """
    # Tolerate a lone message passed instead of a list.
    if isinstance(msgs, ScheduleMessageItem):
        msgs = [msgs]

    running_item = RunningTaskItem(
        user_id=user_id,
        mem_cube_id=mem_cube_id,
        task_info=f"Processing {len(msgs)} message(s) with label '{task_label}' for user {user_id} and mem_cube {mem_cube_id}",
        task_name=f"{task_label}_handler",
        messages=msgs,
    )

    # Register before execution so the task is visible as running in both modes.
    with self._task_lock:
        self._running_tasks[running_item.item_id] = running_item

    # NOTE(review): removal from the registry is presumably done by the
    # wrapper returned here; _create_task_wrapper is not visible in this chunk.
    tracked_handler = self._create_task_wrapper(handler_call_back, running_item)

    logger.debug(f"Task started: {running_item.get_execution_info()}")

    run_inline = not self.enable_parallel_dispatch or self.dispatcher_executor is None
    if run_inline:
        logger.info(
            f"Execute {len(msgs)} message(s) synchronously for {task_label} for user {user_id} and mem_cube {mem_cube_id}."
        )
        tracked_handler(msgs)
        return

    pending = self.dispatcher_executor.submit(tracked_handler, msgs)
    with self._task_lock:
        self._futures.add(pending)
    # Attached after the lock is released: the callback takes the same lock
    # and runs immediately if the future has already finished.
    pending.add_done_callback(self._handle_future_result)
    logger.info(
        f"Dispatch {len(msgs)} message(s) to {task_label} handler for user {user_id} and mem_cube {mem_cube_id}."
    )
604
+
605
def dispatch(self, msg_list: list[ScheduleMessageItem]):
    """
    Route a batch of messages to the handlers registered for their labels.

    Messages are grouped by user and memory cube, then by label within each
    group. Every (user, cube, label) group becomes one execute_task call.
    Labels without a registered handler fall back to _default_message_handler.

    Args:
        msg_list: List of ScheduleMessageItem objects to process. An empty
            list is a no-op.
    """
    if not msg_list:
        logger.debug("Received empty message list, skipping dispatch")
        return

    grouped = group_messages_by_user_and_mem_cube(msg_list)

    for user_id, cube_groups in grouped.items():
        for mem_cube_id, cube_msgs in cube_groups.items():
            # Bucket this user/cube's messages by label.
            by_label = defaultdict(list)
            for message in cube_msgs:
                by_label[message.label].append(message)

            for label, label_msgs in by_label.items():
                self.execute_task(
                    user_id=user_id,
                    mem_cube_id=mem_cube_id,
                    task_label=label,
                    msgs=label_msgs,
                    handler_call_back=self.handlers.get(label, self._default_message_handler),
                )
637
+
638
def join(self, timeout: float | None = None) -> bool:
    """Wait for all dispatched tasks to complete.

    Only futures tracked at the moment of the call are waited on. Exceptions
    raised by completed handlers are logged, not propagated.

    Args:
        timeout: Maximum time to wait in seconds. None means wait forever.

    Returns:
        bool: True if all tasks completed, False if timeout occurred.
    """
    if not self.enable_parallel_dispatch or self.dispatcher_executor is None:
        return True  # Serial mode requires no waiting

    # Snapshot under the lock: done-callbacks discard entries from
    # self._futures on worker threads, and every other access to the set
    # is made while holding _task_lock.
    with self._task_lock:
        pending = list(self._futures)

    done, not_done = concurrent.futures.wait(
        pending, timeout=timeout, return_when=concurrent.futures.ALL_COMPLETED
    )

    # Surface handler failures in the log without interrupting the join.
    for future in done:
        try:
            future.result()
        except Exception:
            logger.error("Handler failed during shutdown", exc_info=True)

    return len(not_done) == 0
662
+
663
def run_competitive_tasks(
    self, tasks: dict[str, Callable[[threading.Event], Any]], timeout: float = 10.0
) -> tuple[str, Any] | None:
    """
    Race several tasks against each other and return the first result.

    The work is delegated to the thread manager's run_race.

    Args:
        tasks: Dictionary mapping task names to task functions that accept a stop_flag parameter
        timeout: Maximum time to wait for any task to complete (in seconds)

    Returns:
        Tuple of (task_name, result) from the winning task, or None if no task completes
    """
    task_count = len(tasks)
    logger.info(f"Starting competitive execution of {task_count} tasks")
    winner = self.thread_manager.run_race(tasks, timeout)
    return winner
678
+
679
def run_multiple_tasks(
    self,
    tasks: dict[str, tuple[Callable, tuple]],
    use_thread_pool: bool | None = None,
    timeout: float | None = None,
) -> dict[str, Any]:
    """
    Run a set of named tasks through the thread manager and collect every result.

    Args:
        tasks: Dictionary mapping task names to (task_execution_function, task_execution_parameters) tuples
        use_thread_pool: Whether to use a thread pool. None falls back to the
            dispatcher's enable_parallel_dispatch setting.
        timeout: Maximum time to wait for all tasks (in seconds). None falls
            back to multi_task_running_timeout.

    Returns:
        Dictionary mapping task names to their results

    Raises:
        Exception: Any error raised by the thread manager is logged and re-raised unchanged.
    """
    # Resolve defaults from the dispatcher's own settings.
    effective_pool = self.enable_parallel_dispatch if use_thread_pool is None else use_thread_pool
    effective_timeout = self.multi_task_running_timeout if timeout is None else timeout

    logger.info(
        f"Executing {len(tasks)} tasks concurrently (thread_pool: {effective_pool}, timeout: {effective_timeout})"
    )

    try:
        results = self.thread_manager.run_multiple_tasks(
            tasks=tasks, use_thread_pool=effective_pool, timeout=effective_timeout
        )
        # A task counts as completed when its result is not None.
        finished = sum(1 for value in results.values() if value is not None)
        logger.info(f"Successfully completed {finished}/{len(tasks)} tasks")
        return results
    except Exception as e:
        logger.error(f"Multiple tasks execution failed: {e}", exc_info=True)
        raise
722
+
723
def shutdown(self) -> None:
    """
    Gracefully shut down the dispatcher.

    Marks the dispatcher as not running, shuts down the executor when one
    exists (cancelling futures that have not started), and clears the set of
    tracked futures. Errors from the executor are logged, never raised.
    """
    self._running = False

    try:
        executor = self.dispatcher_executor
        # No executor means there is nothing to stop; skip the call rather
        # than logging a spurious AttributeError on None.
        if executor is not None:
            executor.shutdown(wait=self.stop_wait, cancel_futures=True)
    except Exception as e:
        logger.error(f"Executor shutdown error: {e}", exc_info=True)
    finally:
        # Done-callbacks mutate this set under the same lock.
        with self._task_lock:
            self._futures.clear()
734
+
735
def __enter__(self):
    """Enter the context: flag the dispatcher as running and hand back the instance."""
    setattr(self, "_running", True)
    return self
738
+
739
def __exit__(self, exc_type, exc_val, exc_tb):
    """Leave the context by shutting the dispatcher down; exceptions are not suppressed."""
    self.shutdown()
    return None