MemoryOS 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
@@ -0,0 +1,324 @@
1
+ # memos/reranker/http_bge.py
2
+ from __future__ import annotations
3
+
4
+ import re
5
+
6
+ from collections.abc import Iterable
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ import requests
10
+
11
+ from memos.log import get_logger
12
+ from memos.utils import timed_with_status
13
+
14
+ from .base import BaseReranker
15
+ from .concat import concat_original_source
16
+
17
+
18
+ logger = get_logger(__name__)
19
+
20
+
21
+ if TYPE_CHECKING:
22
+ from memos.memories.textual.item import TextualMemoryItem
23
+
24
+ # Strip a leading "[...]" tag (e.g., "[2025-09-01] ..." or "[meta] ...")
25
+ # before sending text to the reranker. This keeps inputs clean and
26
+ # avoids misleading the model with bracketed prefixes.
27
+ _TAG1 = re.compile(r"^\s*\[[^\]]*\]\s*")
28
+ DEFAULT_BOOST_WEIGHTS = {"user_id": 0.5, "tags": 0.2, "session_id": 0.3}
29
+
30
+
31
+ def _value_matches(item_value: Any, wanted: Any) -> bool:
32
+ """
33
+ Generic matching:
34
+ - if item_value is list/tuple/set: check membership (any match if wanted is iterable)
35
+ - else: equality (any match if wanted is iterable)
36
+ """
37
+
38
+ def _iterable(x):
39
+ # exclude strings from "iterable"
40
+ return isinstance(x, Iterable) and not isinstance(x, str | bytes)
41
+
42
+ if _iterable(item_value):
43
+ if _iterable(wanted):
44
+ return any(w in item_value for w in wanted)
45
+ return wanted in item_value
46
+ else:
47
+ if _iterable(wanted):
48
+ return any(item_value == w for w in wanted)
49
+ return item_value == wanted
50
+
51
+
52
class HTTPBGEReranker(BaseReranker):
    """
    HTTP-based BGE reranker.

    This class sends (query, documents[]) to a remote HTTP endpoint that
    performs cross-encoder-style re-ranking (e.g., BGE reranker) and returns
    relevance scores. It then maps those scores back onto the candidate
    items and returns (item, score) pairs sorted by score.

    Notes
    -----
    - The endpoint is expected to accept JSON:
        {
          "model": "<model-name>",
          "query": "<query text>",
          "documents": ["doc1", "doc2", ...]
        }
    - Two response shapes are supported:
        1) {"results": [{"index": <int>, "relevance_score": <float>}, ...]}
           where "index" refers to the *position in the documents array*.
        2) {"data": [{"score": <float>}, ...]}  (aligned by list order)
    - `rerank` is wrapped by `timed_with_status` with a fallback that
      returns the first `top_k` input items with 0.0 scores; it is
      presumably invoked when the request or response handling raises
      (confirm against `memos.utils.timed_with_status`).
    """

    def __init__(
        self,
        reranker_url: str,
        token: str = "",
        model: str = "bge-reranker-v2-m3",
        timeout: int = 10,
        max_query_tokens: int | None = None,
        concate_len: int | None = None,
        headers_extra: dict | None = None,
        rerank_source: str | None = None,
        boost_weights: dict[str, float] | None = None,
        boost_default: float = 0.0,
        warn_unknown_filter_keys: bool = True,
        **kwargs,
    ):
        """
        Parameters
        ----------
        reranker_url : str
            HTTP endpoint for the reranker service. Must be non-empty.
        token : str, optional
            Bearer token for auth. If non-empty, added to the Authorization
            header unless `headers_extra` already supplies one.
        model : str, optional
            Model identifier understood by the server.
        timeout : int, optional
            Request timeout (seconds).
        max_query_tokens : int | None, optional
            Queries longer than this are shortened before being sent. The
            length is measured with `len(query)`, i.e. in characters.
        concate_len : int | None, optional
            Total number of characters kept (half from the head, half from
            the tail) when a query is shortened. Falls back to
            `max_query_tokens` when not set.
        headers_extra : dict | None, optional
            Additional headers to merge into the request headers.
        rerank_source : str | None, optional
            When set, documents are built by
            `concat_original_source(graph_results, rerank_source)` instead of
            from each item's `memory` text.
        boost_weights : dict[str, float] | None, optional
            Per-key boost weights; defaults to a copy of
            `DEFAULT_BOOST_WEIGHTS`.
        boost_default : float, optional
            Weight used for filter keys missing from `boost_weights`.
        warn_unknown_filter_keys : bool, optional
            Log a one-time warning for filter keys that cannot be resolved
            on an item.
        **kwargs
            Accepted and ignored.

        Raises
        ------
        ValueError
            If `reranker_url` is empty.
        """
        if not reranker_url:
            raise ValueError("reranker_url must not be empty")
        self.reranker_url = reranker_url
        self.token = token or ""
        self.model = model
        self.timeout = timeout
        self.max_query_tokens = max_query_tokens
        self.concate_len = concate_len
        self.headers_extra = headers_extra or {}
        self.rerank_source = rerank_source

        self.boost_weights = (
            DEFAULT_BOOST_WEIGHTS.copy()
            if boost_weights is None
            else {k: float(v) for k, v in boost_weights.items()}
        )
        self.boost_default = float(boost_default)
        self.warn_unknown_filter_keys = bool(warn_unknown_filter_keys)
        # Keys already reported as unresolvable, so each is warned about once.
        self._warned_missing_keys: set[str] = set()

    def _truncate_query(self, query: str) -> str:
        """Shorten an over-long query to its head and tail joined by a newline."""
        if not self.max_query_tokens or len(query) <= self.max_query_tokens:
            return query

        # `concate_len` is optional; without this fallback a long query
        # combined with concate_len=None raised TypeError on `None // 2`.
        budget = self.concate_len or self.max_query_tokens
        half = budget // 2

        # half == 0 would make `query[-0:]` return the whole query, and an
        # overlapping head/tail would duplicate text, so leave the query
        # untouched in both cases.
        if half <= 0 or 2 * half >= len(query):
            return query
        return query[:half] + "\n" + query[-half:]

    def _build_headers(self) -> dict:
        """Assemble request headers: JSON content type, optional bearer auth, extras."""
        headers = {"Content-Type": "application/json"}
        has_custom_auth = any(str(name).lower() == "authorization" for name in self.headers_extra)
        if self.token and not has_custom_auth:
            headers["Authorization"] = f"Bearer {self.token}"
        # Extras are merged last so callers can override any default header.
        headers.update(self.headers_extra)
        return headers

    @staticmethod
    def _top_ranked(
        scored_items: list[tuple[TextualMemoryItem, float]], top_k: int
    ) -> list[tuple[TextualMemoryItem, float]]:
        """Sort (item, score) pairs by descending score and keep the first `top_k`."""
        scored_items.sort(key=lambda pair: pair[1], reverse=True)
        return scored_items[:top_k]

    @timed_with_status(
        log_prefix="model_timed_rerank",
        log_extra_args={"model_name_or_path": "reranker"},
        fallback=lambda exc, self, query, graph_results, top_k, *a, **kw: [
            (item, 0.0) for item in graph_results[:top_k]
        ],
    )
    def rerank(
        self,
        query: str,
        graph_results: list[TextualMemoryItem] | list[dict[str, Any]],
        top_k: int,
        search_priority: dict | None = None,
        **kwargs,
    ) -> list[tuple[TextualMemoryItem, float]]:
        """
        Rank candidate memories by relevance to the query.

        Parameters
        ----------
        query : str
            The search query. Shortened first when it exceeds
            `max_query_tokens`.
        graph_results : list[TextualMemoryItem] | list[dict]
            Candidate items to re-rank. Unless `rerank_source` is set, each
            item must expose a non-empty string `memory` (attribute or dict
            key); other items are dropped.
        top_k : int
            Return at most this many items.
        search_priority : dict | None, optional
            Mapping of key -> wanted value(s). Items whose resolved value
            matches get their score boosted (see `_apply_boost_generic`).

        Returns
        -------
        list[tuple[TextualMemoryItem, float]]
            Re-ranked items with scores, sorted descending by score. For an
            unrecognised response schema, the first `top_k` candidates with
            0.0 scores.
        """
        query = self._truncate_query(query)

        if not graph_results:
            return []

        # Keep `documents` and `graph_results` index-aligned so that any
        # index returned by the server maps straight back onto an item.
        if self.rerank_source:
            documents = concat_original_source(graph_results, self.rerank_source)
        else:
            documents = []
            filtered_graph_results = []
            for item in graph_results:
                m = item.get("memory") if isinstance(item, dict) else getattr(item, "memory", None)

                if isinstance(m, str) and m:
                    documents.append(_TAG1.sub("", m))
                    filtered_graph_results.append(item)
            graph_results = filtered_graph_results

        logger.info(f"[HTTPBGERerankerSample] query: {query} , documents: {documents[:5]}...")

        if not documents:
            return []

        payload = {"model": self.model, "query": query, "documents": documents}

        # Make the HTTP request to the reranker service
        resp = requests.post(
            self.reranker_url,
            headers=self._build_headers(),
            json=payload,
            timeout=self.timeout,
        )
        resp.raise_for_status()
        data = resp.json()

        if "results" in data:
            # Format: {"results": [{"index": int, "relevance_score": float}, ...]}
            # "index" is a position in `documents`, which is aligned with
            # `graph_results`; rows with a missing or out-of-range index
            # are skipped.
            scored_items: list[tuple[TextualMemoryItem, float]] = []
            for r in data.get("results", []):
                idx = r.get("index")
                if isinstance(idx, int) and 0 <= idx < len(graph_results):
                    raw_score = float(r.get("relevance_score", r.get("score", 0.0)))
                    item = graph_results[idx]
                    score = self._apply_boost_generic(item, raw_score, search_priority)
                    scored_items.append((item, score))
            return self._top_ranked(scored_items, top_k)

        if "data" in data:
            # Format: {"data": [{"score": float}, ...]} aligned by list order.
            # Pad with 0.0 or truncate so there is exactly one score per item.
            expected = len(graph_results)
            score_list = [float(r.get("score", 0.0)) for r in data.get("data", [])][:expected]
            score_list += [0.0] * (expected - len(score_list))

            scored_items = [
                (item, self._apply_boost_generic(item, raw_score, search_priority))
                for item, raw_score in zip(graph_results, score_list, strict=False)
            ]
            return self._top_ranked(scored_items, top_k)

        # Unexpected response schema: return a 0.0-scored fallback of the
        # first top_k candidates.
        return [(item, 0.0) for item in graph_results[:top_k]]

    def _get_attr_or_key(self, obj: Any, key: str) -> Any:
        """
        Resolve `key` on `obj` with one-level fallback into `obj.metadata`.

        Priority:
          1) obj.<key>
          2) obj[key]  (via `obj.get(key)`)
          3) obj.metadata.<key>
          4) obj.metadata[key]

        Dotted keys such as "metadata.user_id" are resolved segment by
        segment. Returns None when nothing is found.
        """
        if obj is None:
            return None

        # support input like "metadata.user_id"
        if "." in key:
            head, tail = key.split(".", 1)
            base = self._get_attr_or_key(obj, head)
            return self._get_attr_or_key(base, tail)

        def _resolve(o: Any, k: str):
            # Attribute access first, then mapping-style `.get`.
            if o is None:
                return None
            v = getattr(o, k, None)
            if v is not None:
                return v
            if hasattr(o, "get"):
                try:
                    return o.get(k)
                except Exception:
                    # A `.get` that rejects this key counts as "not found".
                    return None
            return None

        # 1) find in obj
        v = _resolve(obj, key)
        if v is not None:
            return v

        # 2) find in obj.metadata
        meta = _resolve(obj, "metadata")
        if meta is not None:
            return _resolve(meta, key)

        return None

    def _apply_boost_generic(
        self,
        item: TextualMemoryItem,
        base_score: float,
        search_filter: dict | None,
    ) -> float:
        """
        Multiply base_score by (1 + weight) for each matching key in search_filter.

        - key resolution: self._get_attr_or_key(item, key)
        - weight = boost_weights.get(key, self.boost_default)
        - unknown key -> one-time warning
        - after each applied (non-zero) boost the score is clamped to
          [0.0, 1.0]; a score that is never boosted is returned unclamped

        Returns base_score unchanged when search_filter is empty or None.
        """
        if not search_filter:
            return base_score

        score = float(base_score)

        for key, wanted in search_filter.items():
            # _get_attr_or_key looks on the item and then on item.metadata
            # ("metadata.user_id" style keys are supported)
            resolved = self._get_attr_or_key(item, key)

            if resolved is None:
                if self.warn_unknown_filter_keys and key not in self._warned_missing_keys:
                    logger.warning(
                        "[HTTPBGEReranker] search_filter key '%s' not found on TextualMemoryItem or metadata",
                        key,
                    )
                    self._warned_missing_keys.add(key)
                continue

            if _value_matches(resolved, wanted):
                w = float(self.boost_weights.get(key, self.boost_default))
                if w != 0.0:
                    score *= 1.0 + w
                    score = min(max(0.0, score), 1.0)

        return score
@@ -0,0 +1,327 @@
1
+ # memos/reranker/http_bge.py
2
+ from __future__ import annotations
3
+
4
+ import re
5
+
6
+ from collections.abc import Iterable
7
+ from typing import TYPE_CHECKING, Any
8
+
9
+ import requests
10
+
11
+ from memos.log import get_logger
12
+ from memos.reranker.strategies import RerankerStrategyFactory
13
+ from memos.utils import timed
14
+
15
+ from .base import BaseReranker
16
+
17
+
18
+ logger = get_logger(__name__)
19
+
20
+
21
+ if TYPE_CHECKING:
22
+ from memos.memories.textual.item import TextualMemoryItem
23
+
24
+ # Strip a leading "[...]" tag (e.g., "[2025-09-01] ..." or "[meta] ...")
25
+ # before sending text to the reranker. This keeps inputs clean and
26
+ # avoids misleading the model with bracketed prefixes.
27
+ _TAG1 = re.compile(r"^\s*\[[^\]]*\]\s*")
28
+ DEFAULT_BOOST_WEIGHTS = {"user_id": 0.5, "tags": 0.2, "session_id": 0.3}
29
+
30
+
31
+ def _value_matches(item_value: Any, wanted: Any) -> bool:
32
+ """
33
+ Generic matching:
34
+ - if item_value is list/tuple/set: check membership (any match if wanted is iterable)
35
+ - else: equality (any match if wanted is iterable)
36
+ """
37
+
38
+ def _iterable(x):
39
+ # exclude strings from "iterable"
40
+ return isinstance(x, Iterable) and not isinstance(x, str | bytes)
41
+
42
+ if _iterable(item_value):
43
+ if _iterable(wanted):
44
+ return any(w in item_value for w in wanted)
45
+ return wanted in item_value
46
+ else:
47
+ if _iterable(wanted):
48
+ return any(item_value == w for w in wanted)
49
+ return item_value == wanted
50
+
51
+
52
+ class HTTPBGERerankerStrategy(BaseReranker):
53
+ """
54
+ HTTP-based BGE reranker.
55
+
56
+ This class sends (query, documents[]) to a remote HTTP endpoint that
57
+ performs cross-encoder-style re-ranking (e.g., BGE reranker) and returns
58
+ relevance scores. It then maps those scores back onto the original
59
+ TextualMemoryItem list and returns (item, score) pairs sorted by score.
60
+
61
+ Notes
62
+ -----
63
+ - The endpoint is expected to accept JSON:
64
+ {
65
+ "model": "<model-name>",
66
+ "query": "<query text>",
67
+ "documents": ["doc1", "doc2", ...]
68
+ }
69
+ - Two response shapes are supported:
70
+ 1) {"results": [{"index": <int>, "relevance_score": <float>}, ...]}
71
+ where "index" refers to the *position in the documents array*.
72
+ 2) {"data": [{"score": <float>}, ...]} (aligned by list order)
73
+ - If the service fails or responds unexpectedly, this falls back to
74
+ returning the original items with 0.0 scores (best-effort).
75
+ """
76
+
77
+ def __init__(
78
+ self,
79
+ reranker_url: str,
80
+ token: str = "",
81
+ model: str = "bge-reranker-v2-m3",
82
+ timeout: int = 10,
83
+ max_query_tokens: int | None = None,
84
+ concate_len: int | None = None,
85
+ headers_extra: dict | None = None,
86
+ rerank_source: str | None = None,
87
+ boost_weights: dict[str, float] | None = None,
88
+ boost_default: float = 0.0,
89
+ warn_unknown_filter_keys: bool = True,
90
+ reranker_strategy: str = "single_turn",
91
+ **kwargs,
92
+ ):
93
+ """
94
+ Parameters
95
+ ----------
96
+ reranker_url : str
97
+ HTTP endpoint for the reranker service.
98
+ token : str, optional
99
+ Bearer token for auth. If non-empty, added to the Authorization header.
100
+ model : str, optional
101
+ Model identifier understood by the server.
102
+ timeout : int, optional
103
+ Request timeout (seconds).
104
+ headers_extra : dict | None, optional
105
+ Additional headers to merge into the request headers.
106
+ """
107
+ if not reranker_url:
108
+ raise ValueError("reranker_url must not be empty")
109
+ self.reranker_url = reranker_url
110
+ self.token = token or ""
111
+ self.model = model
112
+ self.timeout = timeout
113
+ self.max_query_tokens = max_query_tokens
114
+ self.concate_len = concate_len
115
+ self.headers_extra = headers_extra or {}
116
+
117
+ self.boost_weights = (
118
+ DEFAULT_BOOST_WEIGHTS.copy()
119
+ if boost_weights is None
120
+ else {k: float(v) for k, v in boost_weights.items()}
121
+ )
122
+ self.boost_default = float(boost_default)
123
+ self.warn_unknown_filter_keys = bool(warn_unknown_filter_keys)
124
+ self._warned_missing_keys: set[str] = set()
125
+ self.reranker_strategy = RerankerStrategyFactory.from_config(reranker_strategy)
126
+
127
+ @timed(log=True, log_prefix="RerankerStrategy")
128
+ def rerank(
129
+ self,
130
+ query: str,
131
+ graph_results: list[TextualMemoryItem],
132
+ top_k: int,
133
+ search_filter: dict | None = None,
134
+ **kwargs,
135
+ ) -> list[tuple[TextualMemoryItem, float]]:
136
+ """
137
+ Rank candidate memories by relevance to the query.
138
+
139
+ Parameters
140
+ ----------
141
+ query : str
142
+ The search query.
143
+ graph_results : list[TextualMemoryItem]
144
+ Candidate items to re-rank. Each item is expected to have a
145
+ `.memory` str field; non-strings are ignored.
146
+ top_k : int
147
+ Return at most this many items.
148
+ search_filter : dict | None
149
+ Currently unused. Present to keep signature compatible.
150
+
151
+ Returns
152
+ -------
153
+ list[tuple[TextualMemoryItem, float]]
154
+ Re-ranked items with scores, sorted descending by score.
155
+ """
156
+ if self.max_query_tokens and len(query) > self.max_query_tokens:
157
+ single_concate_len = self.concate_len // 2
158
+ query = query[:single_concate_len] + "\n" + query[-single_concate_len:]
159
+
160
+ if not graph_results:
161
+ return []
162
+
163
+ tracker, original_items, documents = self.reranker_strategy.prepare_documents(
164
+ query, graph_results, top_k
165
+ )
166
+
167
+ logger.info(
168
+ f"[HTTPBGEWithSourceReranker] strategy: {self.reranker_strategy}, "
169
+ f"query: {query}, documents count: {len(documents)}"
170
+ )
171
+ logger.info(f"[HTTPBGEWithSourceReranker] sample documents: {documents[:3]}...")
172
+
173
+ if not documents:
174
+ return []
175
+
176
+ headers = {"Content-Type": "application/json", **self.headers_extra}
177
+ payload = {"model": self.model, "query": query, "documents": documents}
178
+
179
+ try:
180
+ # Make the HTTP request to the reranker service
181
+ resp = requests.post(
182
+ self.reranker_url, headers=headers, json=payload, timeout=self.timeout
183
+ )
184
+ resp.raise_for_status()
185
+ data = resp.json()
186
+
187
+ scored_items: list[tuple[TextualMemoryItem, float]] = []
188
+
189
+ if "results" in data:
190
+ # Format:
191
+ # dict("results": [{"index": int, "relevance_score": float},
192
+ # ...])
193
+ rows = data.get("results", [])
194
+
195
+ ranked_indices = []
196
+ scores = []
197
+ for r in rows:
198
+ idx = r.get("index")
199
+ # The returned index refers to 'documents' (i.e., our 'pairs' order),
200
+ # so we must map it back to the original graph_results index.
201
+ if isinstance(idx, int) and 0 <= idx < len(graph_results):
202
+ raw_score = float(r.get("relevance_score", r.get("score", 0.0)))
203
+ ranked_indices.append(idx)
204
+ scores.append(raw_score)
205
+ reconstructed_items = self.reranker_strategy.reconstruct_items(
206
+ ranked_indices=ranked_indices,
207
+ scores=scores,
208
+ tracker=tracker,
209
+ original_items=original_items,
210
+ top_k=top_k,
211
+ graph_results=graph_results,
212
+ documents=documents,
213
+ )
214
+ return reconstructed_items
215
+
216
+ elif "data" in data:
217
+ # Format: {"data": [{"score": float}, ...]} aligned by list order
218
+ rows = data.get("data", [])
219
+ # Build a list of scores aligned with our 'documents' (pairs)
220
+ score_list = [float(r.get("score", 0.0)) for r in rows]
221
+
222
+ if len(score_list) < len(graph_results):
223
+ score_list += [0.0] * (len(graph_results) - len(score_list))
224
+ elif len(score_list) > len(graph_results):
225
+ score_list = score_list[: len(graph_results)]
226
+
227
+ scored_items = []
228
+ for item, raw_score in zip(graph_results, score_list, strict=False):
229
+ score = self._apply_boost_generic(item, raw_score, search_filter)
230
+ scored_items.append((item, score))
231
+
232
+ scored_items.sort(key=lambda x: x[1], reverse=True)
233
+ return scored_items[: min(top_k, len(scored_items))]
234
+
235
+ else:
236
+ # Unexpected response schema: return a 0.0-scored fallback of the first top_k valid docs
237
+ # Note: we use 'pairs' to keep alignment with valid (string) docs.
238
+ return [(item, 0.0) for item in graph_results[:top_k]]
239
+
240
+ except Exception as e:
241
+ # Network error, timeout, JSON decode error, etc.
242
+ # Degrade gracefully by returning first top_k valid docs with 0.0 score.
243
+ logger.error(f"[HTTPBGEReranker] request failed: {e}")
244
+ return [(item, 0.0) for item in graph_results[:top_k]]
245
+
246
+ def _get_attr_or_key(self, obj: Any, key: str) -> Any:
247
+ """
248
+ Resolve `key` on `obj` with one-level fallback into `obj.metadata`.
249
+
250
+ Priority:
251
+ 1) obj.<key>
252
+ 2) obj[key]
253
+ 3) obj.metadata.<key>
254
+ 4) obj.metadata[key]
255
+ """
256
+ if obj is None:
257
+ return None
258
+
259
+ # support input like "metadata.user_id"
260
+ if "." in key:
261
+ head, tail = key.split(".", 1)
262
+ base = self._get_attr_or_key(obj, head)
263
+ return self._get_attr_or_key(base, tail)
264
+
265
+ def _resolve(o: Any, k: str):
266
+ if o is None:
267
+ return None
268
+ v = getattr(o, k, None)
269
+ if v is not None:
270
+ return v
271
+ if hasattr(o, "get"):
272
+ try:
273
+ return o.get(k)
274
+ except Exception:
275
+ return None
276
+ return None
277
+
278
+ # 1) find in obj
279
+ v = _resolve(obj, key)
280
+ if v is not None:
281
+ return v
282
+
283
+ # 2) find in obj.metadata
284
+ meta = _resolve(obj, "metadata")
285
+ if meta is not None:
286
+ return _resolve(meta, key)
287
+
288
+ return None
289
+
290
+ def _apply_boost_generic(
291
+ self,
292
+ item: TextualMemoryItem,
293
+ base_score: float,
294
+ search_filter: dict | None,
295
+ ) -> float:
296
+ """
297
+ Multiply base_score by (1 + weight) for each matching key in search_filter.
298
+ - key resolution: self._get_attr_or_key(item, key)
299
+ - weight = boost_weights.get(key, self.boost_default)
300
+ - unknown key -> one-time warning
301
+ """
302
+ if not search_filter:
303
+ return base_score
304
+
305
+ score = float(base_score)
306
+
307
+ for key, wanted in search_filter.items():
308
+ # _get_attr_or_key automatically find key in item and
309
+ # item.metadata ("metadata.user_id" supported)
310
+ resolved = self._get_attr_or_key(item, key)
311
+
312
+ if resolved is None:
313
+ if self.warn_unknown_filter_keys and key not in self._warned_missing_keys:
314
+ logger.warning(
315
+ "[HTTPBGEReranker] search_filter key '%s' not found on TextualMemoryItem or metadata",
316
+ key,
317
+ )
318
+ self._warned_missing_keys.add(key)
319
+ continue
320
+
321
+ if _value_matches(resolved, wanted):
322
+ w = float(self.boost_weights.get(key, self.boost_default))
323
+ if w != 0.0:
324
+ score *= 1.0 + w
325
+ score = min(max(0.0, score), 1.0)
326
+
327
+ return score
memos/reranker/noop.py ADDED
@@ -0,0 +1,19 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ from memos.utils import timed
6
+
7
+ from .base import BaseReranker
8
+
9
+
10
+ if TYPE_CHECKING:
11
+ from memos.memories.textual.item import TextualMemoryItem
12
+
13
+
14
+ class NoopReranker(BaseReranker):
15
+ @timed
16
+ def rerank(
17
+ self, query: str, graph_results: list, top_k: int, **kwargs
18
+ ) -> list[tuple[TextualMemoryItem, float]]:
19
+ return [(item, 0.0) for item in graph_results[:top_k]]
@@ -0,0 +1,4 @@
1
+ from .factory import RerankerStrategyFactory
2
+
3
+
4
+ __all__ = ["RerankerStrategyFactory"]