MemoryOS 2.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memoryos-2.0.3.dist-info/METADATA +418 -0
- memoryos-2.0.3.dist-info/RECORD +315 -0
- memoryos-2.0.3.dist-info/WHEEL +4 -0
- memoryos-2.0.3.dist-info/entry_points.txt +3 -0
- memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
- memos/__init__.py +20 -0
- memos/api/client.py +571 -0
- memos/api/config.py +1018 -0
- memos/api/context/dependencies.py +50 -0
- memos/api/exceptions.py +53 -0
- memos/api/handlers/__init__.py +62 -0
- memos/api/handlers/add_handler.py +158 -0
- memos/api/handlers/base_handler.py +194 -0
- memos/api/handlers/chat_handler.py +1401 -0
- memos/api/handlers/component_init.py +388 -0
- memos/api/handlers/config_builders.py +190 -0
- memos/api/handlers/feedback_handler.py +93 -0
- memos/api/handlers/formatters_handler.py +237 -0
- memos/api/handlers/memory_handler.py +316 -0
- memos/api/handlers/scheduler_handler.py +497 -0
- memos/api/handlers/search_handler.py +222 -0
- memos/api/handlers/suggestion_handler.py +117 -0
- memos/api/mcp_serve.py +614 -0
- memos/api/middleware/request_context.py +101 -0
- memos/api/product_api.py +38 -0
- memos/api/product_models.py +1206 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +477 -0
- memos/api/routers/server_router.py +394 -0
- memos/api/server_api.py +44 -0
- memos/api/start_api.py +433 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/charactertext_chunker.py +41 -0
- memos/chunkers/factory.py +24 -0
- memos/chunkers/markdown_chunker.py +62 -0
- memos/chunkers/sentence_chunker.py +54 -0
- memos/chunkers/simple_chunker.py +50 -0
- memos/cli.py +113 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +59 -0
- memos/configs/embedder.py +88 -0
- memos/configs/graph_db.py +236 -0
- memos/configs/internet_retriever.py +100 -0
- memos/configs/llm.py +151 -0
- memos/configs/mem_agent.py +54 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +105 -0
- memos/configs/mem_os.py +83 -0
- memos/configs/mem_reader.py +91 -0
- memos/configs/mem_scheduler.py +385 -0
- memos/configs/mem_user.py +70 -0
- memos/configs/memory.py +324 -0
- memos/configs/parser.py +38 -0
- memos/configs/reranker.py +18 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +80 -0
- memos/context/context.py +355 -0
- memos/dependency.py +52 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/ark.py +95 -0
- memos/embedders/base.py +106 -0
- memos/embedders/factory.py +29 -0
- memos/embedders/ollama.py +77 -0
- memos/embedders/sentence_transformer.py +49 -0
- memos/embedders/universal_api.py +51 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +274 -0
- memos/graph_dbs/factory.py +27 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/nebular.py +1794 -0
- memos/graph_dbs/neo4j.py +1942 -0
- memos/graph_dbs/neo4j_community.py +1058 -0
- memos/graph_dbs/polardb.py +5446 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +25 -0
- memos/llms/deepseek.py +13 -0
- memos/llms/factory.py +38 -0
- memos/llms/hf.py +443 -0
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +135 -0
- memos/llms/openai.py +222 -0
- memos/llms/openai_new.py +198 -0
- memos/llms/qwen.py +13 -0
- memos/llms/utils.py +14 -0
- memos/llms/vllm.py +218 -0
- memos/log.py +237 -0
- memos/mem_agent/base.py +19 -0
- memos/mem_agent/deepsearch_agent.py +391 -0
- memos/mem_agent/factory.py +36 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +30 -0
- memos/mem_cube/general.py +240 -0
- memos/mem_cube/navie.py +172 -0
- memos/mem_cube/utils.py +169 -0
- memos/mem_feedback/base.py +15 -0
- memos/mem_feedback/feedback.py +1192 -0
- memos/mem_feedback/simple_feedback.py +40 -0
- memos/mem_feedback/utils.py +230 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +1203 -0
- memos/mem_os/main.py +582 -0
- memos/mem_os/product.py +1608 -0
- memos/mem_os/product_server.py +455 -0
- memos/mem_os/utils/default_config.py +359 -0
- memos/mem_os/utils/format_utils.py +1403 -0
- memos/mem_os/utils/reference_utils.py +162 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +47 -0
- memos/mem_reader/factory.py +53 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/multi_modal_struct.py +965 -0
- memos/mem_reader/read_multi_modal/__init__.py +43 -0
- memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
- memos/mem_reader/read_multi_modal/base.py +273 -0
- memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
- memos/mem_reader/read_multi_modal/image_parser.py +359 -0
- memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
- memos/mem_reader/read_multi_modal/string_parser.py +139 -0
- memos/mem_reader/read_multi_modal/system_parser.py +327 -0
- memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
- memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
- memos/mem_reader/read_multi_modal/user_parser.py +218 -0
- memos/mem_reader/read_multi_modal/utils.py +358 -0
- memos/mem_reader/simple_struct.py +912 -0
- memos/mem_reader/strategy_struct.py +163 -0
- memos/mem_reader/utils.py +157 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
- memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +1319 -0
- memos/mem_scheduler/general_modules/__init__.py +0 -0
- memos/mem_scheduler/general_modules/api_misc.py +137 -0
- memos/mem_scheduler/general_modules/base.py +80 -0
- memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
- memos/mem_scheduler/general_modules/misc.py +313 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
- memos/mem_scheduler/general_modules/task_threads.py +315 -0
- memos/mem_scheduler/general_scheduler.py +1495 -0
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
- memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
- memos/mem_scheduler/monitors/general_monitor.py +394 -0
- memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
- memos/mem_scheduler/optimized_scheduler.py +410 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
- memos/mem_scheduler/orm_modules/base_model.py +729 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/orm_modules/redis_model.py +699 -0
- memos/mem_scheduler/scheduler_factory.py +23 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
- memos/mem_scheduler/schemas/api_schemas.py +233 -0
- memos/mem_scheduler/schemas/general_schemas.py +55 -0
- memos/mem_scheduler/schemas/message_schemas.py +173 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
- memos/mem_scheduler/schemas/task_schemas.py +132 -0
- memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
- memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
- memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
- memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
- memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
- memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/api_utils.py +77 -0
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +50 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/metrics.py +125 -0
- memos/mem_scheduler/utils/misc_utils.py +290 -0
- memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
- memos/mem_scheduler/utils/status_tracker.py +229 -0
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
- memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +502 -0
- memos/mem_user/persistent_factory.py +98 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/redis_persistent_user_manager.py +225 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +56 -0
- memos/memories/activation/kv.py +292 -0
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +42 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +92 -0
- memos/memories/textual/general.py +236 -0
- memos/memories/textual/item.py +304 -0
- memos/memories/textual/naive.py +187 -0
- memos/memories/textual/prefer_text_memory/__init__.py +0 -0
- memos/memories/textual/prefer_text_memory/adder.py +504 -0
- memos/memories/textual/prefer_text_memory/config.py +106 -0
- memos/memories/textual/prefer_text_memory/extractor.py +221 -0
- memos/memories/textual/prefer_text_memory/factory.py +85 -0
- memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
- memos/memories/textual/prefer_text_memory/spliter.py +132 -0
- memos/memories/textual/prefer_text_memory/utils.py +93 -0
- memos/memories/textual/preference.py +344 -0
- memos/memories/textual/simple_preference.py +161 -0
- memos/memories/textual/simple_tree.py +69 -0
- memos/memories/textual/tree.py +459 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
- memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
- memos/memos_tools/dinding_report_bot.py +453 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +142 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +310 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/multi_mem_cube/__init__.py +0 -0
- memos/multi_mem_cube/composite_cube.py +86 -0
- memos/multi_mem_cube/single_cube.py +874 -0
- memos/multi_mem_cube/views.py +54 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +21 -0
- memos/parsers/markitdown.py +28 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +25 -0
- memos/reranker/concat.py +103 -0
- memos/reranker/cosine_local.py +102 -0
- memos/reranker/factory.py +72 -0
- memos/reranker/http_bge.py +324 -0
- memos/reranker/http_bge_strategy.py +327 -0
- memos/reranker/noop.py +19 -0
- memos/reranker/strategies/__init__.py +4 -0
- memos/reranker/strategies/base.py +61 -0
- memos/reranker/strategies/concat_background.py +94 -0
- memos/reranker/strategies/concat_docsource.py +110 -0
- memos/reranker/strategies/dialogue_common.py +109 -0
- memos/reranker/strategies/factory.py +31 -0
- memos/reranker/strategies/single_turn.py +107 -0
- memos/reranker/strategies/singleturn_outmem.py +98 -0
- memos/settings.py +10 -0
- memos/templates/__init__.py +0 -0
- memos/templates/advanced_search_prompts.py +211 -0
- memos/templates/cloud_service_prompt.py +107 -0
- memos/templates/instruction_completion.py +66 -0
- memos/templates/mem_agent_prompts.py +85 -0
- memos/templates/mem_feedback_prompts.py +822 -0
- memos/templates/mem_reader_prompts.py +1096 -0
- memos/templates/mem_reader_strategy_prompts.py +238 -0
- memos/templates/mem_scheduler_prompts.py +626 -0
- memos/templates/mem_search_prompts.py +93 -0
- memos/templates/mos_prompts.py +403 -0
- memos/templates/prefer_complete_prompt.py +735 -0
- memos/templates/tool_mem_prompts.py +139 -0
- memos/templates/tree_reorganize_prompts.py +230 -0
- memos/types/__init__.py +34 -0
- memos/types/general_types.py +151 -0
- memos/types/openai_chat_completion_types/__init__.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
- memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
- memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
- memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
- memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
- memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
- memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
- memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
- memos/utils.py +123 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +117 -0
- memos/vec_dbs/factory.py +23 -0
- memos/vec_dbs/item.py +50 -0
- memos/vec_dbs/milvus.py +654 -0
- memos/vec_dbs/qdrant.py +355 -0
|
@@ -0,0 +1,324 @@
|
|
|
1
|
+
# memos/reranker/http_bge.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
from collections.abc import Iterable
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
from memos.log import get_logger
|
|
12
|
+
from memos.utils import timed_with_status
|
|
13
|
+
|
|
14
|
+
from .base import BaseReranker
|
|
15
|
+
from .concat import concat_original_source
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
logger = get_logger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
23
|
+
|
|
24
|
+
# Strip a leading "[...]" tag (e.g., "[2025-09-01] ..." or "[meta] ...")
|
|
25
|
+
# before sending text to the reranker. This keeps inputs clean and
|
|
26
|
+
# avoids misleading the model with bracketed prefixes.
|
|
27
|
+
_TAG1 = re.compile(r"^\s*\[[^\]]*\]\s*")
|
|
28
|
+
DEFAULT_BOOST_WEIGHTS = {"user_id": 0.5, "tags": 0.2, "session_id": 0.3}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _value_matches(item_value: Any, wanted: Any) -> bool:
|
|
32
|
+
"""
|
|
33
|
+
Generic matching:
|
|
34
|
+
- if item_value is list/tuple/set: check membership (any match if wanted is iterable)
|
|
35
|
+
- else: equality (any match if wanted is iterable)
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def _iterable(x):
|
|
39
|
+
# exclude strings from "iterable"
|
|
40
|
+
return isinstance(x, Iterable) and not isinstance(x, str | bytes)
|
|
41
|
+
|
|
42
|
+
if _iterable(item_value):
|
|
43
|
+
if _iterable(wanted):
|
|
44
|
+
return any(w in item_value for w in wanted)
|
|
45
|
+
return wanted in item_value
|
|
46
|
+
else:
|
|
47
|
+
if _iterable(wanted):
|
|
48
|
+
return any(item_value == w for w in wanted)
|
|
49
|
+
return item_value == wanted
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class HTTPBGEReranker(BaseReranker):
    """
    HTTP-based BGE reranker.

    This class sends (query, documents[]) to a remote HTTP endpoint that
    performs cross-encoder-style re-ranking (e.g., BGE reranker) and returns
    relevance scores. It then pairs those scores with the candidate items and
    returns (item, score) pairs sorted by score.

    Notes
    -----
    - The endpoint is expected to accept JSON:
        {
          "model": "<model-name>",
          "query": "<query text>",
          "documents": ["doc1", "doc2", ...]
        }
    - Two response shapes are supported:
        1) {"results": [{"index": <int>, "relevance_score": <float>}, ...]}
           where "index" refers to the *position in the documents array*.
        2) {"data": [{"score": <float>}, ...]}  (aligned by list order)
    - A response with neither key yields the first ``top_k`` candidates with
      0.0 scores. Exceptions (HTTP errors, timeouts, malformed rows) are not
      caught here; they are handed to the ``timed_with_status`` decorator,
      which is configured with a 0.0-score fallback.
      NOTE(review): the decorator lives in ``memos.utils``; confirm it
      actually invokes ``fallback`` on exceptions.
    """

    def __init__(
        self,
        reranker_url: str,
        token: str = "",
        model: str = "bge-reranker-v2-m3",
        timeout: int = 10,
        max_query_tokens: int | None = None,
        concate_len: int | None = None,
        headers_extra: dict | None = None,
        rerank_source: str | None = None,
        boost_weights: dict[str, float] | None = None,
        boost_default: float = 0.0,
        warn_unknown_filter_keys: bool = True,
        **kwargs,
    ):
        """
        Parameters
        ----------
        reranker_url : str
            HTTP endpoint for the reranker service.
        token : str, optional
            Bearer token for auth. If non-empty, added to the Authorization header
            (unless ``headers_extra`` already supplies one).
        model : str, optional
            Model identifier understood by the server.
        timeout : int, optional
            Request timeout (seconds).
        max_query_tokens : int | None, optional
            Queries longer than this are shortened before being sent. The
            comparison uses ``len(query)``, i.e. characters, not model tokens.
        concate_len : int | None, optional
            Total length budget of a shortened query (half from the start,
            half from the end). Falls back to ``max_query_tokens`` when unset.
        headers_extra : dict | None, optional
            Additional headers to merge into the request headers.
        rerank_source : str | None, optional
            When set, documents are built by
            ``concat_original_source(graph_results, rerank_source)`` instead of
            from each item's ``memory`` text.
        boost_weights : dict[str, float] | None, optional
            Per-key boost weights; defaults to a copy of ``DEFAULT_BOOST_WEIGHTS``.
        boost_default : float, optional
            Weight used for filter keys missing from ``boost_weights``.
        warn_unknown_filter_keys : bool, optional
            Log a one-time warning for filter keys that cannot be resolved on an item.
        **kwargs
            Accepted and ignored.

        Raises
        ------
        ValueError
            If ``reranker_url`` is empty.
        """
        if not reranker_url:
            raise ValueError("reranker_url must not be empty")
        self.reranker_url = reranker_url
        self.token = token or ""
        self.model = model
        self.timeout = timeout
        self.max_query_tokens = max_query_tokens
        self.concate_len = concate_len
        self.headers_extra = headers_extra or {}
        self.rerank_source = rerank_source

        self.boost_weights = (
            DEFAULT_BOOST_WEIGHTS.copy()
            if boost_weights is None
            else {k: float(v) for k, v in boost_weights.items()}
        )
        self.boost_default = float(boost_default)
        self.warn_unknown_filter_keys = bool(warn_unknown_filter_keys)
        # Filter keys already warned about, so each is reported only once.
        self._warned_missing_keys: set[str] = set()

    @timed_with_status(
        log_prefix="model_timed_rerank",
        log_extra_args={"model_name_or_path": "reranker"},
        fallback=lambda exc, self, query, graph_results, top_k, *a, **kw: [
            (item, 0.0) for item in graph_results[:top_k]
        ],
    )
    def rerank(
        self,
        query: str,
        graph_results: list[TextualMemoryItem] | list[dict[str, Any]],
        top_k: int,
        search_priority: dict | None = None,
        **kwargs,
    ) -> list[tuple[TextualMemoryItem, float]]:
        """
        Rank candidate memories by relevance to the query.

        Parameters
        ----------
        query : str
            The search query.
        graph_results : list[TextualMemoryItem] | list[dict]
            Candidate items to re-rank. Unless ``rerank_source`` is set, only
            items with a non-empty string ``memory`` (attribute or dict key)
            are sent to the service; the rest are dropped from the result.
        top_k : int
            Return at most this many items.
        search_priority : dict | None, optional
            Mapping of item key -> wanted value(s). Items matching a key get
            their score boosted (see ``_apply_boost_generic``).

        Returns
        -------
        list[tuple[TextualMemoryItem, float]]
            Re-ranked items with scores, sorted descending by score.
        """
        query = self._truncate_query(query)

        if not graph_results:
            return []

        if self.rerank_source:
            documents = concat_original_source(graph_results, self.rerank_source)
        else:
            # Keep 'documents' and 'graph_results' index-aligned: only items with
            # a non-empty string memory are kept, so an index returned by the
            # server addresses the same position in both lists.
            documents = []
            usable_items = []
            for item in graph_results:
                m = item.get("memory") if isinstance(item, dict) else getattr(item, "memory", None)
                if isinstance(m, str) and m:
                    documents.append(_TAG1.sub("", m))
                    usable_items.append(item)
            graph_results = usable_items

        logger.info(f"[HTTPBGERerankerSample] query: {query} , documents: {documents[:5]}...")

        if not documents:
            return []

        payload = {"model": self.model, "query": query, "documents": documents}

        # Make the HTTP request to the reranker service
        resp = requests.post(
            self.reranker_url, headers=self._build_headers(), json=payload, timeout=self.timeout
        )
        resp.raise_for_status()
        data = resp.json()

        if "results" in data:
            scored_items = self._score_indexed_rows(
                data.get("results", []), graph_results, search_priority
            )
        elif "data" in data:
            scored_items = self._score_aligned_rows(
                data.get("data", []), graph_results, search_priority
            )
        else:
            # Unexpected response schema: 0.0-scored fallback of the first top_k
            # candidates that were sent to the service.
            return [(item, 0.0) for item in graph_results[:top_k]]

        scored_items.sort(key=lambda pair: pair[1], reverse=True)
        return scored_items[:top_k]

    def _truncate_query(self, query: str) -> str:
        """Shorten an over-long query to its head and tail, joined by a newline."""
        if not self.max_query_tokens or len(query) <= self.max_query_tokens:
            return query

        # concate_len defaults to None; use max_query_tokens as the budget then,
        # instead of failing on `None // 2`.
        budget = self.concate_len or self.max_query_tokens
        half = budget // 2
        if half < 1:
            # query[-0:] is the whole string, so a zero half-length would make
            # the query longer rather than shorter. Leave it untouched.
            return query
        return query[:half] + "\n" + query[-half:]

    def _build_headers(self) -> dict:
        """Assemble request headers: JSON content type, extras, optional bearer auth."""
        headers = {"Content-Type": "application/json", **self.headers_extra}
        # Send the configured token as documented, but never override an
        # Authorization header supplied explicitly through headers_extra.
        already_authorized = any(str(name).lower() == "authorization" for name in headers)
        if self.token and not already_authorized:
            headers["Authorization"] = f"Bearer {self.token}"
        return headers

    def _score_indexed_rows(
        self,
        rows: list[dict[str, Any]],
        candidates: list,
        search_priority: dict | None,
    ) -> list[tuple[TextualMemoryItem, float]]:
        """Score a {"results": [{"index", "relevance_score"}, ...]} response."""
        scored: list[tuple[TextualMemoryItem, float]] = []
        for row in rows:
            idx = row.get("index")
            # 'index' is a position in the documents array, which is aligned
            # with 'candidates'. Rows with a missing or out-of-range index are skipped.
            if isinstance(idx, int) and 0 <= idx < len(candidates):
                raw_score = float(row.get("relevance_score", row.get("score", 0.0)))
                item = candidates[idx]
                scored.append((item, self._apply_boost_generic(item, raw_score, search_priority)))
        return scored

    def _score_aligned_rows(
        self,
        rows: list[dict[str, Any]],
        candidates: list,
        search_priority: dict | None,
    ) -> list[tuple[TextualMemoryItem, float]]:
        """Score a {"data": [{"score"}, ...]} response aligned by list order."""
        # Trim surplus scores and pad missing ones with 0.0 so that every
        # candidate has exactly one score.
        scores = [float(row.get("score", 0.0)) for row in rows][: len(candidates)]
        scores += [0.0] * (len(candidates) - len(scores))
        return [
            (item, self._apply_boost_generic(item, raw_score, search_priority))
            for item, raw_score in zip(candidates, scores, strict=False)
        ]

    def _get_attr_or_key(self, obj: Any, key: str) -> Any:
        """
        Resolve `key` on `obj` with one-level fallback into `obj.metadata`.

        Priority:
          1) obj.<key>
          2) obj[key]            (via ``obj.get(key)``)
          3) obj.metadata.<key>
          4) obj.metadata[key]

        A dotted key such as "metadata.user_id" is resolved segment by
        segment. Returns None when nothing is found or `obj` is None.
        """
        if obj is None:
            return None

        # support input like "metadata.user_id"
        if "." in key:
            head, tail = key.split(".", 1)
            base = self._get_attr_or_key(obj, head)
            return self._get_attr_or_key(base, tail)

        def _resolve(o: Any, k: str):
            if o is None:
                return None
            v = getattr(o, k, None)
            if v is not None:
                return v
            if hasattr(o, "get"):
                # Best-effort mapping lookup: any failure counts as "not found".
                try:
                    return o.get(k)
                except Exception:
                    return None
            return None

        # 1) find in obj
        v = _resolve(obj, key)
        if v is not None:
            return v

        # 2) find in obj.metadata
        meta = _resolve(obj, "metadata")
        if meta is not None:
            return _resolve(meta, key)

        return None

    def _apply_boost_generic(
        self,
        item: TextualMemoryItem,
        base_score: float,
        search_filter: dict | None,
    ) -> float:
        """
        Multiply base_score by (1 + weight) for each matching key in search_filter.
        - key resolution: self._get_attr_or_key(item, key)
        - weight = boost_weights.get(key, self.boost_default)
        - unknown key -> one-time warning
        - after each applied boost the score is clamped to [0.0, 1.0];
          a score that receives no boost is returned unclamped
        """
        if not search_filter:
            return base_score

        score = float(base_score)

        for key, wanted in search_filter.items():
            # _get_attr_or_key automatically find key in item and
            # item.metadata ("metadata.user_id" supported)
            resolved = self._get_attr_or_key(item, key)

            if resolved is None:
                if self.warn_unknown_filter_keys and key not in self._warned_missing_keys:
                    logger.warning(
                        "[HTTPBGEReranker] search_filter key '%s' not found on TextualMemoryItem or metadata",
                        key,
                    )
                    self._warned_missing_keys.add(key)
                continue

            if _value_matches(resolved, wanted):
                w = float(self.boost_weights.get(key, self.boost_default))
                if w != 0.0:
                    score *= 1.0 + w
                    score = min(max(0.0, score), 1.0)

        return score
|
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
# memos/reranker/http_bge_strategy.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
from collections.abc import Iterable
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
from memos.log import get_logger
|
|
12
|
+
from memos.reranker.strategies import RerankerStrategyFactory
|
|
13
|
+
from memos.utils import timed
|
|
14
|
+
|
|
15
|
+
from .base import BaseReranker
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
logger = get_logger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
23
|
+
|
|
24
|
+
# Strip a leading "[...]" tag (e.g., "[2025-09-01] ..." or "[meta] ...")
|
|
25
|
+
# before sending text to the reranker. This keeps inputs clean and
|
|
26
|
+
# avoids misleading the model with bracketed prefixes.
|
|
27
|
+
_TAG1 = re.compile(r"^\s*\[[^\]]*\]\s*")
|
|
28
|
+
DEFAULT_BOOST_WEIGHTS = {"user_id": 0.5, "tags": 0.2, "session_id": 0.3}
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _value_matches(item_value: Any, wanted: Any) -> bool:
|
|
32
|
+
"""
|
|
33
|
+
Generic matching:
|
|
34
|
+
- if item_value is list/tuple/set: check membership (any match if wanted is iterable)
|
|
35
|
+
- else: equality (any match if wanted is iterable)
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
def _iterable(x):
|
|
39
|
+
# exclude strings from "iterable"
|
|
40
|
+
return isinstance(x, Iterable) and not isinstance(x, str | bytes)
|
|
41
|
+
|
|
42
|
+
if _iterable(item_value):
|
|
43
|
+
if _iterable(wanted):
|
|
44
|
+
return any(w in item_value for w in wanted)
|
|
45
|
+
return wanted in item_value
|
|
46
|
+
else:
|
|
47
|
+
if _iterable(wanted):
|
|
48
|
+
return any(item_value == w for w in wanted)
|
|
49
|
+
return item_value == wanted
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class HTTPBGERerankerStrategy(BaseReranker):
    """
    HTTP-based BGE reranker.

    This class sends (query, documents[]) to a remote HTTP endpoint that
    performs cross-encoder-style re-ranking (e.g., BGE reranker) and returns
    relevance scores. Building the documents and mapping scores back onto the
    TextualMemoryItem list is delegated to the configured reranker strategy
    (created through `RerankerStrategyFactory`).

    Notes
    -----
    - The endpoint is expected to accept JSON:
        {
          "model": "<model-name>",
          "query": "<query text>",
          "documents": ["doc1", "doc2", ...]
        }
    - Two response shapes are supported:
        1) {"results": [{"index": <int>, "relevance_score": <float>}, ...]}
           where "index" refers to the *position in the documents array*.
        2) {"data": [{"score": <float>}, ...]}  (aligned by list order)
    - If the service fails or responds unexpectedly, this falls back to
      returning the first `top_k` original items with 0.0 scores (best-effort).
    """

    def __init__(
        self,
        reranker_url: str,
        token: str = "",
        model: str = "bge-reranker-v2-m3",
        timeout: int = 10,
        max_query_tokens: int | None = None,
        concate_len: int | None = None,
        headers_extra: dict | None = None,
        rerank_source: str | None = None,
        boost_weights: dict[str, float] | None = None,
        boost_default: float = 0.0,
        warn_unknown_filter_keys: bool = True,
        reranker_strategy: str = "single_turn",
        **kwargs,
    ):
        """
        Parameters
        ----------
        reranker_url : str
            HTTP endpoint for the reranker service. Must not be empty.
        token : str, optional
            Bearer token for auth. If non-empty, added to the Authorization header
            (an Authorization entry in `headers_extra` takes precedence).
        model : str, optional
            Model identifier understood by the server.
        timeout : int, optional
            Request timeout (seconds).
        max_query_tokens : int | None, optional
            If set, queries whose length (measured with `len(query)`, i.e. in
            characters) exceeds this value are shortened before being sent.
        concate_len : int | None, optional
            Total length kept when a query is shortened: half from its start
            and half from its end. Defaults to `max_query_tokens` when unset.
        headers_extra : dict | None, optional
            Additional headers to merge into the request headers.
        rerank_source : str | None, optional
            Accepted for signature compatibility; not used by this class.
        boost_weights : dict[str, float] | None, optional
            Per-filter-key boost weights. Defaults to a copy of
            `DEFAULT_BOOST_WEIGHTS`.
        boost_default : float, optional
            Weight used for filter keys missing from `boost_weights`.
        warn_unknown_filter_keys : bool, optional
            Whether to log a one-time warning for filter keys that cannot be
            resolved on an item.
        reranker_strategy : str, optional
            Strategy name handed to `RerankerStrategyFactory.from_config`.
        **kwargs
            Ignored.

        Raises
        ------
        ValueError
            If `reranker_url` is empty.
        """
        if not reranker_url:
            raise ValueError("reranker_url must not be empty")
        self.reranker_url = reranker_url
        self.token = token or ""
        self.model = model
        self.timeout = timeout
        self.max_query_tokens = max_query_tokens
        self.concate_len = concate_len
        self.headers_extra = headers_extra or {}

        self.boost_weights = (
            DEFAULT_BOOST_WEIGHTS.copy()
            if boost_weights is None
            else {k: float(v) for k, v in boost_weights.items()}
        )
        self.boost_default = float(boost_default)
        self.warn_unknown_filter_keys = bool(warn_unknown_filter_keys)
        # Filter keys already reported as unresolvable (keeps the warning one-time).
        self._warned_missing_keys: set[str] = set()
        self.reranker_strategy = RerankerStrategyFactory.from_config(reranker_strategy)

    @timed(log=True, log_prefix="RerankerStrategy")
    def rerank(
        self,
        query: str,
        graph_results: list[TextualMemoryItem],
        top_k: int,
        search_filter: dict | None = None,
        **kwargs,
    ) -> list[tuple[TextualMemoryItem, float]]:
        """
        Rank candidate memories by relevance to the query.

        Parameters
        ----------
        query : str
            The search query.
        graph_results : list[TextualMemoryItem]
            Candidate items to re-rank. They are turned into request
            documents by the configured reranker strategy.
        top_k : int
            Return at most this many items.
        search_filter : dict | None
            Optional {key: wanted} mapping. It is only applied when the
            service answers with the "data" response shape, where every
            matching key boosts the item's score (see `_apply_boost_generic`).

        Returns
        -------
        list[tuple[TextualMemoryItem, float]]
            Re-ranked items with scores. On any request/parsing failure or an
            unknown response shape, the first `top_k` inputs with score 0.0.
        """
        if self.max_query_tokens and len(query) > self.max_query_tokens:
            # concate_len is optional; without it, fall back to the limit itself
            # instead of failing on `None // 2`.
            budget = self.concate_len or self.max_query_tokens
            single_concate_len = budget // 2
            # Guard against 0: query[-0:] would be the whole query, not an empty tail.
            if single_concate_len > 0:
                query = query[:single_concate_len] + "\n" + query[-single_concate_len:]

        if not graph_results:
            return []

        tracker, original_items, documents = self.reranker_strategy.prepare_documents(
            query, graph_results, top_k
        )

        logger.info(
            f"[HTTPBGEWithSourceReranker] strategy: {self.reranker_strategy}, "
            f"query: {query}, documents count: {len(documents)}"
        )
        logger.info(f"[HTTPBGEWithSourceReranker] sample documents: {documents[:3]}...")

        if not documents:
            return []

        headers = {"Content-Type": "application/json"}
        if self.token:
            headers["Authorization"] = f"Bearer {self.token}"
        # Caller-supplied headers are merged last so they can override the defaults.
        headers.update(self.headers_extra)
        payload = {"model": self.model, "query": query, "documents": documents}

        try:
            # Make the HTTP request to the reranker service
            resp = requests.post(
                self.reranker_url, headers=headers, json=payload, timeout=self.timeout
            )
            resp.raise_for_status()
            data = resp.json()

            if "results" in data:
                # Format:
                # dict("results": [{"index": int, "relevance_score": float},
                # ...])
                rows = data.get("results", [])

                ranked_indices = []
                scores = []
                for r in rows:
                    idx = r.get("index")
                    # The returned index refers to a position in 'documents',
                    # so that is the list it has to be validated against; the
                    # strategy maps it back to the original items afterwards.
                    if isinstance(idx, int) and 0 <= idx < len(documents):
                        raw_score = float(r.get("relevance_score", r.get("score", 0.0)))
                        ranked_indices.append(idx)
                        scores.append(raw_score)
                reconstructed_items = self.reranker_strategy.reconstruct_items(
                    ranked_indices=ranked_indices,
                    scores=scores,
                    tracker=tracker,
                    original_items=original_items,
                    top_k=top_k,
                    graph_results=graph_results,
                    documents=documents,
                )
                return reconstructed_items

            elif "data" in data:
                # Format: {"data": [{"score": float}, ...]} aligned by list order
                rows = data.get("data", [])
                score_list = [float(r.get("score", 0.0)) for r in rows]

                # NOTE(review): the scores are in 'documents' order but are paired
                # with graph_results below; this presumes the strategy produced
                # one document per item in the same order - confirm.
                if len(score_list) < len(graph_results):
                    score_list += [0.0] * (len(graph_results) - len(score_list))
                elif len(score_list) > len(graph_results):
                    score_list = score_list[: len(graph_results)]

                scored_items = []
                for item, raw_score in zip(graph_results, score_list, strict=False):
                    score = self._apply_boost_generic(item, raw_score, search_filter)
                    scored_items.append((item, score))

                scored_items.sort(key=lambda x: x[1], reverse=True)
                return scored_items[: min(top_k, len(scored_items))]

            else:
                # Unexpected response schema: return a 0.0-scored fallback of
                # the first top_k input items.
                return [(item, 0.0) for item in graph_results[:top_k]]

        except Exception as e:
            # Network error, timeout, JSON decode error, etc.
            # Degrade gracefully by returning the first top_k items with 0.0 score.
            logger.error(f"[HTTPBGEReranker] request failed: {e}")
            return [(item, 0.0) for item in graph_results[:top_k]]

    def _get_attr_or_key(self, obj: Any, key: str) -> Any:
        """
        Resolve `key` on `obj` with one-level fallback into `obj.metadata`.

        Priority:
        1) obj.<key>          (skipped for plain dicts, see below)
        2) obj[key]           (via `obj.get(key)`)
        3) obj.metadata.<key>
        4) obj.metadata[key]

        Dotted keys such as "metadata.user_id" are resolved segment by
        segment. Returns None when nothing is found or `obj` is None.
        """
        if obj is None:
            return None

        # support input like "metadata.user_id"
        if "." in key:
            head, tail = key.split(".", 1)
            base = self._get_attr_or_key(obj, head)
            return self._get_attr_or_key(base, tail)

        def _resolve(o: Any, k: str):
            if o is None:
                return None
            if isinstance(o, dict):
                # Attribute lookup on a dict only ever finds dict methods
                # ("items", "keys", ...), never stored data, so go by key.
                return o.get(k)
            v = getattr(o, k, None)
            if v is not None:
                return v
            if hasattr(o, "get"):
                try:
                    return o.get(k)
                except Exception:
                    return None
            return None

        # 1) find in obj
        v = _resolve(obj, key)
        if v is not None:
            return v

        # 2) find in obj.metadata
        meta = _resolve(obj, "metadata")
        if meta is not None:
            return _resolve(meta, key)

        return None

    def _apply_boost_generic(
        self,
        item: TextualMemoryItem,
        base_score: float,
        search_filter: dict | None,
    ) -> float:
        """
        Multiply base_score by (1 + weight) for each matching key in search_filter.

        - key resolution: self._get_attr_or_key(item, key)
        - weight = boost_weights.get(key, self.boost_default), looked up with
          the filter key exactly as written
        - unknown key -> one-time warning (if warn_unknown_filter_keys)
        - after every applied boost the score is clamped to [0.0, 1.0]

        With an empty/None search_filter, base_score is returned unchanged.
        """
        if not search_filter:
            return base_score

        score = float(base_score)

        for key, wanted in search_filter.items():
            # _get_attr_or_key automatically looks in item and
            # item.metadata ("metadata.user_id" supported)
            resolved = self._get_attr_or_key(item, key)

            if resolved is None:
                if self.warn_unknown_filter_keys and key not in self._warned_missing_keys:
                    logger.warning(
                        "[HTTPBGEReranker] search_filter key '%s' not found on TextualMemoryItem or metadata",
                        key,
                    )
                    self._warned_missing_keys.add(key)
                continue

            if _value_matches(resolved, wanted):
                w = float(self.boost_weights.get(key, self.boost_default))
                if w != 0.0:
                    score *= 1.0 + w
                    score = min(max(0.0, score), 1.0)

        return score
|
memos/reranker/noop.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from memos.utils import timed
|
|
6
|
+
|
|
7
|
+
from .base import BaseReranker
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class NoopReranker(BaseReranker):
    """Reranker that does no scoring: it keeps the incoming order and truncates."""

    @timed
    def rerank(
        self, query: str, graph_results: list, top_k: int, **kwargs
    ) -> list[tuple[TextualMemoryItem, float]]:
        """Pair each of the first `top_k` candidates with a constant 0.0 score."""
        kept = graph_results[:top_k]
        placeholder_scores = [0.0] * len(kept)
        return list(zip(kept, placeholder_scores))
|