MemoryOS 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic. Click here for more details.
- {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/METADATA +8 -2
- {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/RECORD +92 -69
- {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/WHEEL +1 -1
- memos/__init__.py +1 -1
- memos/api/client.py +109 -0
- memos/api/config.py +35 -8
- memos/api/context/dependencies.py +15 -66
- memos/api/middleware/request_context.py +63 -0
- memos/api/product_api.py +5 -2
- memos/api/product_models.py +107 -16
- memos/api/routers/product_router.py +62 -19
- memos/api/start_api.py +13 -0
- memos/configs/graph_db.py +4 -0
- memos/configs/mem_scheduler.py +38 -3
- memos/configs/memory.py +13 -0
- memos/configs/reranker.py +18 -0
- memos/context/context.py +255 -0
- memos/embedders/factory.py +2 -0
- memos/graph_dbs/base.py +4 -2
- memos/graph_dbs/nebular.py +368 -223
- memos/graph_dbs/neo4j.py +49 -13
- memos/graph_dbs/neo4j_community.py +13 -3
- memos/llms/factory.py +2 -0
- memos/llms/openai.py +74 -2
- memos/llms/vllm.py +2 -0
- memos/log.py +128 -4
- memos/mem_cube/general.py +3 -1
- memos/mem_os/core.py +89 -23
- memos/mem_os/main.py +3 -6
- memos/mem_os/product.py +418 -154
- memos/mem_os/utils/reference_utils.py +20 -0
- memos/mem_reader/factory.py +2 -0
- memos/mem_reader/simple_struct.py +204 -82
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +569 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +126 -56
- memos/mem_scheduler/general_modules/dispatcher.py +2 -2
- memos/mem_scheduler/general_modules/misc.py +99 -1
- memos/mem_scheduler/general_modules/scheduler_logger.py +17 -11
- memos/mem_scheduler/general_scheduler.py +40 -88
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +308 -0
- memos/mem_scheduler/{general_modules → memory_manage_modules}/retriever.py +34 -7
- memos/mem_scheduler/monitors/dispatcher_monitor.py +9 -8
- memos/mem_scheduler/monitors/general_monitor.py +119 -39
- memos/mem_scheduler/optimized_scheduler.py +124 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/base_model.py +635 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/scheduler_factory.py +2 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +96 -29
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +33 -0
- memos/mem_scheduler/utils/filter_utils.py +1 -1
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_user/mysql_user_manager.py +4 -2
- memos/memories/activation/kv.py +2 -1
- memos/memories/textual/item.py +96 -17
- memos/memories/textual/naive.py +1 -1
- memos/memories/textual/tree.py +57 -3
- memos/memories/textual/tree_text_memory/organize/handler.py +4 -2
- memos/memories/textual/tree_text_memory/organize/manager.py +28 -14
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +1 -2
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +75 -23
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +10 -6
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -2
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +119 -21
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +172 -44
- memos/memories/textual/tree_text_memory/retrieve/utils.py +6 -4
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +5 -4
- memos/memos_tools/notification_utils.py +46 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +22 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/parsers/factory.py +2 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +24 -0
- memos/reranker/concat.py +59 -0
- memos/reranker/cosine_local.py +96 -0
- memos/reranker/factory.py +48 -0
- memos/reranker/http_bge.py +312 -0
- memos/reranker/noop.py +16 -0
- memos/templates/mem_reader_prompts.py +289 -40
- memos/templates/mem_scheduler_prompts.py +242 -0
- memos/templates/mos_prompts.py +133 -60
- memos/types.py +4 -1
- memos/api/context/context.py +0 -147
- memos/mem_scheduler/mos_for_test_scheduler.py +0 -146
- {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/entry_points.txt +0 -0
- {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info/licenses}/LICENSE +0 -0
- /memos/mem_scheduler/{general_modules → webservice_modules}/rabbitmq_service.py +0 -0
- /memos/mem_scheduler/{general_modules → webservice_modules}/redis_service.py +0 -0
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# memos/reranker/cosine_local.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING
|
|
5
|
+
|
|
6
|
+
from .base import BaseReranker
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
if TYPE_CHECKING:
|
|
10
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
import numpy as _np
|
|
14
|
+
|
|
15
|
+
_HAS_NUMPY = True
|
|
16
|
+
except Exception:
|
|
17
|
+
_HAS_NUMPY = False
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _cosine_one_to_many(q: list[float], m: list[list[float]]) -> list[float]:
|
|
21
|
+
"""
|
|
22
|
+
Compute cosine similarities between a single vector q and a matrix m (rows are candidates).
|
|
23
|
+
"""
|
|
24
|
+
if not _HAS_NUMPY:
|
|
25
|
+
|
|
26
|
+
def dot(a, b): # lowercase per N806
|
|
27
|
+
return sum(x * y for x, y in zip(a, b, strict=False))
|
|
28
|
+
|
|
29
|
+
def norm(a): # lowercase per N806
|
|
30
|
+
return sum(x * x for x in a) ** 0.5
|
|
31
|
+
|
|
32
|
+
qn = norm(q) or 1e-10
|
|
33
|
+
sims = []
|
|
34
|
+
for v in m:
|
|
35
|
+
vn = norm(v) or 1e-10
|
|
36
|
+
sims.append(dot(q, v) / (qn * vn))
|
|
37
|
+
return sims
|
|
38
|
+
|
|
39
|
+
qv = _np.asarray(q, dtype=float) # lowercase
|
|
40
|
+
mv = _np.asarray(m, dtype=float) # lowercase
|
|
41
|
+
qn = _np.linalg.norm(qv) or 1e-10
|
|
42
|
+
mn = _np.linalg.norm(mv, axis=1) # lowercase
|
|
43
|
+
dots = mv @ qv
|
|
44
|
+
return (dots / (mn * qn + 1e-10)).tolist()
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class CosineLocalReranker(BaseReranker):
    """Rerank candidates locally via cosine similarity between the query
    embedding and each item's stored embedding, optionally weighted per
    memory level (e.g. topic/concept/fact).
    """

    def __init__(
        self,
        level_weights: dict[str, float] | None = None,
        level_field: str = "background",
        **kwargs,
    ):
        # Default weights treat every level equally.
        self.level_weights = level_weights or {"topic": 1.0, "concept": 1.0, "fact": 1.0}
        self.level_field = level_field

    def rerank(
        self,
        query: str,
        graph_results: list,
        top_k: int,
        **kwargs,
    ) -> list[tuple[TextualMemoryItem, float]]:
        """Score and rank candidates; expects ``query_embedding`` in kwargs.

        Degrades gracefully: with no query embedding the original order is
        kept (score 0.0); with no candidate embeddings a neutral 0.5 is used.
        """
        if not graph_results:
            return []

        query_embedding: list[float] | None = kwargs.get("query_embedding")
        if not query_embedding:
            # Cannot score without a query vector; preserve incoming order.
            return [(item, 0.0) for item in graph_results[:top_k]]

        embedded = [
            candidate
            for candidate in graph_results
            if getattr(candidate, "metadata", None)
            and getattr(candidate.metadata, "embedding", None)
        ]
        if not embedded:
            return [(item, 0.5) for item in graph_results[:top_k]]

        similarities = _cosine_one_to_many(
            query_embedding, [candidate.metadata.embedding for candidate in embedded]
        )

        def _weight_of(candidate: TextualMemoryItem) -> float:
            level = getattr(candidate.metadata, self.level_field, None)
            return self.level_weights.get(level, 1.0)

        ranked = sorted(
            (
                (candidate, sim * _weight_of(candidate))
                for candidate, sim in zip(embedded, similarities, strict=False)
            ),
            key=lambda pair: pair[1],
            reverse=True,
        )

        selected = ranked[:top_k]
        if len(selected) < top_k:
            # Pad with unscored leftovers (sentinel score -1.0) up to top_k.
            picked_ids = {candidate.id for candidate, _ in selected}
            leftovers = [(c, -1.0) for c in graph_results if c.id not in picked_ids]
            selected.extend(leftovers[: top_k - len(selected)])

        return selected
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# memos/reranker/factory.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
|
|
6
|
+
# Import singleton decorator
|
|
7
|
+
from memos.memos_tools.singleton import singleton_factory
|
|
8
|
+
|
|
9
|
+
from .cosine_local import CosineLocalReranker
|
|
10
|
+
from .http_bge import HTTPBGEReranker
|
|
11
|
+
from .noop import NoopReranker
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from memos.configs.reranker import RerankerConfigFactory
|
|
16
|
+
|
|
17
|
+
from .base import BaseReranker
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RerankerFactory:
    """Builds a reranker backend from a ``RerankerConfigFactory``.

    ``from_config`` is wrapped by ``singleton_factory`` so repeated calls
    with an equivalent config reuse the same instance.
    """

    @staticmethod
    @singleton_factory("RerankerFactory")
    def from_config(cfg: RerankerConfigFactory | None) -> BaseReranker | None:
        """Create the reranker described by *cfg*.

        Returns ``None`` when no config is given; raises ``ValueError`` for
        an unknown backend name.
        """
        if not cfg:
            return None

        backend = (cfg.backend or "").lower()
        c: dict[str, Any] = cfg.config or {}

        if backend in {"http_bge", "bge"}:
            return HTTPBGEReranker(
                reranker_url=c.get("url") or c.get("endpoint") or c.get("reranker_url"),
                # Forward auth/boost settings the constructor accepts;
                # previously these config keys were silently dropped.
                token=c.get("token", ""),
                model=c.get("model", "bge-reranker-v2-m3"),
                timeout=int(c.get("timeout", 10)),
                headers_extra=c.get("headers_extra"),
                rerank_source=c.get("rerank_source"),
                boost_weights=c.get("boost_weights"),
                boost_default=float(c.get("boost_default", 0.0)),
            )

        if backend in {"cosine_local", "cosine"}:
            return CosineLocalReranker(
                level_weights=c.get("level_weights"),
                level_field=c.get("level_field", "background"),
            )

        if backend in {"noop", "none", "disabled"}:
            return NoopReranker()

        raise ValueError(f"Unknown reranker backend: {cfg.backend}")
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
# memos/reranker/http_bge.py
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
from collections.abc import Iterable
|
|
7
|
+
from typing import TYPE_CHECKING, Any
|
|
8
|
+
|
|
9
|
+
import requests
|
|
10
|
+
|
|
11
|
+
from memos.log import get_logger
|
|
12
|
+
|
|
13
|
+
from .base import BaseReranker
|
|
14
|
+
from .concat import concat_original_source
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
logger = get_logger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
22
|
+
|
|
23
|
+
# Strip a leading "[...]" tag (e.g., "[2025-09-01] ..." or "[meta] ...")
|
|
24
|
+
# before sending text to the reranker. This keeps inputs clean and
|
|
25
|
+
# avoids misleading the model with bracketed prefixes.
|
|
26
|
+
_TAG1 = re.compile(r"^\s*\[[^\]]*\]\s*")
|
|
27
|
+
DEFAULT_BOOST_WEIGHTS = {"user_id": 0.5, "tags": 0.2, "session_id": 0.3}
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _value_matches(item_value: Any, wanted: Any) -> bool:
|
|
31
|
+
"""
|
|
32
|
+
Generic matching:
|
|
33
|
+
- if item_value is list/tuple/set: check membership (any match if wanted is iterable)
|
|
34
|
+
- else: equality (any match if wanted is iterable)
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
def _iterable(x):
|
|
38
|
+
# exclude strings from "iterable"
|
|
39
|
+
return isinstance(x, Iterable) and not isinstance(x, str | bytes)
|
|
40
|
+
|
|
41
|
+
if _iterable(item_value):
|
|
42
|
+
if _iterable(wanted):
|
|
43
|
+
return any(w in item_value for w in wanted)
|
|
44
|
+
return wanted in item_value
|
|
45
|
+
else:
|
|
46
|
+
if _iterable(wanted):
|
|
47
|
+
return any(item_value == w for w in wanted)
|
|
48
|
+
return item_value == wanted
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class HTTPBGEReranker(BaseReranker):
    """
    HTTP-based BGE reranker.

    This class sends (query, documents[]) to a remote HTTP endpoint that
    performs cross-encoder-style re-ranking (e.g., BGE reranker) and returns
    relevance scores. It then maps those scores back onto the original
    TextualMemoryItem list and returns (item, score) pairs sorted by score.

    Notes
    -----
    - The endpoint is expected to accept JSON:
        {
          "model": "<model-name>",
          "query": "<query text>",
          "documents": ["doc1", "doc2", ...]
        }
    - Two response shapes are supported:
        1) {"results": [{"index": <int>, "relevance_score": <float>}, ...]}
           where "index" refers to the *position in the documents array*.
        2) {"data": [{"score": <float>}, ...]} (aligned by list order)
    - If the service fails or responds unexpectedly, this falls back to
      returning the original items with 0.0 scores (best-effort).
    """

    def __init__(
        self,
        reranker_url: str,
        token: str = "",
        model: str = "bge-reranker-v2-m3",
        timeout: int = 10,
        headers_extra: dict | None = None,
        rerank_source: list[str] | None = None,
        boost_weights: dict[str, float] | None = None,
        boost_default: float = 0.0,
        warn_unknown_filter_keys: bool = True,
        **kwargs,
    ):
        """
        Parameters
        ----------
        reranker_url : str
            HTTP endpoint for the reranker service.
        token : str, optional
            Bearer token for auth. If non-empty, added to the Authorization header.
        model : str, optional
            Model identifier understood by the server.
        timeout : int, optional
            Request timeout (seconds).
        headers_extra : dict | None, optional
            Additional headers to merge into the request headers.
        rerank_source : list[str] | None, optional
            When set, documents are built via ``concat_original_source``.
        boost_weights : dict[str, float] | None, optional
            Per-filter-key multiplicative boost weights; defaults to
            ``DEFAULT_BOOST_WEIGHTS``.
        boost_default : float, optional
            Weight used for filter keys absent from ``boost_weights``.
        warn_unknown_filter_keys : bool, optional
            Log once per key when a search_filter key cannot be resolved.
        """
        if not reranker_url:
            raise ValueError("reranker_url must not be empty")
        self.reranker_url = reranker_url
        self.token = token or ""
        self.model = model
        self.timeout = timeout
        self.headers_extra = headers_extra or {}
        self.concat_source = rerank_source

        self.boost_weights = (
            DEFAULT_BOOST_WEIGHTS.copy()
            if boost_weights is None
            else {k: float(v) for k, v in boost_weights.items()}
        )
        self.boost_default = float(boost_default)
        self.warn_unknown_filter_keys = bool(warn_unknown_filter_keys)
        self._warned_missing_keys: set[str] = set()

    def rerank(
        self,
        query: str,
        graph_results: list[TextualMemoryItem],
        top_k: int,
        search_filter: dict | None = None,
        **kwargs,
    ) -> list[tuple[TextualMemoryItem, float]]:
        """
        Rank candidate memories by relevance to the query.

        Parameters
        ----------
        query : str
            The search query.
        graph_results : list[TextualMemoryItem]
            Candidate items to re-rank. Each item is expected to have a
            `.memory` str field; non-strings are ignored.
        top_k : int
            Return at most this many items.
        search_filter : dict | None
            Optional key/value constraints used to boost matching items.

        Returns
        -------
        list[tuple[TextualMemoryItem, float]]
            Re-ranked items with scores, sorted descending by score.
        """
        if not graph_results:
            return []

        # Build the documents payload, keeping a parallel list of the items
        # each document came from. This is essential: if a candidate is
        # skipped (non-string or empty memory), indices returned by the
        # server refer to the *documents* array, not graph_results, and the
        # previous direct graph_results[idx] lookup misattributed scores.
        if self.concat_source:
            raw_docs = concat_original_source(graph_results, self.concat_source)
        else:
            raw_docs = [
                (_TAG1.sub("", m) if isinstance((m := getattr(item, "memory", None)), str) else m)
                for item in graph_results
            ]

        doc_items: list[TextualMemoryItem] = []
        documents: list[str] = []
        # assumes concat_original_source returns one doc per input item, in
        # order — TODO confirm against memos/reranker/concat.py
        for item, doc in zip(graph_results, raw_docs, strict=False):
            if isinstance(doc, str) and doc:
                doc_items.append(item)
                documents.append(doc)

        logger.info(f"[HTTPBGERerankerSample] query: {query} , documents: {documents[:5]}...")

        if not documents:
            return []

        headers = {"Content-Type": "application/json", **self.headers_extra}
        if self.token:
            # Honor the documented contract: a non-empty token is sent as a
            # Bearer Authorization header (previously accepted but unused).
            headers.setdefault("Authorization", f"Bearer {self.token}")
        payload = {"model": self.model, "query": query, "documents": documents}

        try:
            # Make the HTTP request to the reranker service
            resp = requests.post(
                self.reranker_url, headers=headers, json=payload, timeout=self.timeout
            )
            resp.raise_for_status()
            data = resp.json()

            scored_items: list[tuple[TextualMemoryItem, float]] = []

            if "results" in data:
                # Format:
                # {"results": [{"index": int, "relevance_score": float}, ...]}
                for r in data.get("results", []):
                    idx = r.get("index")
                    # The returned index refers to 'documents', so map it
                    # back through doc_items (not graph_results directly).
                    if isinstance(idx, int) and 0 <= idx < len(doc_items):
                        raw_score = float(r.get("relevance_score", r.get("score", 0.0)))
                        item = doc_items[idx]
                        # generic boost from search_filter matches
                        score = self._apply_boost_generic(item, raw_score, search_filter)
                        scored_items.append((item, score))

                scored_items.sort(key=lambda x: x[1], reverse=True)
                return scored_items[: min(top_k, len(scored_items))]

            elif "data" in data:
                # Format: {"data": [{"score": float}, ...]} aligned by list
                # order with the documents we actually sent.
                score_list = [float(r.get("score", 0.0)) for r in data.get("data", [])]
                if len(score_list) < len(doc_items):
                    # Pad missing scores with 0.0 so every sent doc gets a score.
                    score_list += [0.0] * (len(doc_items) - len(score_list))

                for item, raw_score in zip(doc_items, score_list, strict=False):
                    score = self._apply_boost_generic(item, raw_score, search_filter)
                    scored_items.append((item, score))

                scored_items.sort(key=lambda x: x[1], reverse=True)
                return scored_items[: min(top_k, len(scored_items))]

            else:
                # Unexpected response schema: return a 0.0-scored fallback
                # of the first top_k items (best-effort).
                return [(item, 0.0) for item in graph_results[:top_k]]

        except Exception as e:
            # Network error, timeout, JSON decode error, etc.
            # Degrade gracefully by returning first top_k items with 0.0 score.
            logger.error(f"[HTTPBGEReranker] request failed: {e}")
            return [(item, 0.0) for item in graph_results[:top_k]]

    def _get_attr_or_key(self, obj: Any, key: str) -> Any:
        """
        Resolve `key` on `obj` with one-level fallback into `obj.metadata`.

        Priority:
        1) obj.<key>
        2) obj[key]
        3) obj.metadata.<key>
        4) obj.metadata[key]

        Dotted keys like "metadata.user_id" are resolved recursively.
        Returns None when the key cannot be resolved.
        """
        if obj is None:
            return None

        # support input like "metadata.user_id"
        if "." in key:
            head, tail = key.split(".", 1)
            base = self._get_attr_or_key(obj, head)
            return self._get_attr_or_key(base, tail)

        def _resolve(o: Any, k: str):
            # attribute access first, then mapping-style .get()
            if o is None:
                return None
            v = getattr(o, k, None)
            if v is not None:
                return v
            if hasattr(o, "get"):
                try:
                    return o.get(k)
                except Exception:
                    return None
            return None

        # 1) find in obj
        v = _resolve(obj, key)
        if v is not None:
            return v

        # 2) find in obj.metadata
        meta = _resolve(obj, "metadata")
        if meta is not None:
            return _resolve(meta, key)

        return None

    def _apply_boost_generic(
        self,
        item: TextualMemoryItem,
        base_score: float,
        search_filter: dict | None,
    ) -> float:
        """
        Multiply base_score by (1 + weight) for each matching key in search_filter.

        - key resolution: self._get_attr_or_key(item, key)
        - weight = boost_weights.get(key, self.boost_default)
        - unknown key -> one-time warning
        - the boosted score is clamped into [0.0, 1.0] after each boost
        """
        if not search_filter:
            return base_score

        score = float(base_score)

        for key, wanted in search_filter.items():
            # _get_attr_or_key automatically looks the key up on item and
            # item.metadata ("metadata.user_id" supported)
            resolved = self._get_attr_or_key(item, key)

            if resolved is None:
                if self.warn_unknown_filter_keys and key not in self._warned_missing_keys:
                    logger.warning(
                        "[HTTPBGEReranker] search_filter key '%s' not found on TextualMemoryItem or metadata",
                        key,
                    )
                    self._warned_missing_keys.add(key)
                continue

            if _value_matches(resolved, wanted):
                w = float(self.boost_weights.get(key, self.boost_default))
                if w != 0.0:
                    score *= 1.0 + w
                    score = min(max(0.0, score), 1.0)

        return score
memos/reranker/noop.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from .base import BaseReranker
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
if TYPE_CHECKING:
|
|
9
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class NoopReranker(BaseReranker):
    """Pass-through reranker: keeps incoming order and assigns a 0.0 score."""

    def rerank(
        self, query: str, graph_results: list, top_k: int, **kwargs
    ) -> list[tuple[TextualMemoryItem, float]]:
        # Truncate to the first top_k candidates; no scoring is performed.
        head = graph_results[:top_k]
        return [(candidate, 0.0) for candidate in head]