MemoryOS 0.0.1__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- memoryos-0.1.12.dist-info/METADATA +257 -0
- memoryos-0.1.12.dist-info/RECORD +117 -0
- memos/__init__.py +20 -1
- memos/api/start_api.py +420 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/factory.py +22 -0
- memos/chunkers/sentence_chunker.py +35 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +45 -0
- memos/configs/embedder.py +53 -0
- memos/configs/graph_db.py +45 -0
- memos/configs/llm.py +71 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +89 -0
- memos/configs/mem_os.py +70 -0
- memos/configs/mem_reader.py +53 -0
- memos/configs/mem_scheduler.py +78 -0
- memos/configs/memory.py +190 -0
- memos/configs/parser.py +38 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +64 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/base.py +15 -0
- memos/embedders/factory.py +23 -0
- memos/embedders/ollama.py +74 -0
- memos/embedders/sentence_transformer.py +40 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +215 -0
- memos/graph_dbs/factory.py +21 -0
- memos/graph_dbs/neo4j.py +827 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +16 -0
- memos/llms/factory.py +25 -0
- memos/llms/hf.py +231 -0
- memos/llms/ollama.py +82 -0
- memos/llms/openai.py +34 -0
- memos/llms/utils.py +14 -0
- memos/log.py +78 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +29 -0
- memos/mem_cube/general.py +146 -0
- memos/mem_cube/utils.py +24 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +819 -0
- memos/mem_os/main.py +12 -0
- memos/mem_os/product.py +89 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +27 -0
- memos/mem_reader/factory.py +21 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/simple_struct.py +241 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/base_scheduler.py +164 -0
- memos/mem_scheduler/general_scheduler.py +305 -0
- memos/mem_scheduler/modules/__init__.py +0 -0
- memos/mem_scheduler/modules/base.py +74 -0
- memos/mem_scheduler/modules/dispatcher.py +103 -0
- memos/mem_scheduler/modules/monitor.py +82 -0
- memos/mem_scheduler/modules/redis_service.py +146 -0
- memos/mem_scheduler/modules/retriever.py +41 -0
- memos/mem_scheduler/modules/schemas.py +146 -0
- memos/mem_scheduler/scheduler_factory.py +21 -0
- memos/mem_scheduler/utils.py +26 -0
- memos/mem_user/user_manager.py +478 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +25 -0
- memos/memories/activation/kv.py +232 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +34 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +89 -0
- memos/memories/textual/general.py +286 -0
- memos/memories/textual/item.py +167 -0
- memos/memories/textual/naive.py +185 -0
- memos/memories/textual/tree.py +289 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +305 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +64 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +158 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +13 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +166 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +68 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +48 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +19 -0
- memos/parsers/markitdown.py +22 -0
- memos/settings.py +8 -0
- memos/templates/__init__.py +0 -0
- memos/templates/mem_reader_prompts.py +98 -0
- memos/templates/mem_scheduler_prompts.py +65 -0
- memos/types.py +55 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +105 -0
- memos/vec_dbs/factory.py +21 -0
- memos/vec_dbs/item.py +43 -0
- memos/vec_dbs/qdrant.py +292 -0
- memoryos-0.0.1.dist-info/METADATA +0 -53
- memoryos-0.0.1.dist-info/RECORD +0 -5
- {memoryos-0.0.1.dist-info → memoryos-0.1.12.dist-info}/LICENSE +0 -0
- {memoryos-0.0.1.dist-info → memoryos-0.1.12.dist-info}/WHEEL +0 -0
memos/memories/textual/tree_text_memory/retrieve/recall.py
ADDED
@@ -0,0 +1,158 @@
+import concurrent.futures
+
+from memos.embedders.factory import OllamaEmbedder
+from memos.graph_dbs.neo4j import Neo4jGraphDB
+from memos.memories.textual.item import TextualMemoryItem
+from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
+
+
+class GraphMemoryRetriever:
+    """
+    Unified memory retriever that combines both graph-based and vector-based retrieval logic.
+    """
+
+    def __init__(self, graph_store: Neo4jGraphDB, embedder: OllamaEmbedder):
+        self.graph_store = graph_store
+        self.embedder = embedder
+
+    def retrieve(
+        self,
+        query: str,
+        parsed_goal: ParsedTaskGoal,
+        top_k: int,
+        memory_scope: str,
+        query_embedding: list[list[float]] | None = None,
+    ) -> list[TextualMemoryItem]:
+        """
+        Perform hybrid memory retrieval:
+        - Run graph-based lookup from dispatch plan.
+        - Run vector similarity search from embedded query.
+        - Merge and return combined result set.
+
+        Args:
+            query (str): Original task query.
+            parsed_goal (dict): parsed_goal.
+            top_k (int): Number of candidates to return.
+            memory_scope (str): One of ['working', 'long_term', 'user'].
+            query_embedding(list of embedding): list of embedding of query
+
+        Returns:
+            list: Combined memory items.
+        """
+        if memory_scope not in ["WorkingMemory", "LongTermMemory", "UserMemory"]:
+            raise ValueError(f"Unsupported memory scope: {memory_scope}")
+
+        if memory_scope == "WorkingMemory":
+            # For working memory, retrieve all entries (no filtering)
+            working_memories = self.graph_store.get_all_memory_items(scope="WorkingMemory")
+            return [TextualMemoryItem.from_dict(record) for record in working_memories]
+
+        # Step 1: Structured graph-based retrieval
+        graph_results = self._graph_recall(parsed_goal, memory_scope)
+
+        # Step 2: Vector similarity search
+        vector_results = self._vector_recall(query_embedding, memory_scope, top_k)
+
+        # Step 3: Merge and deduplicate results
+        combined = {item.id: item for item in graph_results + vector_results}
+
+        # Debug: print ids that are in graph_results but missing from combined
+        graph_ids = {item.id for item in graph_results}
+        combined_ids = set(combined.keys())
+        lost_ids = graph_ids - combined_ids
+
+        if lost_ids:
+            print(
+                f"[DEBUG] The following nodes were in graph_results but missing in combined: {lost_ids}"
+            )
+
+        return list(combined.values())
+
+    def _graph_recall(
+        self, parsed_goal: ParsedTaskGoal, memory_scope: str
+    ) -> list[TextualMemoryItem]:
+        """
+        Perform structured node-based retrieval from Neo4j.
+        - keys must match exactly (n.key IN keys)
+        - tags must overlap with at least 2 input tags
+        - scope filters by memory_type if provided
+        """
+        candidate_ids = set()
+
+        # 1) key-based OR branch
+        if parsed_goal.keys:
+            key_filters = [
+                {"field": "key", "op": "in", "value": parsed_goal.keys},
+                {"field": "memory_type", "op": "=", "value": memory_scope},
+            ]
+            key_ids = self.graph_store.get_by_metadata(key_filters)
+            candidate_ids.update(key_ids)
+
+        # 2) tag-based OR branch
+        if parsed_goal.tags:
+            tag_filters = [
+                {"field": "tags", "op": "contains", "value": parsed_goal.tags},
+                {"field": "memory_type", "op": "=", "value": memory_scope},
+            ]
+            tag_ids = self.graph_store.get_by_metadata(tag_filters)
+            candidate_ids.update(tag_ids)
+
+        # No matches → return empty
+        if not candidate_ids:
+            return []
+
+        # Load nodes and post-filter
+        node_dicts = self.graph_store.get_nodes(list(candidate_ids))
+
+        final_nodes = []
+        for node in node_dicts:
+            meta = node.get("metadata", {})
+            node_key = meta.get("key")
+            node_tags = meta.get("tags", []) or []
+
+            keep = False
+            # key matches node_key
+            if parsed_goal.keys and node_key in parsed_goal.keys:
+                keep = True
+            # tags overlap with at least 2 input tags
+            elif parsed_goal.tags:
+                overlap = len(set(node_tags) & set(parsed_goal.tags))
+                if overlap >= 2:
+                    keep = True
+            if keep:
+                final_nodes.append(TextualMemoryItem.from_dict(node))
+        return final_nodes
+
+    def _vector_recall(
+        self,
+        query_embedding: list[list[float]],
+        memory_scope: str,
+        top_k: int = 20,
+        max_num: int = 5,
+    ) -> list[TextualMemoryItem]:
+        """
+        # TODO: tackle with post-filter and pre-filter(5.18+) better.
+        Perform vector-based similarity retrieval using query embedding.
+        """
+        all_matches = []
+
+        def search_single(vec):
+            return (
+                self.graph_store.search_by_embedding(vector=vec, top_k=top_k, scope=memory_scope)
+                or []
+            )
+
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            futures = [executor.submit(search_single, vec) for vec in query_embedding[:max_num]]
+            for future in concurrent.futures.as_completed(futures):
+                result = future.result()
+                all_matches.extend(result)
+
+        if not all_matches:
+            return []
+
+        # Step 3: Extract matched IDs and retrieve full nodes
+        unique_ids = set({r["id"] for r in all_matches})
+        node_dicts = self.graph_store.get_nodes(list(unique_ids))
+
+        return [TextualMemoryItem.from_dict(record) for record in node_dicts]

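Reviewer note: the merge step in `retrieve` deduplicates the graph branch and the vector branch by node id before returning. A minimal, self-contained sketch of that behaviour; the `Item` class below is a hypothetical stand-in for `TextualMemoryItem`, not part of the package.

from dataclasses import dataclass


@dataclass
class Item:
    # Hypothetical stand-in for TextualMemoryItem; only `id` matters for the merge.
    id: str
    memory: str


graph_results = [Item("a", "fact about the user"), Item("b", "topic summary")]
vector_results = [Item("b", "topic summary"), Item("c", "related concept")]

# Same merge as in retrieve(): duplicates collapse onto the same id key.
combined = {item.id: item for item in graph_results + vector_results}
print(sorted(combined))  # ['a', 'b', 'c']
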
memos/memories/textual/tree_text_memory/retrieve/reranker.py
ADDED
@@ -0,0 +1,111 @@
+import numpy as np
+
+from memos.embedders.factory import OllamaEmbedder
+from memos.llms.factory import OllamaLLM, OpenAILLM
+from memos.memories.textual.item import TextualMemoryItem
+from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
+
+
+def batch_cosine_similarity(
+    query_vec: list[float], candidate_vecs: list[list[float]]
+) -> list[float]:
+    """
+    Compute cosine similarity between a single query vector and multiple candidate vectors using NumPy.
+
+    Args:
+        query_vec (list[float]): The query embedding.
+        candidate_vecs (list[list[float]]): A list of memory embeddings.
+
+    Returns:
+        list[float]: Cosine similarity scores for each candidate.
+    """
+    query = np.array(query_vec)
+    candidates = np.array(candidate_vecs)
+
+    # Normalize query and candidates
+    query_norm = np.linalg.norm(query)
+    candidates_norm = np.linalg.norm(candidates, axis=1)
+
+    # Compute dot products
+    dot_products = np.dot(candidates, query)
+
+    # Avoid division by zero
+    eps = 1e-10
+    similarities = dot_products / (candidates_norm * query_norm + eps)
+
+    return similarities.tolist()
+
+
+class MemoryReranker:
+    """
+    Rank retrieved memory cards by structural priority and contextual similarity.
+    """
+
+    def __init__(self, llm: OpenAILLM | OllamaLLM, embedder: OllamaEmbedder):
+        self.llm = llm
+        self.embedder = embedder
+
+        # Structural priority weights
+        self.level_weights = {
+            "topic": 1.0,
+            "concept": 1.0,
+            "fact": 1.0,
+        }
+
+    def rerank(
+        self,
+        query: str,
+        query_embedding: list[float],
+        graph_results: list,
+        top_k: int,
+        parsed_goal: ParsedTaskGoal,
+    ) -> list[tuple[TextualMemoryItem, float]]:
+        """
+        Rerank memory items by relevance to task.
+
+        Args:
+            query (str): Original task.
+            query_embedding(list[float]): embedding of query
+            graph_results (list): Combined retrieval results.
+            top_k (int): Number of top results to return.
+            parsed_goal (dict): Structured task representation.
+
+        Returns:
+            list(tuple): Ranked list of memory items with similarity score.
+        """
+        # Step 1: Filter out items without embeddings
+        items_with_embeddings = [item for item in graph_results if item.metadata.embedding]
+        embeddings = [item.metadata.embedding for item in items_with_embeddings]
+
+        if not embeddings:
+            return graph_results[:top_k]
+
+        # Step 2: Compute cosine similarities
+        similarity_scores = batch_cosine_similarity(query_embedding, embeddings)
+
+        # Step 3: Apply structural weight boost
+        def get_weight(item: TextualMemoryItem) -> float:
+            level = item.metadata.background
+            return self.level_weights.get(level, 1.0)
+
+        weighted_scores = [
+            sim * get_weight(item)
+            for sim, item in zip(similarity_scores, items_with_embeddings, strict=False)
+        ]
+
+        # Step 4: Sort by weighted score
+        sorted_items = sorted(
+            zip(items_with_embeddings, weighted_scores, strict=False),
+            key=lambda pair: pair[1],
+            reverse=True,
+        )
+
+        # Step 5: Return top-k items with fallback
+        top_items = sorted_items[:top_k]
+
+        if len(top_items) < top_k:
+            selected_items = [item for item, _ in top_items]
+            remaining = [(item, -1.0) for item in graph_results if item not in selected_items]
+            top_items.extend(remaining[: top_k - len(top_items)])
+
+        return top_items  # list of (item, score)

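Reviewer note: `MemoryReranker.rerank` scores candidates with `batch_cosine_similarity` and then applies per-level weights (all currently 1.0). A standalone check of the similarity computation, reproduced here with illustrative vectors.

import numpy as np


def batch_cosine_similarity(query_vec, candidate_vecs):
    # Same computation as the packaged helper: dot products over the product
    # of norms, with a small epsilon to avoid division by zero.
    query = np.array(query_vec)
    candidates = np.array(candidate_vecs)
    dot_products = np.dot(candidates, query)
    norms = np.linalg.norm(candidates, axis=1) * np.linalg.norm(query) + 1e-10
    return (dot_products / norms).tolist()


scores = batch_cosine_similarity([1.0, 0.0], [[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]])
print([round(s, 3) for s in scores])  # [1.0, 0.0, 0.707]
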
memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py
ADDED
@@ -0,0 +1,13 @@
+from dataclasses import dataclass, field
+
+
+@dataclass
+class ParsedTaskGoal:
+    """
+    Goal structure for both Fast & LLM.
+    """
+
+    memories: list[str] = field(default_factory=list)
+    keys: list[str] = field(default_factory=list)
+    tags: list[str] = field(default_factory=list)
+    goal_type: str | None = None  # e.g., 'default', 'explanation', etc.

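Reviewer note: for reference, a hand-built goal of the shape that the fine parser produces might look like the sketch below. Field values are illustrative only; the import assumes the wheel is installed so the module path resolves.

from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal

goal = ParsedTaskGoal(
    memories=["the user's favourite programming language"],  # embedding expansions
    keys=["favourite language"],                              # exact-match keys
    tags=["preferences", "programming"],                      # thematic tags
    goal_type="retrieval",
)
print(goal.keys, goal.goal_type)
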
memos/memories/textual/tree_text_memory/retrieve/searcher.py
ADDED
@@ -0,0 +1,166 @@
+import concurrent.futures
+import json
+
+from datetime import datetime
+
+from memos.embedders.factory import OllamaEmbedder
+from memos.graph_dbs.factory import Neo4jGraphDB
+from memos.llms.factory import OllamaLLM, OpenAILLM
+from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
+
+from .reasoner import MemoryReasoner
+from .recall import GraphMemoryRetriever
+from .reranker import MemoryReranker
+from .task_goal_parser import TaskGoalParser
+
+
+class Searcher:
+    def __init__(
+        self,
+        dispatcher_llm: OpenAILLM | OllamaLLM,
+        graph_store: Neo4jGraphDB,
+        embedder: OllamaEmbedder,
+    ):
+        self.graph_store = graph_store
+        self.embedder = embedder
+
+        self.task_goal_parser = TaskGoalParser(dispatcher_llm)
+        self.graph_retriever = GraphMemoryRetriever(self.graph_store, self.embedder)
+        self.reranker = MemoryReranker(dispatcher_llm, self.embedder)
+        self.reasoner = MemoryReasoner(dispatcher_llm)
+
+    def search(
+        self, query: str, top_k: int, info=None, mode: str = "fast", memory_type: str = "All"
+    ) -> list[TextualMemoryItem]:
+        """
+        Search for memories based on a query.
+        User query -> TaskGoalParser -> GraphMemoryRetriever ->
+        MemoryReranker -> MemoryReasoner -> Final output
+        Args:
+            query (str): The query to search for.
+            top_k (int): The number of top results to return.
+            info (dict): Leave a record of memory consumption.
+            mode (str, optional): The mode of the search.
+            - 'fast': Uses a faster search process, sacrificing some precision for speed.
+            - 'fine': Uses a more detailed search process, invoking large models for higher precision, but slower performance.
+            memory_type (str): Type restriction for search.
+            ['All', 'WorkingMemory', 'LongTermMemory', 'UserMemory']
+        Returns:
+            list[TextualMemoryItem]: List of matching memories.
+        """
+
+        # Step 1: Parse task structure into topic, concept, and fact levels
+        parsed_goal = self.task_goal_parser.parse(query)
+
+        if parsed_goal.memories:
+            query_embedding = self.embedder.embed(list({query, *parsed_goal.memories}))
+
+        # Step 2a: Working memory retrieval (Path A)
+        def retrieve_from_working_memory():
+            """
+            Direct structure-based retrieval from working memory.
+            """
+            if memory_type not in ["All", "WorkingMemory"]:
+                return []
+
+            working_memory = self.graph_retriever.retrieve(
+                query=query, parsed_goal=parsed_goal, top_k=top_k, memory_scope="WorkingMemory"
+            )
+            # Rerank working_memory results
+            ranked_memories = self.reranker.rerank(
+                query=query,
+                query_embedding=query_embedding[0],
+                graph_results=working_memory,
+                top_k=top_k,
+                parsed_goal=parsed_goal,
+            )
+            return ranked_memories
+
+        # Step 2b: Parallel long-term and user memory retrieval (Path B)
+        def retrieve_ranked_long_term_and_user():
+            """
+            Retrieve from both long-term and user memory, then rank and merge results.
+            """
+            long_term_items = (
+                self.graph_retriever.retrieve(
+                    query=query,
+                    query_embedding=query_embedding,
+                    parsed_goal=parsed_goal,
+                    top_k=top_k * 2,
+                    memory_scope="LongTermMemory",
+                )
+                if memory_type in ["All", "LongTermMemory"]
+                else []
+            )
+            user_items = (
+                self.graph_retriever.retrieve(
+                    query=query,
+                    query_embedding=query_embedding,
+                    parsed_goal=parsed_goal,
+                    top_k=top_k * 2,
+                    memory_scope="UserMemory",
+                )
+                if memory_type in ["All", "UserMemory"]
+                else []
+            )
+
+            # Rerank combined results
+            ranked_memories = self.reranker.rerank(
+                query=query,
+                query_embedding=query_embedding[0],
+                graph_results=long_term_items + user_items,
+                top_k=top_k * 2,
+                parsed_goal=parsed_goal,
+            )
+            return ranked_memories
+
+        # Step 3: Parallel execution of both paths
+        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+            future_working = executor.submit(retrieve_from_working_memory)
+            future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
+
+            working_results = future_working.result()
+            hybrid_results = future_hybrid.result()
+        searched_res = working_results + hybrid_results
+
+        # Deduplicate by item.memory, keep higher score
+        deduped_result = {}
+        for item, score in searched_res:
+            mem_key = item.memory
+            if mem_key not in deduped_result or score > deduped_result[mem_key][1]:
+                deduped_result[mem_key] = (item, score)
+
+        searched_res = []
+        for item, score in sorted(deduped_result.values(), key=lambda pair: pair[1], reverse=True)[
+            :top_k
+        ]:
+            new_meta = SearchedTreeNodeTextualMemoryMetadata(
+                **item.metadata.model_dump(), relativity=score
+            )
+            searched_res.append(
+                TextualMemoryItem(id=item.id, memory=item.memory, metadata=new_meta)
+            )
+
+        # Step 4: Reasoning over all retrieved and ranked memory
+        if mode == "fine":
+            searched_res = self.reasoner.reason(
+                query=query,
+                ranked_memories=searched_res,
+                parsed_goal=parsed_goal,
+            )
+
+        # Step 5: Update usage history with current timestamp
+        now_time = datetime.now().isoformat()
+        usage_record = json.dumps(
+            {"time": now_time, "info": info}
+        )  # `info` should be a serializable dict or string
+
+        for item in searched_res:
+            if (
+                hasattr(item, "id")
+                and hasattr(item, "metadata")
+                and hasattr(item.metadata, "usage")
+            ):
+                item.metadata.usage.append(usage_record)
+                self.graph_store.update_node(item.id, {"usage": item.metadata.usage})
+        return searched_res

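Reviewer note: a wiring sketch only, showing how `Searcher` is assembled from its collaborators and queried. Construction of the LLM, Neo4j store, and embedder is deliberately elided because their configs are backend-specific; any objects matching the constructor types above would do, and the call values below are illustrative.

from memos.memories.textual.tree_text_memory.retrieve.searcher import Searcher


def build_searcher(dispatcher_llm, graph_store, embedder) -> Searcher:
    # Caller supplies already-configured backends (OpenAILLM/OllamaLLM,
    # Neo4jGraphDB, OllamaEmbedder); this function only does the wiring.
    return Searcher(dispatcher_llm=dispatcher_llm, graph_store=graph_store, embedder=embedder)


# searcher = build_searcher(llm, graph, embedder)
# results = searcher.search(
#     query="What does the user prefer for breakfast?",
#     top_k=5,
#     info={"user_id": "u-123"},  # serialized into each returned item's usage history
#     mode="fast",                # "fine" additionally runs the LLM reasoner
#     memory_type="All",          # or WorkingMemory / LongTermMemory / UserMemory
# )
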
memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py
ADDED
@@ -0,0 +1,68 @@
+import json
+
+from string import Template
+
+from memos.llms.base import BaseLLM
+from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
+from memos.memories.textual.tree_text_memory.retrieve.utils import TASK_PARSE_PROMPT
+
+
+class TaskGoalParser:
+    """
+    Unified TaskGoalParser:
+    - mode == 'fast': directly use origin task_description
+    - mode == 'fine': use LLM to parse structured topic/keys/tags
+    """
+
+    def __init__(self, llm=BaseLLM, mode: str = "fast"):
+        self.llm = llm
+        self.mode = mode
+
+    def parse(self, task_description: str, context: str = "") -> ParsedTaskGoal:
+        """
+        Parse user input into structured semantic layers.
+        Returns:
+            ParsedTaskGoal: object containing topic/concept/fact levels and optional metadata
+            - mode == 'fast': use jieba to split words only
+            - mode == 'fine': use LLM to parse structured topic/keys/tags
+        """
+        if self.mode == "fast":
+            return self._parse_fast(task_description)
+        elif self.mode == "fine":
+            if not self.llm:
+                raise ValueError("LLM not provided for slow mode.")
+            return self._parse_fine(task_description)
+        else:
+            raise ValueError(f"Unknown mode: {self.mode}")
+
+    def _parse_fast(self, task_description: str, limit_num: int = 5) -> ParsedTaskGoal:
+        """
+        Fast mode: simple jieba word split.
+        """
+        return ParsedTaskGoal(
+            memories=[task_description], keys=[task_description], tags=[], goal_type="default"
+        )
+
+    def _parse_fine(self, query: str) -> ParsedTaskGoal:
+        """
+        Slow mode: LLM structured parse.
+        """
+        prompt = Template(TASK_PARSE_PROMPT).substitute(task=query.strip(), context="")
+        response = self.llm.generate(messages=[{"role": "user", "content": prompt}])
+        return self._parse_response(response)
+
+    def _parse_response(self, response: str) -> ParsedTaskGoal:
+        """
+        Parse LLM JSON output safely.
+        """
+        try:
+            response = response.replace("```", "").replace("json", "")
+            response_json = json.loads(response.strip())
+            return ParsedTaskGoal(
+                memories=response_json.get("memories", []),
+                keys=response_json.get("keys", []),
+                tags=response_json.get("tags", []),
+                goal_type=response_json.get("goal_type", "default"),
+            )
+        except Exception as e:
+            raise ValueError(f"Failed to parse LLM output: {e}\nRaw response:\n{response}") from e

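Reviewer note: in fast mode the parser never touches the LLM, it simply wraps the query as a single memory/key. A minimal sketch of that path, assuming the wheel is installed; the query string is illustrative.

from memos.memories.textual.tree_text_memory.retrieve.task_goal_parser import TaskGoalParser

parser = TaskGoalParser(mode="fast")  # default llm argument is never called in fast mode
goal = parser.parse("Summarize what the user said about travel plans")
print(goal.keys)       # ['Summarize what the user said about travel plans']
print(goal.goal_type)  # 'default'
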
memos/memories/textual/tree_text_memory/retrieve/utils.py
ADDED
@@ -0,0 +1,48 @@
+# Prompt for task parsing
+TASK_PARSE_PROMPT = """
+You are a task parsing expert. Given a user's task instruction, extract the following structured information:
+
+Given a user task instruction and optional related memory context,
+extract the following structured information:
+1. Keys: the high-level keywords directly relevant to the user's task.
+2. Tags: thematic tags to help categorize and retrieve related memories.
+3. Goal Type: retrieval | qa | generation
+4. Memories: Provide 2–5 short semantic expansions or rephrasings of the task instruction.
+   These are used for improved embedding search coverage.
+   Each should be clear, concise, and meaningful for retrieval.
+
+Task description:
+\"\"\"$task\"\"\"
+
+Context (if any):
+\"\"\"$context\"\"\"
+
+Return strictly in this JSON format:
+{
+  "keys": [...],
+  "tags": [...],
+  "goal_type": "retrieval | qa | generation",
+  "memories": ["...", "...", ...]
+}
+"""
+
+
+REASON_PROMPT = """
+You are a reasoning agent working with a memory system. You will synthesize knowledge from multiple memory cards to construct a meaningful response to the task below.
+
+Task: ${task}
+
+Memory cards (with metadata):
+${detailed_memory_list}
+
+Please perform:
+1. Clustering by theme (topic/concept/fact)
+2. Identify useful chains or connections
+3. Return a curated list of memory card IDs with reasons.
+
+Output in JSON:
+{
+  "selected_ids": [...],
+  "explanation": "..."
+}
+"""

memos/parsers/__init__.py
File without changes

memos/parsers/base.py
ADDED
@@ -0,0 +1,15 @@
+from abc import ABC, abstractmethod
+
+from memos.configs.parser import BaseParserConfig
+
+
+class BaseParser(ABC):
+    """Base class for all parsers."""
+
+    @abstractmethod
+    def __init__(self, config: BaseParserConfig):
+        """Initialize the parser with the given configuration."""
+
+    @abstractmethod
+    def parse(self, file_path: str) -> str:
+        """Parse the file at the given path and return its content as a string."""

memos/parsers/factory.py
ADDED
@@ -0,0 +1,19 @@
+from typing import Any, ClassVar
+
+from memos.configs.parser import ParserConfigFactory
+from memos.parsers.base import BaseParser
+from memos.parsers.markitdown import MarkItDownParser
+
+
+class ParserFactory(BaseParser):
+    """Factory class for creating Parser instances."""
+
+    backend_to_class: ClassVar[dict[str, Any]] = {"markitdown": MarkItDownParser}
+
+    @classmethod
+    def from_config(cls, config_factory: ParserConfigFactory) -> BaseParser:
+        backend = config_factory.backend
+        if backend not in cls.backend_to_class:
+            raise ValueError(f"Invalid backend: {backend}")
+        parser_class = cls.backend_to_class[backend]
+        return parser_class(config_factory.config)

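Reviewer note: a hedged usage sketch of the factory, which looks up the backend name in `backend_to_class` and instantiates the matching parser with that backend's config. The `model_validate` construction below assumes the pydantic-style config classes in `memos.configs.parser`; it is not shown in this diff, and the input path is hypothetical.

from memos.configs.parser import ParserConfigFactory
from memos.parsers.factory import ParserFactory

# Assumption: ParserConfigFactory is a pydantic model exposing .backend and .config.
config = ParserConfigFactory.model_validate({"backend": "markitdown", "config": {}})

parser = ParserFactory.from_config(config)           # -> MarkItDownParser
markdown_text = parser.parse("./docs/example.pdf")   # hypothetical input file
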
memos/parsers/markitdown.py
ADDED
@@ -0,0 +1,22 @@
+from markitdown import MarkItDown
+
+from memos.configs.parser import MarkItDownParserConfig
+from memos.log import get_logger
+from memos.parsers.base import BaseParser
+
+
+logger = get_logger(__name__)
+
+
+class MarkItDownParser(BaseParser):
+    """MarkItDown Parser class."""
+
+    def __init__(self, config: MarkItDownParserConfig):
+        self.config = config
+
+    def parse(self, file_path: str) -> str:
+        """Parse the file at the given path and return its content as a MarkDown string."""
+        md = MarkItDown(enable_plugins=False)
+        result = md.convert(file_path)
+
+        return result.text_content

memos/settings.py
ADDED
File without changes