MemoryOS 0.0.1__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MemoryOS might be problematic.

Files changed (119)
  1. memoryos-0.1.12.dist-info/METADATA +257 -0
  2. memoryos-0.1.12.dist-info/RECORD +117 -0
  3. memos/__init__.py +20 -1
  4. memos/api/start_api.py +420 -0
  5. memos/chunkers/__init__.py +4 -0
  6. memos/chunkers/base.py +24 -0
  7. memos/chunkers/factory.py +22 -0
  8. memos/chunkers/sentence_chunker.py +35 -0
  9. memos/configs/__init__.py +0 -0
  10. memos/configs/base.py +82 -0
  11. memos/configs/chunker.py +45 -0
  12. memos/configs/embedder.py +53 -0
  13. memos/configs/graph_db.py +45 -0
  14. memos/configs/llm.py +71 -0
  15. memos/configs/mem_chat.py +81 -0
  16. memos/configs/mem_cube.py +89 -0
  17. memos/configs/mem_os.py +70 -0
  18. memos/configs/mem_reader.py +53 -0
  19. memos/configs/mem_scheduler.py +78 -0
  20. memos/configs/memory.py +190 -0
  21. memos/configs/parser.py +38 -0
  22. memos/configs/utils.py +8 -0
  23. memos/configs/vec_db.py +64 -0
  24. memos/deprecation.py +262 -0
  25. memos/embedders/__init__.py +0 -0
  26. memos/embedders/base.py +15 -0
  27. memos/embedders/factory.py +23 -0
  28. memos/embedders/ollama.py +74 -0
  29. memos/embedders/sentence_transformer.py +40 -0
  30. memos/exceptions.py +30 -0
  31. memos/graph_dbs/__init__.py +0 -0
  32. memos/graph_dbs/base.py +215 -0
  33. memos/graph_dbs/factory.py +21 -0
  34. memos/graph_dbs/neo4j.py +827 -0
  35. memos/hello_world.py +97 -0
  36. memos/llms/__init__.py +0 -0
  37. memos/llms/base.py +16 -0
  38. memos/llms/factory.py +25 -0
  39. memos/llms/hf.py +231 -0
  40. memos/llms/ollama.py +82 -0
  41. memos/llms/openai.py +34 -0
  42. memos/llms/utils.py +14 -0
  43. memos/log.py +78 -0
  44. memos/mem_chat/__init__.py +0 -0
  45. memos/mem_chat/base.py +30 -0
  46. memos/mem_chat/factory.py +21 -0
  47. memos/mem_chat/simple.py +200 -0
  48. memos/mem_cube/__init__.py +0 -0
  49. memos/mem_cube/base.py +29 -0
  50. memos/mem_cube/general.py +146 -0
  51. memos/mem_cube/utils.py +24 -0
  52. memos/mem_os/client.py +5 -0
  53. memos/mem_os/core.py +819 -0
  54. memos/mem_os/main.py +12 -0
  55. memos/mem_os/product.py +89 -0
  56. memos/mem_reader/__init__.py +0 -0
  57. memos/mem_reader/base.py +27 -0
  58. memos/mem_reader/factory.py +21 -0
  59. memos/mem_reader/memory.py +298 -0
  60. memos/mem_reader/simple_struct.py +241 -0
  61. memos/mem_scheduler/__init__.py +0 -0
  62. memos/mem_scheduler/base_scheduler.py +164 -0
  63. memos/mem_scheduler/general_scheduler.py +305 -0
  64. memos/mem_scheduler/modules/__init__.py +0 -0
  65. memos/mem_scheduler/modules/base.py +74 -0
  66. memos/mem_scheduler/modules/dispatcher.py +103 -0
  67. memos/mem_scheduler/modules/monitor.py +82 -0
  68. memos/mem_scheduler/modules/redis_service.py +146 -0
  69. memos/mem_scheduler/modules/retriever.py +41 -0
  70. memos/mem_scheduler/modules/schemas.py +146 -0
  71. memos/mem_scheduler/scheduler_factory.py +21 -0
  72. memos/mem_scheduler/utils.py +26 -0
  73. memos/mem_user/user_manager.py +478 -0
  74. memos/memories/__init__.py +0 -0
  75. memos/memories/activation/__init__.py +0 -0
  76. memos/memories/activation/base.py +42 -0
  77. memos/memories/activation/item.py +25 -0
  78. memos/memories/activation/kv.py +232 -0
  79. memos/memories/base.py +19 -0
  80. memos/memories/factory.py +34 -0
  81. memos/memories/parametric/__init__.py +0 -0
  82. memos/memories/parametric/base.py +19 -0
  83. memos/memories/parametric/item.py +11 -0
  84. memos/memories/parametric/lora.py +41 -0
  85. memos/memories/textual/__init__.py +0 -0
  86. memos/memories/textual/base.py +89 -0
  87. memos/memories/textual/general.py +286 -0
  88. memos/memories/textual/item.py +167 -0
  89. memos/memories/textual/naive.py +185 -0
  90. memos/memories/textual/tree.py +289 -0
  91. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  92. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  93. memos/memories/textual/tree_text_memory/organize/manager.py +305 -0
  94. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  95. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +64 -0
  96. memos/memories/textual/tree_text_memory/retrieve/recall.py +158 -0
  97. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  98. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +13 -0
  99. memos/memories/textual/tree_text_memory/retrieve/searcher.py +166 -0
  100. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +68 -0
  101. memos/memories/textual/tree_text_memory/retrieve/utils.py +48 -0
  102. memos/parsers/__init__.py +0 -0
  103. memos/parsers/base.py +15 -0
  104. memos/parsers/factory.py +19 -0
  105. memos/parsers/markitdown.py +22 -0
  106. memos/settings.py +8 -0
  107. memos/templates/__init__.py +0 -0
  108. memos/templates/mem_reader_prompts.py +98 -0
  109. memos/templates/mem_scheduler_prompts.py +65 -0
  110. memos/types.py +55 -0
  111. memos/vec_dbs/__init__.py +0 -0
  112. memos/vec_dbs/base.py +105 -0
  113. memos/vec_dbs/factory.py +21 -0
  114. memos/vec_dbs/item.py +43 -0
  115. memos/vec_dbs/qdrant.py +292 -0
  116. memoryos-0.0.1.dist-info/METADATA +0 -53
  117. memoryos-0.0.1.dist-info/RECORD +0 -5
  118. {memoryos-0.0.1.dist-info → memoryos-0.1.12.dist-info}/LICENSE +0 -0
  119. {memoryos-0.0.1.dist-info → memoryos-0.1.12.dist-info}/WHEEL +0 -0
memos/memories/textual/tree_text_memory/retrieve/recall.py ADDED
@@ -0,0 +1,158 @@
+ import concurrent.futures
+
+ from memos.embedders.factory import OllamaEmbedder
+ from memos.graph_dbs.neo4j import Neo4jGraphDB
+ from memos.memories.textual.item import TextualMemoryItem
+ from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
+
+
+ class GraphMemoryRetriever:
+     """
+     Unified memory retriever that combines both graph-based and vector-based retrieval logic.
+     """
+
+     def __init__(self, graph_store: Neo4jGraphDB, embedder: OllamaEmbedder):
+         self.graph_store = graph_store
+         self.embedder = embedder
+
+     def retrieve(
+         self,
+         query: str,
+         parsed_goal: ParsedTaskGoal,
+         top_k: int,
+         memory_scope: str,
+         query_embedding: list[list[float]] | None = None,
+     ) -> list[TextualMemoryItem]:
+         """
+         Perform hybrid memory retrieval:
+         - Run graph-based lookup from the dispatch plan.
+         - Run vector similarity search on the embedded query.
+         - Merge and return the combined result set.
+
+         Args:
+             query (str): Original task query.
+             parsed_goal (ParsedTaskGoal): Structured task goal.
+             top_k (int): Number of candidates to return.
+             memory_scope (str): One of ['WorkingMemory', 'LongTermMemory', 'UserMemory'].
+             query_embedding (list[list[float]] | None): Embeddings of the query and its expansions.
+
+         Returns:
+             list: Combined memory items.
+         """
+         if memory_scope not in ["WorkingMemory", "LongTermMemory", "UserMemory"]:
+             raise ValueError(f"Unsupported memory scope: {memory_scope}")
+
+         if memory_scope == "WorkingMemory":
+             # For working memory, retrieve all entries (no filtering)
+             working_memories = self.graph_store.get_all_memory_items(scope="WorkingMemory")
+             return [TextualMemoryItem.from_dict(record) for record in working_memories]
+
+         # Step 1: Structured graph-based retrieval
+         graph_results = self._graph_recall(parsed_goal, memory_scope)
+
+         # Step 2: Vector similarity search
+         vector_results = self._vector_recall(query_embedding, memory_scope, top_k)
+
+         # Step 3: Merge and deduplicate results
+         combined = {item.id: item for item in graph_results + vector_results}
+
+         # Debug: report ids that appear in graph_results but are missing from combined
+         graph_ids = {item.id for item in graph_results}
+         combined_ids = set(combined.keys())
+         lost_ids = graph_ids - combined_ids
+
+         if lost_ids:
+             print(
+                 f"[DEBUG] The following nodes were in graph_results but missing in combined: {lost_ids}"
+             )
+
+         return list(combined.values())
+
+     def _graph_recall(
+         self, parsed_goal: ParsedTaskGoal, memory_scope: str
+     ) -> list[TextualMemoryItem]:
+         """
+         Perform structured node-based retrieval from Neo4j.
+         - keys must match exactly (n.key IN keys)
+         - tags must overlap with at least 2 input tags
+         - scope filters by memory_type if provided
+         """
+         candidate_ids = set()
+
+         # 1) key-based OR branch
+         if parsed_goal.keys:
+             key_filters = [
+                 {"field": "key", "op": "in", "value": parsed_goal.keys},
+                 {"field": "memory_type", "op": "=", "value": memory_scope},
+             ]
+             key_ids = self.graph_store.get_by_metadata(key_filters)
+             candidate_ids.update(key_ids)
+
+         # 2) tag-based OR branch
+         if parsed_goal.tags:
+             tag_filters = [
+                 {"field": "tags", "op": "contains", "value": parsed_goal.tags},
+                 {"field": "memory_type", "op": "=", "value": memory_scope},
+             ]
+             tag_ids = self.graph_store.get_by_metadata(tag_filters)
+             candidate_ids.update(tag_ids)
+
+         # No matches → return empty
+         if not candidate_ids:
+             return []
+
+         # Load nodes and post-filter
+         node_dicts = self.graph_store.get_nodes(list(candidate_ids))
+
+         final_nodes = []
+         for node in node_dicts:
+             meta = node.get("metadata", {})
+             node_key = meta.get("key")
+             node_tags = meta.get("tags", []) or []
+
+             keep = False
+             # keep if node_key matches one of the parsed keys
+             if parsed_goal.keys and node_key in parsed_goal.keys:
+                 keep = True
+             # keep if at least 2 tags overlap
+             elif parsed_goal.tags:
+                 overlap = len(set(node_tags) & set(parsed_goal.tags))
+                 if overlap >= 2:
+                     keep = True
+             if keep:
+                 final_nodes.append(TextualMemoryItem.from_dict(node))
+         return final_nodes
+
+     def _vector_recall(
+         self,
+         query_embedding: list[list[float]],
+         memory_scope: str,
+         top_k: int = 20,
+         max_num: int = 5,
+     ) -> list[TextualMemoryItem]:
+         """
+         # TODO: handle pre-filtering and post-filtering (5.18+) better.
+         Perform vector-based similarity retrieval using the query embeddings.
+         """
+         all_matches = []
+
+         def search_single(vec):
+             return (
+                 self.graph_store.search_by_embedding(vector=vec, top_k=top_k, scope=memory_scope)
+                 or []
+             )
+
+         with concurrent.futures.ThreadPoolExecutor() as executor:
+             futures = [executor.submit(search_single, vec) for vec in query_embedding[:max_num]]
+             for future in concurrent.futures.as_completed(futures):
+                 result = future.result()
+                 all_matches.extend(result)
+
+         if not all_matches:
+             return []
+
+         # Extract matched IDs and retrieve full nodes
+         unique_ids = {r["id"] for r in all_matches}
+         node_dicts = self.graph_store.get_nodes(list(unique_ids))
+
+         return [TextualMemoryItem.from_dict(record) for record in node_dicts]
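For orientation, a minimal usage sketch of the retriever above. How `graph_store` and `embedder` are constructed is an assumption here (in the package they come from the config factories under memos/configs/); the call itself matches the `retrieve` signature:

    # Hypothetical wiring; graph_store / embedder construction is assumed.
    retriever = GraphMemoryRetriever(graph_store=graph_store, embedder=embedder)
    query = "What is the user's favorite city?"
    goal = ParsedTaskGoal(
        memories=["user's favorite city"],
        keys=["favorite city"],
        tags=["travel", "preferences"],
    )
    items = retriever.retrieve(
        query=query,
        parsed_goal=goal,
        top_k=10,
        memory_scope="LongTermMemory",
        query_embedding=embedder.embed([query]),  # list of vectors, as _vector_recall expects
    )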
memos/memories/textual/tree_text_memory/retrieve/reranker.py ADDED
@@ -0,0 +1,111 @@
+ import numpy as np
+
+ from memos.embedders.factory import OllamaEmbedder
+ from memos.llms.factory import OllamaLLM, OpenAILLM
+ from memos.memories.textual.item import TextualMemoryItem
+ from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
+
+
+ def batch_cosine_similarity(
+     query_vec: list[float], candidate_vecs: list[list[float]]
+ ) -> list[float]:
+     """
+     Compute cosine similarity between a single query vector and multiple candidate vectors using NumPy.
+
+     Args:
+         query_vec (list[float]): The query embedding.
+         candidate_vecs (list[list[float]]): A list of memory embeddings.
+
+     Returns:
+         list[float]: Cosine similarity scores for each candidate.
+     """
+     query = np.array(query_vec)
+     candidates = np.array(candidate_vecs)
+
+     # Normalize query and candidates
+     query_norm = np.linalg.norm(query)
+     candidates_norm = np.linalg.norm(candidates, axis=1)
+
+     # Compute dot products
+     dot_products = np.dot(candidates, query)
+
+     # Avoid division by zero
+     eps = 1e-10
+     similarities = dot_products / (candidates_norm * query_norm + eps)
+
+     return similarities.tolist()
+
+
+ class MemoryReranker:
+     """
+     Rank retrieved memory cards by structural priority and contextual similarity.
+     """
+
+     def __init__(self, llm: OpenAILLM | OllamaLLM, embedder: OllamaEmbedder):
+         self.llm = llm
+         self.embedder = embedder
+
+         # Structural priority weights
+         self.level_weights = {
+             "topic": 1.0,
+             "concept": 1.0,
+             "fact": 1.0,
+         }
+
+     def rerank(
+         self,
+         query: str,
+         query_embedding: list[float],
+         graph_results: list,
+         top_k: int,
+         parsed_goal: ParsedTaskGoal,
+     ) -> list[tuple[TextualMemoryItem, float]]:
+         """
+         Rerank memory items by relevance to the task.
+
+         Args:
+             query (str): Original task.
+             query_embedding (list[float]): Embedding of the query.
+             graph_results (list): Combined retrieval results.
+             top_k (int): Number of top results to return.
+             parsed_goal (ParsedTaskGoal): Structured task representation.
+
+         Returns:
+             list(tuple): Ranked list of memory items with similarity scores.
+         """
+         # Step 1: Filter out items without embeddings
+         items_with_embeddings = [item for item in graph_results if item.metadata.embedding]
+         embeddings = [item.metadata.embedding for item in items_with_embeddings]
+
+         if not embeddings:
+             return [(item, 0.0) for item in graph_results[:top_k]]
+
+         # Step 2: Compute cosine similarities
+         similarity_scores = batch_cosine_similarity(query_embedding, embeddings)
+
+         # Step 3: Apply structural weight boost
+         def get_weight(item: TextualMemoryItem) -> float:
+             level = item.metadata.background
+             return self.level_weights.get(level, 1.0)
+
+         weighted_scores = [
+             sim * get_weight(item)
+             for sim, item in zip(similarity_scores, items_with_embeddings, strict=False)
+         ]
+
+         # Step 4: Sort by weighted score
+         sorted_items = sorted(
+             zip(items_with_embeddings, weighted_scores, strict=False),
+             key=lambda pair: pair[1],
+             reverse=True,
+         )
+
+         # Step 5: Return top-k items, falling back to unscored items if needed
+         top_items = sorted_items[:top_k]
+
+         if len(top_items) < top_k:
+             selected_items = [item for item, _ in top_items]
+             remaining = [(item, -1.0) for item in graph_results if item not in selected_items]
+             top_items.extend(remaining[: top_k - len(top_items)])
+
+         return top_items  # list of (item, score)
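As a quick sanity check on batch_cosine_similarity, a hand-computable example (vectors chosen purely for illustration): identical direction scores 1.0, orthogonal scores 0.0, and a 45-degree vector scores 1/sqrt(2).

    scores = batch_cosine_similarity([1.0, 0.0], [[2.0, 0.0], [0.0, 3.0], [1.0, 1.0]])
    # scores ≈ [1.0, 0.0, 0.7071]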
memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py ADDED
@@ -0,0 +1,13 @@
+ from dataclasses import dataclass, field
+
+
+ @dataclass
+ class ParsedTaskGoal:
+     """
+     Goal structure shared by both the fast and LLM parsing paths.
+     """
+
+     memories: list[str] = field(default_factory=list)
+     keys: list[str] = field(default_factory=list)
+     tags: list[str] = field(default_factory=list)
+     goal_type: str | None = None  # e.g., 'default', 'explanation', etc.
memos/memories/textual/tree_text_memory/retrieve/searcher.py ADDED
@@ -0,0 +1,166 @@
+ import concurrent.futures
+ import json
+
+ from datetime import datetime
+
+ from memos.embedders.factory import OllamaEmbedder
+ from memos.graph_dbs.factory import Neo4jGraphDB
+ from memos.llms.factory import OllamaLLM, OpenAILLM
+ from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
+
+ from .reasoner import MemoryReasoner
+ from .recall import GraphMemoryRetriever
+ from .reranker import MemoryReranker
+ from .task_goal_parser import TaskGoalParser
+
+
+ class Searcher:
+     def __init__(
+         self,
+         dispatcher_llm: OpenAILLM | OllamaLLM,
+         graph_store: Neo4jGraphDB,
+         embedder: OllamaEmbedder,
+     ):
+         self.graph_store = graph_store
+         self.embedder = embedder
+
+         self.task_goal_parser = TaskGoalParser(dispatcher_llm)
+         self.graph_retriever = GraphMemoryRetriever(self.graph_store, self.embedder)
+         self.reranker = MemoryReranker(dispatcher_llm, self.embedder)
+         self.reasoner = MemoryReasoner(dispatcher_llm)
+
+     def search(
+         self, query: str, top_k: int, info=None, mode: str = "fast", memory_type: str = "All"
+     ) -> list[TextualMemoryItem]:
+         """
+         Search for memories based on a query.
+         User query -> TaskGoalParser -> GraphMemoryRetriever ->
+         MemoryReranker -> MemoryReasoner -> Final output
+         Args:
+             query (str): The query to search for.
+             top_k (int): The number of top results to return.
+             info (dict): Extra metadata recorded into each memory's usage history.
+             mode (str, optional): The search mode.
+                 - 'fast': A faster search that trades some precision for speed.
+                 - 'fine': A more detailed search that invokes large models for higher precision at the cost of speed.
+             memory_type (str): Type restriction for the search.
+                 ['All', 'WorkingMemory', 'LongTermMemory', 'UserMemory']
+         Returns:
+             list[TextualMemoryItem]: List of matching memories.
+         """
+
+         # Step 1: Parse task structure into topic, concept, and fact levels
+         parsed_goal = self.task_goal_parser.parse(query)
+
+         query_words = list({query, *parsed_goal.memories}) if parsed_goal.memories else [query]
+         query_embedding = self.embedder.embed(query_words)
+
+         # Step 2a: Working memory retrieval (Path A)
+         def retrieve_from_working_memory():
+             """
+             Direct structure-based retrieval from working memory.
+             """
+             if memory_type not in ["All", "WorkingMemory"]:
+                 return []
+
+             working_memory = self.graph_retriever.retrieve(
+                 query=query, parsed_goal=parsed_goal, top_k=top_k, memory_scope="WorkingMemory"
+             )
+             # Rerank working_memory results
+             ranked_memories = self.reranker.rerank(
+                 query=query,
+                 query_embedding=query_embedding[0],
+                 graph_results=working_memory,
+                 top_k=top_k,
+                 parsed_goal=parsed_goal,
+             )
+             return ranked_memories
+
+         # Step 2b: Parallel long-term and user memory retrieval (Path B)
+         def retrieve_ranked_long_term_and_user():
+             """
+             Retrieve from both long-term and user memory, then rank and merge results.
+             """
+             long_term_items = (
+                 self.graph_retriever.retrieve(
+                     query=query,
+                     query_embedding=query_embedding,
+                     parsed_goal=parsed_goal,
+                     top_k=top_k * 2,
+                     memory_scope="LongTermMemory",
+                 )
+                 if memory_type in ["All", "LongTermMemory"]
+                 else []
+             )
+             user_items = (
+                 self.graph_retriever.retrieve(
+                     query=query,
+                     query_embedding=query_embedding,
+                     parsed_goal=parsed_goal,
+                     top_k=top_k * 2,
+                     memory_scope="UserMemory",
+                 )
+                 if memory_type in ["All", "UserMemory"]
+                 else []
+             )
+
+             # Rerank combined results
+             ranked_memories = self.reranker.rerank(
+                 query=query,
+                 query_embedding=query_embedding[0],
+                 graph_results=long_term_items + user_items,
+                 top_k=top_k * 2,
+                 parsed_goal=parsed_goal,
+             )
+             return ranked_memories
+
+         # Step 3: Run both paths in parallel
+         with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+             future_working = executor.submit(retrieve_from_working_memory)
+             future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
+
+             working_results = future_working.result()
+             hybrid_results = future_hybrid.result()
+         searched_res = working_results + hybrid_results
+
+         # Deduplicate by item.memory, keeping the higher score
+         deduped_result = {}
+         for item, score in searched_res:
+             mem_key = item.memory
+             if mem_key not in deduped_result or score > deduped_result[mem_key][1]:
+                 deduped_result[mem_key] = (item, score)
+
+         searched_res = []
+         for item, score in sorted(deduped_result.values(), key=lambda pair: pair[1], reverse=True)[
+             :top_k
+         ]:
+             new_meta = SearchedTreeNodeTextualMemoryMetadata(
+                 **item.metadata.model_dump(), relativity=score
+             )
+             searched_res.append(
+                 TextualMemoryItem(id=item.id, memory=item.memory, metadata=new_meta)
+             )
+
+         # Step 4: Reasoning over all retrieved and ranked memory
+         if mode == "fine":
+             searched_res = self.reasoner.reason(
+                 query=query,
+                 ranked_memories=searched_res,
+                 parsed_goal=parsed_goal,
+             )
+
+         # Step 5: Update usage history with the current timestamp
+         now_time = datetime.now().isoformat()
+         usage_record = json.dumps(
+             {"time": now_time, "info": info}
+         )  # `info` should be a serializable dict or string
+
+         for item in searched_res:
+             if (
+                 hasattr(item, "id")
+                 and hasattr(item, "metadata")
+                 and hasattr(item.metadata, "usage")
+             ):
+                 item.metadata.usage.append(usage_record)
+                 self.graph_store.update_node(item.id, {"usage": item.metadata.usage})
+         return searched_res
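A minimal end-to-end sketch of the pipeline above, assuming the LLM, graph store, and embedder have already been built via their factories (the object names here are illustrative, not part of the package):

    # Hypothetical wiring; in the package these come from factory classes
    # driven by memos/configs/.
    searcher = Searcher(dispatcher_llm=llm, graph_store=graph_store, embedder=embedder)
    results = searcher.search(
        query="Summarize what the user said about project deadlines",
        top_k=5,
        info={"user_id": "u-123", "session": "s-456"},  # recorded into usage history
        mode="fast",        # 'fine' additionally runs MemoryReasoner
        memory_type="All",  # or 'WorkingMemory' / 'LongTermMemory' / 'UserMemory'
    )
    for item in results:
        print(item.id, item.metadata.relativity, item.memory[:80])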
memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py ADDED
@@ -0,0 +1,68 @@
+ import json
+
+ from string import Template
+
+ from memos.llms.base import BaseLLM
+ from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
+ from memos.memories.textual.tree_text_memory.retrieve.utils import TASK_PARSE_PROMPT
+
+
+ class TaskGoalParser:
+     """
+     Unified TaskGoalParser:
+     - mode == 'fast': directly use the original task_description
+     - mode == 'fine': use an LLM to parse structured keys/tags/memories
+     """
+
+     def __init__(self, llm: BaseLLM | None = None, mode: str = "fast"):
+         self.llm = llm
+         self.mode = mode
+
+     def parse(self, task_description: str, context: str = "") -> ParsedTaskGoal:
+         """
+         Parse user input into structured semantic layers.
+         Returns:
+             ParsedTaskGoal: object containing memories/keys/tags and optional metadata
+             - mode == 'fast': wrap the task description directly, with no LLM call
+             - mode == 'fine': use an LLM to parse structured keys/tags/memories
+         """
+         if self.mode == "fast":
+             return self._parse_fast(task_description)
+         elif self.mode == "fine":
+             if not self.llm:
+                 raise ValueError("LLM not provided for fine mode.")
+             return self._parse_fine(task_description)
+         else:
+             raise ValueError(f"Unknown mode: {self.mode}")
+
+     def _parse_fast(self, task_description: str, limit_num: int = 5) -> ParsedTaskGoal:
+         """
+         Fast mode: wrap the raw task description without calling an LLM.
+         """
+         return ParsedTaskGoal(
+             memories=[task_description], keys=[task_description], tags=[], goal_type="default"
+         )
+
+     def _parse_fine(self, query: str) -> ParsedTaskGoal:
+         """
+         Fine mode: structured parse via the LLM.
+         """
+         prompt = Template(TASK_PARSE_PROMPT).substitute(task=query.strip(), context="")
+         response = self.llm.generate(messages=[{"role": "user", "content": prompt}])
+         return self._parse_response(response)
+
+     def _parse_response(self, response: str) -> ParsedTaskGoal:
+         """
+         Parse LLM JSON output safely.
+         """
+         try:
+             response = response.replace("```json", "").replace("```", "")
+             response_json = json.loads(response.strip())
+             return ParsedTaskGoal(
+                 memories=response_json.get("memories", []),
+                 keys=response_json.get("keys", []),
+                 tags=response_json.get("tags", []),
+                 goal_type=response_json.get("goal_type", "default"),
+             )
+         except Exception as e:
+             raise ValueError(f"Failed to parse LLM output: {e}\nRaw response:\n{response}") from e
memos/memories/textual/tree_text_memory/retrieve/utils.py ADDED
@@ -0,0 +1,48 @@
+ # Prompt for task parsing
+ TASK_PARSE_PROMPT = """
+ You are a task parsing expert.
+
+ Given a user task instruction and optional related memory context,
+ extract the following structured information:
+ 1. Keys: the high-level keywords directly relevant to the user's task.
+ 2. Tags: thematic tags to help categorize and retrieve related memories.
+ 3. Goal Type: retrieval | qa | generation
+ 4. Memories: 2–5 short semantic expansions or rephrasings of the task instruction.
+    These are used to improve embedding search coverage.
+    Each should be clear, concise, and meaningful for retrieval.
+
+ Task description:
+ \"\"\"$task\"\"\"
+
+ Context (if any):
+ \"\"\"$context\"\"\"
+
+ Return strictly in this JSON format:
+ {
+   "keys": [...],
+   "tags": [...],
+   "goal_type": "retrieval | qa | generation",
+   "memories": ["...", "...", ...]
+ }
+ """
+
+
+ REASON_PROMPT = """
+ You are a reasoning agent working with a memory system. You will synthesize knowledge from multiple memory cards to construct a meaningful response to the task below.
+
+ Task: ${task}
+
+ Memory cards (with metadata):
+ ${detailed_memory_list}
+
+ Please:
+ 1. Cluster the cards by theme (topic/concept/fact).
+ 2. Identify useful chains or connections.
+ 3. Return a curated list of memory card IDs with reasons.
+
+ Output in JSON:
+ {
+   "selected_ids": [...],
+   "explanation": "..."
+ }
+ """
memos/parsers/base.py ADDED
@@ -0,0 +1,15 @@
+ from abc import ABC, abstractmethod
+
+ from memos.configs.parser import BaseParserConfig
+
+
+ class BaseParser(ABC):
+     """Base class for all parsers."""
+
+     @abstractmethod
+     def __init__(self, config: BaseParserConfig):
+         """Initialize the parser with the given configuration."""
+
+     @abstractmethod
+     def parse(self, file_path: str) -> str:
+         """Parse the file at the given path and return its content as a string."""
memos/parsers/factory.py ADDED
@@ -0,0 +1,19 @@
+ from typing import Any, ClassVar
+
+ from memos.configs.parser import ParserConfigFactory
+ from memos.parsers.base import BaseParser
+ from memos.parsers.markitdown import MarkItDownParser
+
+
+ class ParserFactory(BaseParser):
+     """Factory class for creating Parser instances."""
+
+     backend_to_class: ClassVar[dict[str, Any]] = {"markitdown": MarkItDownParser}
+
+     @classmethod
+     def from_config(cls, config_factory: ParserConfigFactory) -> BaseParser:
+         backend = config_factory.backend
+         if backend not in cls.backend_to_class:
+             raise ValueError(f"Invalid backend: {backend}")
+         parser_class = cls.backend_to_class[backend]
+         return parser_class(config_factory.config)
memos/parsers/markitdown.py ADDED
@@ -0,0 +1,22 @@
+ from markitdown import MarkItDown
+
+ from memos.configs.parser import MarkItDownParserConfig
+ from memos.log import get_logger
+ from memos.parsers.base import BaseParser
+
+
+ logger = get_logger(__name__)
+
+
+ class MarkItDownParser(BaseParser):
+     """MarkItDown Parser class."""
+
+     def __init__(self, config: MarkItDownParserConfig):
+         self.config = config
+
+     def parse(self, file_path: str) -> str:
+         """Parse the file at the given path and return its content as a Markdown string."""
+         md = MarkItDown(enable_plugins=False)
+         result = md.convert(file_path)
+
+         return result.text_content
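Usage is essentially a one-liner once configured; a hedged sketch, assuming MarkItDownParserConfig can be constructed with defaults (its actual fields are defined in memos/configs/parser.py and may require more than shown):

    # Hypothetical usage; config construction is an assumption.
    config = MarkItDownParserConfig()
    parser = MarkItDownParser(config)
    markdown_text = parser.parse("docs/report.pdf")  # any format MarkItDown supports
    print(markdown_text[:200])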
memos/settings.py ADDED
@@ -0,0 +1,8 @@
+ from pathlib import Path
+
+
+ MEMOS_DIR = Path.cwd() / ".memos"
+ DEBUG = False
+
+ # "memos" or "memos.submodules" ... to filter logs from specific packages
+ LOG_FILTER_TREE_PREFIX = ""