MemoryOS 0.2.1-py3-none-any.whl → 1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MemoryOS might be problematic.

Files changed (92)
  1. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/METADATA +7 -1
  2. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/RECORD +87 -64
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +158 -69
  5. memos/api/context/context.py +147 -0
  6. memos/api/context/dependencies.py +101 -0
  7. memos/api/product_models.py +5 -1
  8. memos/api/routers/product_router.py +54 -26
  9. memos/configs/graph_db.py +49 -1
  10. memos/configs/internet_retriever.py +19 -0
  11. memos/configs/mem_os.py +5 -0
  12. memos/configs/mem_reader.py +9 -0
  13. memos/configs/mem_scheduler.py +54 -18
  14. memos/configs/mem_user.py +58 -0
  15. memos/graph_dbs/base.py +38 -3
  16. memos/graph_dbs/factory.py +2 -0
  17. memos/graph_dbs/nebular.py +1612 -0
  18. memos/graph_dbs/neo4j.py +18 -9
  19. memos/log.py +6 -1
  20. memos/mem_cube/utils.py +13 -6
  21. memos/mem_os/core.py +157 -37
  22. memos/mem_os/main.py +2 -2
  23. memos/mem_os/product.py +252 -201
  24. memos/mem_os/utils/default_config.py +1 -1
  25. memos/mem_os/utils/format_utils.py +281 -70
  26. memos/mem_os/utils/reference_utils.py +133 -0
  27. memos/mem_reader/simple_struct.py +13 -5
  28. memos/mem_scheduler/base_scheduler.py +239 -266
  29. memos/mem_scheduler/{modules → general_modules}/base.py +4 -5
  30. memos/mem_scheduler/{modules → general_modules}/dispatcher.py +57 -21
  31. memos/mem_scheduler/general_modules/misc.py +104 -0
  32. memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +12 -10
  33. memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
  34. memos/mem_scheduler/general_modules/retriever.py +199 -0
  35. memos/mem_scheduler/general_modules/scheduler_logger.py +261 -0
  36. memos/mem_scheduler/general_scheduler.py +243 -80
  37. memos/mem_scheduler/monitors/__init__.py +0 -0
  38. memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
  39. memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +106 -57
  40. memos/mem_scheduler/mos_for_test_scheduler.py +23 -20
  41. memos/mem_scheduler/schemas/__init__.py +0 -0
  42. memos/mem_scheduler/schemas/general_schemas.py +44 -0
  43. memos/mem_scheduler/schemas/message_schemas.py +149 -0
  44. memos/mem_scheduler/schemas/monitor_schemas.py +337 -0
  45. memos/mem_scheduler/utils/__init__.py +0 -0
  46. memos/mem_scheduler/utils/filter_utils.py +176 -0
  47. memos/mem_scheduler/utils/misc_utils.py +102 -0
  48. memos/mem_user/factory.py +94 -0
  49. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  50. memos/mem_user/mysql_user_manager.py +500 -0
  51. memos/mem_user/persistent_factory.py +96 -0
  52. memos/mem_user/user_manager.py +4 -4
  53. memos/memories/activation/item.py +5 -1
  54. memos/memories/activation/kv.py +20 -8
  55. memos/memories/textual/base.py +2 -2
  56. memos/memories/textual/general.py +36 -92
  57. memos/memories/textual/item.py +5 -33
  58. memos/memories/textual/tree.py +13 -7
  59. memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +34 -50
  60. memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
  61. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +49 -43
  62. memos/memories/textual/tree_text_memory/organize/reorganizer.py +107 -142
  63. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +229 -0
  64. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
  65. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +11 -0
  66. memos/memories/textual/tree_text_memory/retrieve/recall.py +15 -8
  67. memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
  68. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
  69. memos/memories/textual/tree_text_memory/retrieve/searcher.py +191 -116
  70. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +47 -15
  71. memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
  72. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
  73. memos/memos_tools/dinding_report_bot.py +422 -0
  74. memos/memos_tools/lockfree_dict.py +120 -0
  75. memos/memos_tools/notification_service.py +44 -0
  76. memos/memos_tools/notification_utils.py +96 -0
  77. memos/memos_tools/thread_safe_dict.py +288 -0
  78. memos/settings.py +3 -1
  79. memos/templates/mem_reader_prompts.py +4 -1
  80. memos/templates/mem_scheduler_prompts.py +62 -15
  81. memos/templates/mos_prompts.py +116 -0
  82. memos/templates/tree_reorganize_prompts.py +24 -17
  83. memos/utils.py +19 -0
  84. memos/mem_scheduler/modules/misc.py +0 -39
  85. memos/mem_scheduler/modules/retriever.py +0 -268
  86. memos/mem_scheduler/modules/schemas.py +0 -328
  87. memos/mem_scheduler/utils.py +0 -75
  88. memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
  89. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/LICENSE +0 -0
  90. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/WHEEL +0 -0
  91. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/entry_points.txt +0 -0
  92. memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
memos/mem_scheduler/modules/schemas.py
@@ -1,328 +0,0 @@
- import json
-
- from datetime import datetime
- from pathlib import Path
- from typing import ClassVar, NewType, TypeVar
- from uuid import uuid4
-
- from pydantic import BaseModel, Field, computed_field
- from typing_extensions import TypedDict
-
- from memos.log import get_logger
- from memos.mem_cube.general import GeneralMemCube
-
-
- logger = get_logger(__name__)
-
-
- FILE_PATH = Path(__file__).absolute()
- BASE_DIR = FILE_PATH.parent.parent.parent.parent.parent
-
- QUERY_LABEL = "query"
- ANSWER_LABEL = "answer"
- ADD_LABEL = "add"
-
- TreeTextMemory_SEARCH_METHOD = "tree_text_memory_search"
- TextMemory_SEARCH_METHOD = "text_memory_search"
- DIRECT_EXCHANGE_TYPE = "direct"
- FANOUT_EXCHANGE_TYPE = "fanout"
- DEFAULT_WORKING_MEM_MONITOR_SIZE_LIMIT = 20
- DEFAULT_ACTIVATION_MEM_MONITOR_SIZE_LIMIT = 5
- DEFAULT_ACT_MEM_DUMP_PATH = f"{BASE_DIR}/outputs/mem_scheduler/mem_cube_scheduler_test.kv_cache"
- DEFAULT_THREAD__POOL_MAX_WORKERS = 5
- DEFAULT_CONSUME_INTERVAL_SECONDS = 3
- NOT_INITIALIZED = -1
- BaseModelType = TypeVar("BaseModelType", bound="BaseModel")
-
- # web log
- LONG_TERM_MEMORY_TYPE = "LongTermMemory"
- USER_MEMORY_TYPE = "UserMemory"
- WORKING_MEMORY_TYPE = "WorkingMemory"
- TEXT_MEMORY_TYPE = "TextMemory"
- ACTIVATION_MEMORY_TYPE = "ActivationMemory"
- PARAMETER_MEMORY_TYPE = "ParameterMemory"
- USER_INPUT_TYPE = "UserInput"
- NOT_APPLICABLE_TYPE = "NotApplicable"
-
- # monitors
- MONITOR_WORKING_MEMORY_TYPE = "MonitorWorkingMemoryType"
- MONITOR_ACTIVATION_MEMORY_TYPE = "MonitorActivationMemoryType"
-
-
- # new types
- UserID = NewType("UserID", str)
- MemCubeID = NewType("CubeID", str)
-
-
- # ************************* Public *************************
- class DictConversionMixin:
-     def to_dict(self) -> dict:
-         """Convert the instance to a dictionary."""
-         return {
-             **self.model_dump(),  # replaces self.dict()
-             "timestamp": self.timestamp.isoformat() if hasattr(self, "timestamp") else None,
-         }
-
-     @classmethod
-     def from_dict(cls: type[BaseModelType], data: dict) -> BaseModelType:
-         """Create an instance from a dictionary."""
-         if "timestamp" in data:
-             data["timestamp"] = datetime.fromisoformat(data["timestamp"])
-         return cls(**data)
-
-     def __str__(self) -> str:
-         """Convert the instance to a JSON string with indentation of 4 spaces.
-         This will be used when str() or print() is called on the instance.
-
-         Returns:
-             str: A JSON string representation of the instance with 4-space indentation.
-         """
-         return json.dumps(
-             self.to_dict(),
-             indent=4,
-             ensure_ascii=False,
-             default=str,  # fall back to str() for objects that cannot be serialized
-         )
-
-     class Config:
-         json_encoders: ClassVar[dict[type, object]] = {datetime: lambda v: v.isoformat()}
-
-
- # ************************* Messages *************************
- class ScheduleMessageItem(BaseModel, DictConversionMixin):
-     item_id: str = Field(description="uuid", default_factory=lambda: str(uuid4()))
-     user_id: str = Field(..., description="user id")
-     mem_cube_id: str = Field(..., description="memcube id")
-     label: str = Field(..., description="Label of the schedule message")
-     mem_cube: GeneralMemCube | str = Field(..., description="memcube for schedule")
-     content: str = Field(..., description="Content of the schedule message")
-     timestamp: datetime = Field(
-         default_factory=datetime.now, description="submit time for schedule_messages"
-     )
-
-     class Config:
-         arbitrary_types_allowed = True
-         json_encoders: ClassVar[dict[type, object]] = {
-             datetime: lambda v: v.isoformat(),
-             GeneralMemCube: lambda v: f"<GeneralMemCube:{id(v)}>",
-         }
-
-     def to_dict(self) -> dict:
-         """Convert model to dictionary suitable for Redis Stream"""
-         return {
-             "item_id": self.item_id,
-             "user_id": self.user_id,
-             "cube_id": self.mem_cube_id,
-             "label": self.label,
-             "cube": "Not Applicable",  # Custom cube serialization
-             "content": self.content,
-             "timestamp": self.timestamp.isoformat(),
-         }
-
-     @classmethod
-     def from_dict(cls, data: dict) -> "ScheduleMessageItem":
-         """Create model from Redis Stream dictionary"""
-         return cls(
-             item_id=data.get("item_id", str(uuid4())),
-             user_id=data["user_id"],
-             mem_cube_id=data["cube_id"],
-             label=data["label"],
-             mem_cube="Not Applicable",  # Custom cube deserialization
-             content=data["content"],
-             timestamp=datetime.fromisoformat(data["timestamp"]),
-         )
-
-
- class MemorySizes(TypedDict):
-     long_term_memory_size: int
-     user_memory_size: int
-     working_memory_size: int
-     transformed_act_memory_size: int
-
-
- class MemoryCapacities(TypedDict):
-     long_term_memory_capacity: int
-     user_memory_capacity: int
-     working_memory_capacity: int
-     transformed_act_memory_capacity: int
-
-
- DEFAULT_MEMORY_SIZES = {
-     "long_term_memory_size": NOT_INITIALIZED,
-     "user_memory_size": NOT_INITIALIZED,
-     "working_memory_size": NOT_INITIALIZED,
-     "transformed_act_memory_size": NOT_INITIALIZED,
-     "parameter_memory_size": NOT_INITIALIZED,
- }
-
- DEFAULT_MEMORY_CAPACITIES = {
-     "long_term_memory_capacity": 10000,
-     "user_memory_capacity": 10000,
-     "working_memory_capacity": 20,
-     "transformed_act_memory_capacity": NOT_INITIALIZED,
-     "parameter_memory_capacity": NOT_INITIALIZED,
- }
-
-
- class ScheduleLogForWebItem(BaseModel, DictConversionMixin):
-     item_id: str = Field(
-         description="Unique identifier for the log entry", default_factory=lambda: str(uuid4())
-     )
-     user_id: str = Field(..., description="Identifier for the user associated with the log")
-     mem_cube_id: str = Field(
-         ..., description="Identifier for the memcube associated with this log entry"
-     )
-     label: str = Field(..., description="Label categorizing the type of log")
-     from_memory_type: str = Field(..., description="Source memory type")
-     to_memory_type: str = Field(..., description="Destination memory type")
-     log_content: str = Field(..., description="Detailed content of the log entry")
-     current_memory_sizes: MemorySizes = Field(
-         default_factory=lambda: dict(DEFAULT_MEMORY_SIZES),
-         description="Current utilization of memory partitions",
-     )
-     memory_capacities: MemoryCapacities = Field(
-         default_factory=lambda: dict(DEFAULT_MEMORY_CAPACITIES),
-         description="Maximum capacities of memory partitions",
-     )
-     timestamp: datetime = Field(
-         default_factory=datetime.now,
-         description="Timestamp indicating when the log entry was created",
-     )
-
-
- # ************************* Monitor *************************
- class MemoryMonitorItem(BaseModel, DictConversionMixin):
-     item_id: str = Field(
-         description="Unique identifier for the memory item", default_factory=lambda: str(uuid4())
-     )
-     memory_text: str = Field(
-         ...,
-         description="The actual content of the memory",
-         min_length=1,
-         max_length=10000,  # Prevent excessively large memory texts
-     )
-     importance_score: float = Field(
-         default=NOT_INITIALIZED,
-         description="Numerical score representing the memory's importance",
-         ge=NOT_INITIALIZED,  # Minimum value is NOT_INITIALIZED (-1), the uninitialized sentinel
-     )
-     recording_count: int = Field(
-         default=1,
-         description="How many times this memory has been recorded",
-         ge=1,  # Greater than or equal to 1
-     )
-
-     def get_score(self) -> float:
-         """
-         Calculate the effective score for the memory item.
-
-         Returns:
-             float: The importance_score if it has been initialized (>=0),
-             otherwise the recording_count converted to float.
-
-         Note:
-             This method provides a unified way to retrieve a comparable score
-             for memory items, regardless of whether their importance has been explicitly set.
-         """
-         if self.importance_score == NOT_INITIALIZED:
-             # Return recording_count as float when importance_score is not initialized
-             return float(self.recording_count)
-         else:
-             # Return the initialized importance_score
-             return self.importance_score
-
-
- class MemoryMonitorManager(BaseModel, DictConversionMixin):
-     user_id: str = Field(..., description="Required user identifier", min_length=1)
-     mem_cube_id: str = Field(..., description="Required memory cube identifier", min_length=1)
-     memories: list[MemoryMonitorItem] = Field(
-         default_factory=list, description="Collection of memory items"
-     )
-     max_capacity: int | None = Field(
-         default=None, description="Maximum number of memories allowed (None for unlimited)", ge=1
-     )
-
-     @computed_field
-     @property
-     def memory_size(self) -> int:
-         """Automatically calculated count of memory items."""
-         return len(self.memories)
-
-     def update_memories(
-         self, text_working_memories: list[str], partial_retention_number: int
-     ) -> list[MemoryMonitorItem]:
-         """
-         Update memories based on text_working_memories.
-
-         Args:
-             text_working_memories: List of memory texts to update
-             partial_retention_number: Number of top memories to keep by recording count
-
-         Returns:
-             List of added or updated MemoryMonitorItem instances
-         """
-
-         # Validate partial_retention_number
-         if partial_retention_number < 0:
-             raise ValueError("partial_retention_number must be non-negative")
-
-         # Create text lookup set
-         working_memory_set = set(text_working_memories)
-
-         # Step 1: Update existing memories or add new ones
-         added_or_updated = []
-         memory_text_map = {item.memory_text: item for item in self.memories}
-
-         for text in text_working_memories:
-             if text in memory_text_map:
-                 # Update existing memory
-                 memory = memory_text_map[text]
-                 memory.recording_count += 1
-                 added_or_updated.append(memory)
-             else:
-                 # Add new memory
-                 new_memory = MemoryMonitorItem(memory_text=text, recording_count=1)
-                 self.memories.append(new_memory)
-                 added_or_updated.append(new_memory)
-
-         # Step 2: Identify memories to remove
-         # Sort memories by recording_count in descending order
-         sorted_memories = sorted(self.memories, key=lambda item: item.recording_count, reverse=True)
-
-         # Keep the top N memories by recording_count
-         records_to_keep = {
-             memory.memory_text for memory in sorted_memories[:partial_retention_number]
-         }
-
-         # Collect memories to remove: not in current working memory and not in top N
-         memories_to_remove = [
-             memory
-             for memory in self.memories
-             if memory.memory_text not in working_memory_set
-             and memory.memory_text not in records_to_keep
-         ]
-
-         # Step 3: Remove identified memories
-         for memory in memories_to_remove:
-             self.memories.remove(memory)
-
-         # Step 4: Enforce max_capacity if set
-         if self.max_capacity is not None and len(self.memories) > self.max_capacity:
-             # Sort by importance and then recording count
-             sorted_memories = sorted(
-                 self.memories,
-                 key=lambda item: (item.importance_score, item.recording_count),
-                 reverse=True,
-             )
-             # Keep only the top max_capacity memories
-             self.memories = sorted_memories[: self.max_capacity]
-
-         # Log the update result
-         logger.info(
-             f"Updated monitor manager for user {self.user_id}, mem_cube {self.mem_cube_id}: "
-             f"Total memories: {len(self.memories)}, "
-             f"Added/Updated: {len(added_or_updated)}, "
-             f"Removed: {len(memories_to_remove)} (excluding top {partial_retention_number} by recording_count)"
-         )
-
-         return added_or_updated
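
The ScheduleMessageItem deleted above is the unit of work that 0.2.1 queued for the scheduler over Redis Streams; its expanded successor appears to be memos/mem_scheduler/schemas/message_schemas.py (file 43 in the list above). A minimal round-trip sketch under the old 0.2.1 import path, with hypothetical IDs and a string stand-in for mem_cube:

from memos.mem_scheduler.modules.schemas import QUERY_LABEL, ScheduleMessageItem

# Hypothetical values; mem_cube accepts a GeneralMemCube or a plain string.
msg = ScheduleMessageItem(
    user_id="u-1",
    mem_cube_id="cube-1",
    label=QUERY_LABEL,
    mem_cube="Not Applicable",  # cubes are never serialized; see to_dict() above
    content="What did I schedule for Monday?",
)

flat = msg.to_dict()  # flat dict keyed "cube_id"/"cube", ready for a Redis Stream
restored = ScheduleMessageItem.from_dict(flat)
assert restored.mem_cube_id == msg.mem_cube_id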
memos/mem_scheduler/utils.py
@@ -1,75 +0,0 @@
- import json
- import re
-
- from pathlib import Path
-
- import yaml
-
-
- def extract_json_dict(text: str):
-     text = text.strip()
-     patterns_to_remove = ["json```", "```json", "latex```", "```latex", "```"]
-     for pattern in patterns_to_remove:
-         text = text.replace(pattern, "")
-     res = json.loads(text.strip())
-     return res
-
-
- def transform_name_to_key(name):
-     """
-     Normalize text by removing all punctuation marks, keeping only letters, numbers, and word characters.
-
-     Args:
-         name (str): Input text to be processed
-
-     Returns:
-         str: Lowercased text with punctuation removed and words joined by underscores
-     """
-     # Match all characters that are NOT:
-     # \w - word characters (letters, digits, underscore)
-     # \u4e00-\u9fff - Chinese/Japanese/Korean characters
-     # \s - whitespace
-     pattern = r"[^\w\u4e00-\u9fff\s]"
-
-     # Substitute all matched punctuation marks with empty string
-     # re.UNICODE flag ensures proper handling of Unicode characters
-     normalized = re.sub(pattern, "", name, flags=re.UNICODE)
-
-     # Join the remaining whitespace-separated words with underscores
-     normalized = "_".join(normalized.split())
-
-     normalized = normalized.lower()
-
-     return normalized
-
-
- def parse_yaml(yaml_file):
-     yaml_path = Path(yaml_file)
-     if not yaml_path.is_file():
-         raise FileNotFoundError(f"No such file: {yaml_file}")
-
-     with yaml_path.open("r", encoding="utf-8") as fr:
-         data = yaml.safe_load(fr)
-
-     return data
-
-
- def is_all_english(input_string: str) -> bool:
-     """Determine if the string consists entirely of English characters (including spaces)"""
-     return all(char.isascii() or char.isspace() for char in input_string)
-
-
- def is_all_chinese(input_string: str) -> bool:
-     """Determine if the string consists entirely of Chinese characters (including Chinese punctuation and spaces)"""
-     return all(
-         ("\u4e00" <= char <= "\u9fff")  # Basic Chinese characters
-         or ("\u3400" <= char <= "\u4dbf")  # Extension A
-         or ("\U00020000" <= char <= "\U0002a6df")  # Extension B
-         or ("\U0002a700" <= char <= "\U0002b73f")  # Extension C
-         or ("\U0002b740" <= char <= "\U0002b81f")  # Extension D
-         or ("\U0002b820" <= char <= "\U0002ceaf")  # Extension E
-         or ("\U0002f800" <= char <= "\U0002fa1f")  # Extension F
-         or char.isspace()  # Spaces
-         for char in input_string
-     )
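
The utilities deleted above appear to resurface in 1.0.0 under memos/mem_scheduler/utils/filter_utils.py and misc_utils.py (files 46 and 47 in the list above). For orientation, a short sketch of the two string helpers, assuming the old 0.2.1 import path; expected results are shown in comments:

from memos.mem_scheduler.utils import extract_json_dict, transform_name_to_key

# Strips markdown-style code fences before parsing LLM output as JSON.
extract_json_dict('```json\n{"answer": 42}\n```')  # -> {"answer": 42}

# Drops punctuation, joins the remaining words with "_", and lowercases.
transform_name_to_key("User's Working-Memory!")  # -> "users_workingmemory"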
memos/memories/textual/tree_text_memory/organize/redundancy.py
@@ -1,193 +0,0 @@
- import json
- import re
-
- from datetime import datetime
-
- from memos.embedders.base import BaseEmbedder
- from memos.graph_dbs.neo4j import Neo4jGraphDB
- from memos.llms.base import BaseLLM
- from memos.log import get_logger
- from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
- from memos.templates.tree_reorganize_prompts import (
-     REDUNDANCY_DETECTOR_PROMPT,
-     REDUNDANCY_MERGE_PROMPT,
-     REDUNDANCY_RESOLVER_PROMPT,
- )
-
-
- logger = get_logger(__name__)
-
-
- class RedundancyHandler:
-     EMBEDDING_THRESHOLD: float = 0.8  # Threshold for embedding similarity to consider redundancy
-
-     def __init__(self, graph_store: Neo4jGraphDB, llm: BaseLLM, embedder: BaseEmbedder):
-         self.graph_store = graph_store
-         self.llm = llm
-         self.embedder = embedder
-
-     def detect(
-         self, memory: TextualMemoryItem, top_k: int = 5, scope: str | None = None
-     ) -> list[tuple[TextualMemoryItem, TextualMemoryItem]]:
-         """
-         Detect redundancy by finding the most similar items in the graph database based on embedding, then use the LLM to judge redundancy.
-         Args:
-             memory: The memory item (should have an embedding attribute or field).
-             top_k: Number of top similar nodes to retrieve.
-             scope: Optional memory type filter.
-         Returns:
-             List of redundancy pairs (each pair is a tuple: (memory, candidate)).
-         """
-         # 1. Search for similar memories based on embedding
-         embedding = memory.metadata.embedding
-         embedding_candidates_info = self.graph_store.search_by_embedding(
-             embedding, top_k=top_k, scope=scope
-         )
-         # 2. Filter based on similarity threshold
-         embedding_candidates_ids = [
-             info["id"]
-             for info in embedding_candidates_info
-             if info["score"] >= self.EMBEDDING_THRESHOLD and info["id"] != memory.id
-         ]
-         # 3. Judge redundancies using LLM
-         embedding_candidates = self.graph_store.get_nodes(embedding_candidates_ids)
-         redundant_pairs = []
-         for embedding_candidate in embedding_candidates:
-             embedding_candidate = TextualMemoryItem.from_dict(embedding_candidate)
-             prompt = [
-                 {
-                     "role": "system",
-                     "content": "You are a redundancy detector for memory items.",
-                 },
-                 {
-                     "role": "user",
-                     "content": REDUNDANCY_DETECTOR_PROMPT.format(
-                         statement_1=memory.memory,
-                         statement_2=embedding_candidate.memory,
-                     ),
-                 },
-             ]
-             result = self.llm.generate(prompt).strip()
-             if "yes" in result.lower():
-                 redundant_pairs.append([memory, embedding_candidate])
-         if len(redundant_pairs):
-             redundant_text = "\n".join(
-                 f'"{pair[0].memory!s}" <==REDUNDANCY==> "{pair[1].memory!s}"'
-                 for pair in redundant_pairs
-             )
-             logger.warning(
-                 f"Detected {len(redundant_pairs)} redundancies for memory {memory.id}\n {redundant_text}"
-             )
-         return redundant_pairs
-
-     def resolve_two_nodes(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem) -> None:
-         """
-         Resolve detected redundancies between two memory items using LLM fusion.
-         Args:
-             memory_a: The first redundant memory item.
-             memory_b: The second redundant memory item.
-         Returns:
-             A fused TextualMemoryItem representing the resolved memory.
-         """
-         return  # waiting for implementation
-         # ———————————— 1. LLM generate fused memory ————————————
-         metadata_for_resolve = ["key", "background", "confidence", "updated_at"]
-         metadata_1 = memory_a.metadata.model_dump_json(include=metadata_for_resolve)
-         metadata_2 = memory_b.metadata.model_dump_json(include=metadata_for_resolve)
-         prompt = [
-             {
-                 "role": "system",
-                 "content": "",
-             },
-             {
-                 "role": "user",
-                 "content": REDUNDANCY_RESOLVER_PROMPT.format(
-                     statement_1=memory_a.memory,
-                     metadata_1=metadata_1,
-                     statement_2=memory_b.memory,
-                     metadata_2=metadata_2,
-                 ),
-             },
-         ]
-         response = self.llm.generate(prompt).strip()
-
-         # ———————————— 2. Parse the response ————————————
-         try:
-             answer = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
-             answer = answer.group(1).strip()
-             fixed_metadata = self._merge_metadata(answer, memory_a.metadata, memory_b.metadata)
-             merged_memory = TextualMemoryItem(memory=answer, metadata=fixed_metadata)
-             logger.info(f"Resolved result: {merged_memory}")
-             self._resolve_in_graph(memory_a, memory_b, merged_memory)
-         except json.decoder.JSONDecodeError:
-             logger.error(f"Failed to parse LLM response: {response}")
-
-     def resolve_one_node(self, memory: TextualMemoryItem) -> None:
-         prompt = [
-             {
-                 "role": "user",
-                 "content": REDUNDANCY_MERGE_PROMPT.format(merged_text=memory.memory),
-             },
-         ]
-         response = self.llm.generate(prompt)
-         memory.memory = response.strip()
-         self.graph_store.update_node(
-             memory.id,
-             {"memory": memory.memory, **memory.metadata.model_dump(exclude_none=True)},
-         )
-         logger.debug(f"Merged memory: {memory.memory}")
-
-     def _resolve_in_graph(
-         self,
-         redundant_a: TextualMemoryItem,
-         redundant_b: TextualMemoryItem,
-         merged: TextualMemoryItem,
-     ):
-         edges_a = self.graph_store.get_edges(redundant_a.id, type="ANY", direction="ANY")
-         edges_b = self.graph_store.get_edges(redundant_b.id, type="ANY", direction="ANY")
-         all_edges = edges_a + edges_b
-
-         self.graph_store.add_node(
-             merged.id, merged.memory, merged.metadata.model_dump(exclude_none=True)
-         )
-
-         for edge in all_edges:
-             new_from = (
-                 merged.id if edge["from"] in (redundant_a.id, redundant_b.id) else edge["from"]
-             )
-             new_to = merged.id if edge["to"] in (redundant_a.id, redundant_b.id) else edge["to"]
-             if new_from == new_to:
-                 continue
-             # Check if the edge already exists before adding
-             if not self.graph_store.edge_exists(new_from, new_to, edge["type"], direction="ANY"):
-                 self.graph_store.add_edge(new_from, new_to, edge["type"])
-
-         self.graph_store.update_node(redundant_a.id, {"status": "archived"})
-         self.graph_store.update_node(redundant_b.id, {"status": "archived"})
-         self.graph_store.add_edge(redundant_a.id, merged.id, type="MERGED_TO")
-         self.graph_store.add_edge(redundant_b.id, merged.id, type="MERGED_TO")
-         logger.debug(
-             f"Archive {redundant_a.id} and {redundant_b.id}, and inherit their edges to {merged.id}."
-         )
-
-     def _merge_metadata(
-         self,
-         memory: str,
-         metadata_a: TreeNodeTextualMemoryMetadata,
-         metadata_b: TreeNodeTextualMemoryMetadata,
-     ) -> TreeNodeTextualMemoryMetadata:
-         metadata_1 = metadata_a.model_dump()
-         metadata_2 = metadata_b.model_dump()
-         merged_metadata = {
-             "sources": (metadata_1["sources"] or []) + (metadata_2["sources"] or []),
-             "embedding": self.embedder.embed([memory])[0],
-             "updated_at": datetime.now().isoformat(),
-             "created_at": datetime.now().isoformat(),
-         }
-         for key in metadata_1:
-             if key in merged_metadata:
-                 continue
-             merged_metadata[key] = (
-                 metadata_1[key] if metadata_1[key] is not None else metadata_2[key]
-             )
-         return TreeNodeTextualMemoryMetadata.model_validate(merged_metadata)
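
With redundancy.py removed, redundancy handling appears to be consolidated into organize/handler.py (the conflict.py → handler.py rename, file 59 in the list above). For reference, a minimal sketch of how the deleted class was driven, assuming pre-built graph_store, llm, and embedder instances and an already-embedded TextualMemoryItem named memory_item (all hypothetical names):

handler = RedundancyHandler(graph_store=graph_store, llm=llm, embedder=embedder)

# Embedding search proposes neighbors scoring >= EMBEDDING_THRESHOLD (0.8),
# then the LLM confirms each (memory, candidate) pair as redundant or not.
pairs = handler.detect(memory_item, top_k=5, scope="LongTermMemory")

for original, duplicate in pairs:
    # Still a stub in 0.2.1: returns immediately ("waiting for implementation").
    handler.resolve_two_nodes(original, duplicate)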