MemoryOS 0.0.1__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS has been flagged as potentially problematic; see the registry's advisory page for details.
- memoryos-0.1.13.dist-info/METADATA +288 -0
- memoryos-0.1.13.dist-info/RECORD +122 -0
- memos/__init__.py +20 -1
- memos/api/start_api.py +420 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/factory.py +22 -0
- memos/chunkers/sentence_chunker.py +35 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +45 -0
- memos/configs/embedder.py +53 -0
- memos/configs/graph_db.py +45 -0
- memos/configs/internet_retriever.py +81 -0
- memos/configs/llm.py +71 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +89 -0
- memos/configs/mem_os.py +74 -0
- memos/configs/mem_reader.py +53 -0
- memos/configs/mem_scheduler.py +78 -0
- memos/configs/memory.py +195 -0
- memos/configs/parser.py +38 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +64 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/base.py +15 -0
- memos/embedders/factory.py +23 -0
- memos/embedders/ollama.py +74 -0
- memos/embedders/sentence_transformer.py +40 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +215 -0
- memos/graph_dbs/factory.py +21 -0
- memos/graph_dbs/neo4j.py +827 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +16 -0
- memos/llms/factory.py +25 -0
- memos/llms/hf.py +231 -0
- memos/llms/ollama.py +82 -0
- memos/llms/openai.py +34 -0
- memos/llms/utils.py +14 -0
- memos/log.py +78 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +29 -0
- memos/mem_cube/general.py +146 -0
- memos/mem_cube/utils.py +24 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +819 -0
- memos/mem_os/main.py +503 -0
- memos/mem_os/product.py +89 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +27 -0
- memos/mem_reader/factory.py +21 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/simple_struct.py +241 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/base_scheduler.py +164 -0
- memos/mem_scheduler/general_scheduler.py +305 -0
- memos/mem_scheduler/modules/__init__.py +0 -0
- memos/mem_scheduler/modules/base.py +74 -0
- memos/mem_scheduler/modules/dispatcher.py +103 -0
- memos/mem_scheduler/modules/monitor.py +82 -0
- memos/mem_scheduler/modules/redis_service.py +146 -0
- memos/mem_scheduler/modules/retriever.py +41 -0
- memos/mem_scheduler/modules/schemas.py +146 -0
- memos/mem_scheduler/scheduler_factory.py +21 -0
- memos/mem_scheduler/utils.py +26 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +25 -0
- memos/memories/activation/kv.py +232 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +34 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +89 -0
- memos/memories/textual/general.py +286 -0
- memos/memories/textual/item.py +167 -0
- memos/memories/textual/naive.py +185 -0
- memos/memories/textual/tree.py +321 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +305 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +263 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +89 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +158 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +13 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +208 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +68 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +48 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +335 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +19 -0
- memos/parsers/markitdown.py +22 -0
- memos/settings.py +8 -0
- memos/templates/__init__.py +0 -0
- memos/templates/mem_reader_prompts.py +98 -0
- memos/templates/mem_scheduler_prompts.py +65 -0
- memos/templates/mos_prompts.py +63 -0
- memos/types.py +55 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +105 -0
- memos/vec_dbs/factory.py +21 -0
- memos/vec_dbs/item.py +43 -0
- memos/vec_dbs/qdrant.py +292 -0
- memoryos-0.0.1.dist-info/METADATA +0 -53
- memoryos-0.0.1.dist-info/RECORD +0 -5
- {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/LICENSE +0 -0
- {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from memos.llms.base import BaseLLM
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Memory:
    """In-memory structure for storing and organizing a user's memory content.

    Holds three top-level stores:
      - ``objective_memory``: factual user attributes (nickname, work, ...).
      - ``subjective_memory``: interaction-preference attributes (current_mood, ...).
      - ``scene_memory``: conversational ("qa_pair") and document ("document")
        sections, each with per-section summaries and labels.
    """

    def __init__(
        self,
        user_id: str,
        session_id: str,
        created_at: datetime,
    ):
        """
        Initialize the Memory structure.

        Args:
            user_id: User identifier
            session_id: Session identifier
            created_at: Creation timestamp
        """
        self.objective_memory: dict[str, dict[str, Any]] = {}
        self.subjective_memory: dict[str, dict[str, Any]] = {}
        self.scene_memory = {
            "qa_pair": {
                "section": [],
                "info": {
                    "user_id": user_id,
                    "session_id": session_id,
                    "created_at": created_at,
                    "summary": "",
                    "label": [],
                },
            },
            "document": {
                "section": [],
                "info": {
                    "user_id": user_id,
                    "session_id": session_id,
                    "created_at": created_at,
                    "doc_type": "",  # pdf, txt, etc.
                    "doc_category": "",  # research_paper, news, etc.
                    "doc_name": "",
                    "summary": "",
                    "label": [],
                },
            },
        }

    def to_dict(self) -> dict[str, Any]:
        """
        Convert the Memory object to a dictionary.

        Returns:
            Dictionary representation of the Memory object
        """
        return {
            "objective_memory": self.objective_memory,
            "subjective_memory": self.subjective_memory,
            "scene_memory": self.scene_memory,
        }

    def update_user_memory(
        self,
        memory_type: str,
        key: str,
        value: Any,
        origin_data: str,
        confidence_score: float = 1.0,
        timestamp: "str | datetime | None" = None,
    ) -> None:
        """
        Update a memory item in either objective_memory or subjective_memory.

        If the key already exists, the new value is connected to the existing
        one (joined with " | ") and the item's info is replaced with the
        newest metadata.

        Args:
            memory_type: Type of memory to update ('objective' or 'subjective').
            key: Key for the memory item. Typical objective keys: nickname,
                gender, personality, birth, education, work, achievement,
                occupation, residence, location, income, preference,
                expertise, language, hobby, goal. Typical subjective keys:
                current_mood, response_style, language_style,
                information_density, interaction_pace, followed_topic,
                current_goal, content_type, role_preference.
            value: Value to store.
            origin_data: Original data that led to this memory.
            confidence_score: Confidence score (0.0 to 1.0).
            timestamp: Timestamp (string or datetime); if None the current
                time is used.

        Raises:
            ValueError: If memory_type is not 'objective' or 'subjective'.
        """
        if timestamp is None:
            # Fall back to the current wall-clock time.
            timestamp = datetime.now()

        memory_item = {
            "value": value,
            "info": {
                "timestamp": timestamp,
                "confidence_score": confidence_score,
                "origin_data": origin_data,
            },
        }

        if memory_type == "objective":
            memory_dict = self.objective_memory
        elif memory_type == "subjective":
            memory_dict = self.subjective_memory
        else:
            raise ValueError(
                f"Invalid memory_type: {memory_type}. Must be 'objective' or 'subjective'."
            )

        existing_item = memory_dict.get(key)
        if existing_item is not None:
            # Keep history by connecting old and new values; the info block
            # always reflects the most recent update.
            memory_item["value"] = f"{existing_item['value']} | {value}"
        memory_dict[key] = memory_item

    @staticmethod
    def _new_section(summary: str, label: list, order: int) -> dict:
        """Build an empty scene-memory section with the shared info layout."""
        return {
            "subsection": {},
            "info": {
                "summary": summary,
                "label": label,
                "origin_data": "",
                "order": order,
            },
        }

    def add_qa_batch(
        self, batch_summary: str, pair_summaries: list[dict], themes: list[str], order: int
    ) -> None:
        """
        Add a batch of Q&A pairs to the scene memory as a single subsection.

        Args:
            batch_summary: The summary of the entire batch
            pair_summaries: List of dictionaries, each containing:
                - question: The summarized question for a single pair
                - summary: The original dialogue for a single pair
                - prompt: The prompt used for summarization
                - time: The extracted time information (if any)
            themes: List of themes associated with the batch
            order: Order of the batch in the sequence
        """
        qa_subsection = self._new_section(batch_summary, themes, order)
        for pair in pair_summaries:
            qa_subsection["subsection"][pair["question"]] = {
                "summary": pair["summary"],
                # Drop the instruction preamble of the prompt; keep only the
                # part after the first blank line (the original dialogue).
                "sources": pair["prompt"].split("\n\n", 1)[-1],
                "time": pair.get("time", ""),  # default empty when absent
            }
        self.scene_memory["qa_pair"]["section"].append(qa_subsection)

    def add_document_chunk_group(
        self, summary: str, label: list[str], order: int, sub_chunks: list
    ) -> None:
        """
        Add a group of document chunks as a single section with multiple facts in the subsection.

        Args:
            summary: The summary of the large chunk
            label: List of theme labels for the large chunk
            order: Order of the large chunk in the sequence
            sub_chunks: List of dictionaries containing small chunks information,
                each with keys: 'question', 'chunk_text', 'prompt'
        """
        doc_section = self._new_section(summary, label, order)
        # Each small chunk becomes a fact in the subsection, keyed by question.
        for sub_chunk in sub_chunks:
            doc_section["subsection"][sub_chunk["question"]] = {
                "summary": sub_chunk["chunk_text"],
                "sources": sub_chunk["prompt"].split("\n\n", 1)[-1],
            }
        self.scene_memory["document"]["section"].append(doc_section)

    def _summarize_scene_section(self, section_key: str, llm: "BaseLLM | None") -> str:
        """Aggregate subsection summaries/labels of one scene section.

        Collects every subsection summary and label under
        ``scene_memory[section_key]``, produces a section-level summary
        (via the LLM when given, otherwise by concatenation), stores the
        result on the section's info, and returns it.
        """
        all_summaries = []
        all_labels = set()

        for section in self.scene_memory[section_key]["section"]:
            info = section.get("info", {})
            if "summary" in info:
                all_summaries.append(info["summary"])
            all_labels.update(info.get("label", []))

        if llm is not None:
            # Use the LLM to generate a coherent one-sentence summary.
            all_summaries_str = "\n".join(all_summaries)
            messages = [
                {
                    "role": "user",
                    "content": f"Summarize this text into a concise and objective sentence that captures its main idea. Provide only the required content directly, without including any additional information.\n\n{all_summaries_str}",
                }
            ]
            section_summary = llm.generate(messages)
        else:
            # Simple concatenation of summaries.
            section_summary = " ".join(all_summaries)

        section_info = self.scene_memory[section_key]["info"]
        section_info["summary"] = section_summary
        section_info["label"] = list(all_labels)
        return section_summary

    def process_qa_pair_summaries(self, llm: "BaseLLM | None" = None) -> str:
        """
        Process all qa_pair subsection summaries to generate a section summary.

        Args:
            llm: Optional LLM instance to generate summary. If None, concatenates subsection summaries.
        Returns:
            The generated section summary (also stored on the section info).
        """
        return self._summarize_scene_section("qa_pair", llm)

    def process_document_summaries(self, llm: "BaseLLM | None" = None) -> str:
        """
        Process all document subsection summaries to generate a section summary.

        Args:
            llm: Optional LLM instance to generate summary. If None, concatenates subsection summaries.
        Returns:
            The generated section summary (also stored on the section info).
        """
        return self._summarize_scene_section("document", llm)
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import concurrent.futures
|
|
2
|
+
import copy
|
|
3
|
+
import json
|
|
4
|
+
|
|
5
|
+
from abc import ABC
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from memos import log
|
|
9
|
+
from memos.chunkers import ChunkerFactory
|
|
10
|
+
from memos.configs.mem_reader import SimpleStructMemReaderConfig
|
|
11
|
+
from memos.configs.parser import ParserConfigFactory
|
|
12
|
+
from memos.embedders.factory import EmbedderFactory
|
|
13
|
+
from memos.llms.factory import LLMFactory
|
|
14
|
+
from memos.mem_reader.base import BaseMemReader
|
|
15
|
+
from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
|
|
16
|
+
from memos.parsers.factory import ParserFactory
|
|
17
|
+
from memos.templates.mem_reader_prompts import (
|
|
18
|
+
SIMPLE_STRUCT_DOC_READER_PROMPT,
|
|
19
|
+
SIMPLE_STRUCT_MEM_READER_PROMPT,
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
logger = log.get_logger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SimpleStructMemReader(BaseMemReader, ABC):
    """Structured MemReader: extracts TextualMemoryItem nodes from chat
    histories and documents using an LLM for extraction/summarization."""

    def __init__(self, config: SimpleStructMemReaderConfig):
        """
        Initialize the SimpleStructMemReader with configuration.

        Args:
            config: Configuration object for the reader
        """
        self.config = config
        self.llm = LLMFactory.from_config(config.llm)
        self.embedder = EmbedderFactory.from_config(config.embedder)
        self.chunker = ChunkerFactory.from_config(config.chunker)

    def _process_chat_data(self, scene_data_info, info):
        # Build one extraction prompt for the whole conversation window.
        prompt = (
            SIMPLE_STRUCT_MEM_READER_PROMPT.replace("${user_a}", "user")
            .replace("${user_b}", "assistant")
            .replace("${conversation}", "\n".join(scene_data_info))
        )

        messages = [{"role": "user", "content": prompt}]

        response_text = self.llm.generate(messages)
        response_json = self.parse_json_result(response_text)

        chat_read_nodes = []
        for memory_i_raw in response_json.get("memory list", []):
            memory_value = memory_i_raw.get("value", "")
            node_i = TextualMemoryItem(
                memory=memory_value,
                metadata=TreeNodeTextualMemoryMetadata(
                    user_id=info.get("user_id"),
                    session_id=info.get("session_id"),
                    memory_type=memory_i_raw.get("memory_type", ""),
                    status="activated",
                    tags=memory_i_raw.get("tags", ""),
                    key=memory_i_raw.get("key", ""),
                    embedding=self.embedder.embed([memory_value])[0],
                    usage=[],
                    sources=scene_data_info,
                    background=response_json.get("summary", ""),
                    confidence=0.99,
                    type="fact",
                ),
            )
            chat_read_nodes.append(node_i)

        return chat_read_nodes

    def get_memory(
        self, scene_data: list, type: str, info: dict[str, Any]
    ) -> list[list[TextualMemoryItem]]:
        """
        Extract and classify memory content from scene_data.

        For dictionaries: Use LLM to summarize pairs of Q&A
        For file paths: Use chunker to split documents and LLM to summarize each chunk

        Args:
            scene_data: List of dialogue information or document paths
            type: Type of scene_data: ['doc', 'chat']; any other value is
                treated as 'doc'.
            info: Dictionary containing user_id and session_id.
                Must be in format: {"user_id": "1111", "session_id": "2222"}
        Returns:
            list[list[TextualMemoryItem]], one inner list per scene-data
            entry, in the same order as the input.
        Raises:
            ValueError: If scene_data is empty or if info dictionary is missing required fields
        """
        if not scene_data:
            raise ValueError("scene_data is empty")

        # Validate info dictionary format.
        if not isinstance(info, dict):
            raise ValueError("info must be a dictionary")

        required_fields = {"user_id", "session_id"}
        missing_fields = required_fields - set(info.keys())
        if missing_fields:
            raise ValueError(f"info dictionary is missing required fields: {missing_fields}")

        if not all(isinstance(info[field], str) for field in required_fields):
            raise ValueError("user_id and session_id must be strings")

        list_scene_data_info = self.get_scene_data_info(scene_data, type)

        if type == "chat":
            processing_func = self._process_chat_data
        else:
            # 'doc' and any unknown type fall back to document processing.
            processing_func = self._process_doc_data

        # Process entries concurrently, but collect results in submission
        # order so the output aligns with list_scene_data_info.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(processing_func, scene_data_info, info)
                for scene_data_info in list_scene_data_info
            ]
            memory_list = [future.result() for future in futures]

        return memory_list

    def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
        """
        Get raw information from scene_data.
        If scene_data contains dictionaries, convert them to strings.
        If scene_data contains file paths, parse them using the parser.

        Args:
            scene_data: List of dialogue information or document paths
            type: Type of scene data: ['doc', 'chat']
        Returns:
            List of strings containing the processed scene data
        """
        results = []
        parser_config = ParserConfigFactory.model_validate(
            {
                "backend": "markitdown",
                "config": {},
            }
        )
        parser = ParserFactory.from_config(parser_config)

        if type == "chat":
            for items in scene_data:
                window = []
                for item in items:
                    # Render each message dict as "role: [time]: content".
                    if "chat_time" in item:
                        window.append(
                            item["role"] + ": " + f"[{item['chat_time']}]: " + item["content"]
                        )
                    else:
                        window.append(item["role"] + ":" + item["content"])
                    # Flush in windows of 10 messages, carrying the last two
                    # messages over as context for the next window.
                    if len(window) >= 10:
                        results.append(window)
                        window = copy.deepcopy(window[-2:])
                if window:
                    results.append(window)
        elif type == "doc":
            for item in scene_data:
                try:
                    parsed_text = parser.parse(item)
                    results.append({"file": item, "text": parsed_text})
                except Exception as e:
                    logger.error(f"Error parsing file {item}: {e!s}")

        return results

    def _process_doc_data(self, scene_data_info, info):
        chunks = self.chunker.chunk(scene_data_info["text"])
        messages = [
            [
                {
                    "role": "user",
                    "content": SIMPLE_STRUCT_DOC_READER_PROMPT.replace("{chunk_text}", chunk.text),
                }
            ]
            for chunk in chunks
        ]

        # Generate summaries concurrently; executor.map preserves chunk order
        # so the index used in `sources` matches the original chunk position.
        with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
            raw_results = list(executor.map(self.llm.generate, messages))

        processed_chunks = [self.parse_json_result(r) for r in raw_results if r]
        doc_nodes = []
        for i, chunk_res in enumerate(processed_chunks):
            if not chunk_res:
                continue
            node_i = TextualMemoryItem(
                memory=chunk_res["summary"],
                metadata=TreeNodeTextualMemoryMetadata(
                    user_id=info.get("user_id"),
                    session_id=info.get("session_id"),
                    memory_type="LongTermMemory",
                    status="activated",
                    tags=chunk_res["tags"],
                    key="",
                    embedding=self.embedder.embed([chunk_res["summary"]])[0],
                    usage=[],
                    sources=[f"{scene_data_info['file']}_{i}"],
                    background="",
                    confidence=0.99,
                    type="fact",
                ),
            )
            doc_nodes.append(node_i)
        return doc_nodes

    def parse_json_result(self, response_text):
        """Parse an LLM response expected to contain a JSON object.

        Strips a surrounding Markdown code fence (``` or ```json) before
        parsing, without removing occurrences of the word "json" inside the
        payload itself. Returns {} when parsing fails.
        """
        text = response_text.strip()
        if text.startswith("```"):
            text = text[3:]
            # Drop a fence language tag such as "json".
            if text[:4].lower() == "json":
                text = text[4:]
        if text.endswith("```"):
            text = text[:-3]
        try:
            return json.loads(text)
        except json.JSONDecodeError as e:
            logger.warning(
                f"Failed to parse LLM response as JSON: {e}\nRaw response:\n{response_text}"
            )
            return {}

    def transform_memreader(self, data: dict) -> list[TextualMemoryItem]:
        # Not implemented in this reader.
        pass
|
|
File without changes
|