MemoryOS 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic. Click here for more details.
- {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/METADATA +8 -2
- {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/RECORD +92 -69
- {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/WHEEL +1 -1
- memos/__init__.py +1 -1
- memos/api/client.py +109 -0
- memos/api/config.py +35 -8
- memos/api/context/dependencies.py +15 -66
- memos/api/middleware/request_context.py +63 -0
- memos/api/product_api.py +5 -2
- memos/api/product_models.py +107 -16
- memos/api/routers/product_router.py +62 -19
- memos/api/start_api.py +13 -0
- memos/configs/graph_db.py +4 -0
- memos/configs/mem_scheduler.py +38 -3
- memos/configs/memory.py +13 -0
- memos/configs/reranker.py +18 -0
- memos/context/context.py +255 -0
- memos/embedders/factory.py +2 -0
- memos/graph_dbs/base.py +4 -2
- memos/graph_dbs/nebular.py +368 -223
- memos/graph_dbs/neo4j.py +49 -13
- memos/graph_dbs/neo4j_community.py +13 -3
- memos/llms/factory.py +2 -0
- memos/llms/openai.py +74 -2
- memos/llms/vllm.py +2 -0
- memos/log.py +128 -4
- memos/mem_cube/general.py +3 -1
- memos/mem_os/core.py +89 -23
- memos/mem_os/main.py +3 -6
- memos/mem_os/product.py +418 -154
- memos/mem_os/utils/reference_utils.py +20 -0
- memos/mem_reader/factory.py +2 -0
- memos/mem_reader/simple_struct.py +204 -82
- memos/mem_scheduler/analyzer/__init__.py +0 -0
- memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +569 -0
- memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
- memos/mem_scheduler/base_scheduler.py +126 -56
- memos/mem_scheduler/general_modules/dispatcher.py +2 -2
- memos/mem_scheduler/general_modules/misc.py +99 -1
- memos/mem_scheduler/general_modules/scheduler_logger.py +17 -11
- memos/mem_scheduler/general_scheduler.py +40 -88
- memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
- memos/mem_scheduler/memory_manage_modules/memory_filter.py +308 -0
- memos/mem_scheduler/{general_modules → memory_manage_modules}/retriever.py +34 -7
- memos/mem_scheduler/monitors/dispatcher_monitor.py +9 -8
- memos/mem_scheduler/monitors/general_monitor.py +119 -39
- memos/mem_scheduler/optimized_scheduler.py +124 -0
- memos/mem_scheduler/orm_modules/__init__.py +0 -0
- memos/mem_scheduler/orm_modules/base_model.py +635 -0
- memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
- memos/mem_scheduler/scheduler_factory.py +2 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +96 -29
- memos/mem_scheduler/utils/config_utils.py +100 -0
- memos/mem_scheduler/utils/db_utils.py +33 -0
- memos/mem_scheduler/utils/filter_utils.py +1 -1
- memos/mem_scheduler/webservice_modules/__init__.py +0 -0
- memos/mem_user/mysql_user_manager.py +4 -2
- memos/memories/activation/kv.py +2 -1
- memos/memories/textual/item.py +96 -17
- memos/memories/textual/naive.py +1 -1
- memos/memories/textual/tree.py +57 -3
- memos/memories/textual/tree_text_memory/organize/handler.py +4 -2
- memos/memories/textual/tree_text_memory/organize/manager.py +28 -14
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +1 -2
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +75 -23
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +10 -6
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -2
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +119 -21
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +172 -44
- memos/memories/textual/tree_text_memory/retrieve/utils.py +6 -4
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +5 -4
- memos/memos_tools/notification_utils.py +46 -0
- memos/memos_tools/singleton.py +174 -0
- memos/memos_tools/thread_safe_dict.py +22 -0
- memos/memos_tools/thread_safe_dict_segment.py +382 -0
- memos/parsers/factory.py +2 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +24 -0
- memos/reranker/concat.py +59 -0
- memos/reranker/cosine_local.py +96 -0
- memos/reranker/factory.py +48 -0
- memos/reranker/http_bge.py +312 -0
- memos/reranker/noop.py +16 -0
- memos/templates/mem_reader_prompts.py +289 -40
- memos/templates/mem_scheduler_prompts.py +242 -0
- memos/templates/mos_prompts.py +133 -60
- memos/types.py +4 -1
- memos/api/context/context.py +0 -147
- memos/mem_scheduler/mos_for_test_scheduler.py +0 -146
- {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/entry_points.txt +0 -0
- {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info/licenses}/LICENSE +0 -0
- /memos/mem_scheduler/{general_modules → webservice_modules}/rabbitmq_service.py +0 -0
- /memos/mem_scheduler/{general_modules → webservice_modules}/redis_service.py +0 -0
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
from memos.memories.textual.item import (
|
|
2
|
+
TextualMemoryItem,
|
|
3
|
+
)
|
|
4
|
+
|
|
5
|
+
|
|
1
6
|
def split_continuous_references(text: str) -> str:
|
|
2
7
|
"""
|
|
3
8
|
Split continuous reference tags into individual reference tags.
|
|
@@ -131,3 +136,18 @@ def process_streaming_references_complete(text_buffer: str) -> tuple[str, str]:
|
|
|
131
136
|
# No reference-like patterns found, process all text
|
|
132
137
|
processed_text = split_continuous_references(text_buffer)
|
|
133
138
|
return processed_text, ""
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def prepare_reference_data(memories_list: list[TextualMemoryItem]) -> list[dict]:
    """Convert memory items into metadata-only reference entries.

    Each item is dumped with ``model_dump()`` and its ``metadata`` mapping is
    adjusted before being wrapped as ``{"metadata": ...}``:

    * ``ref_id``    - the part of the item's id before the first ``-``
    * ``embedding`` - reset to an empty list
    * ``sources``   - reset to an empty list
    * ``memory``    - the item's ``memory`` attribute
    * ``id``        - the item's full id

    Args:
        memories_list: Items to convert.

    Returns:
        One ``{"metadata": {...}}`` dict per input item, in input order.
    """

    def _as_reference(item):
        # Work on the dumped copy so the original item is left untouched.
        metadata = item.model_dump()["metadata"]
        metadata.update(
            ref_id=item.id.split("-")[0],
            embedding=[],
            sources=[],
            memory=item.memory,
            id=item.id,
        )
        return {"metadata": metadata}

    return [_as_reference(item) for item in memories_list]
|
memos/mem_reader/factory.py
CHANGED
|
@@ -3,6 +3,7 @@ from typing import Any, ClassVar
|
|
|
3
3
|
from memos.configs.mem_reader import MemReaderConfigFactory
|
|
4
4
|
from memos.mem_reader.base import BaseMemReader
|
|
5
5
|
from memos.mem_reader.simple_struct import SimpleStructMemReader
|
|
6
|
+
from memos.memos_tools.singleton import singleton_factory
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
class MemReaderFactory(BaseMemReader):
|
|
@@ -13,6 +14,7 @@ class MemReaderFactory(BaseMemReader):
|
|
|
13
14
|
}
|
|
14
15
|
|
|
15
16
|
@classmethod
|
|
17
|
+
@singleton_factory()
|
|
16
18
|
def from_config(cls, config_factory: MemReaderConfigFactory) -> BaseMemReader:
|
|
17
19
|
backend = config_factory.backend
|
|
18
20
|
if backend not in cls.backend_to_class:
|
|
@@ -1,14 +1,19 @@
|
|
|
1
1
|
import concurrent.futures
|
|
2
2
|
import copy
|
|
3
3
|
import json
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
4
6
|
|
|
5
7
|
from abc import ABC
|
|
6
8
|
from typing import Any
|
|
7
9
|
|
|
10
|
+
from tqdm import tqdm
|
|
11
|
+
|
|
8
12
|
from memos import log
|
|
9
13
|
from memos.chunkers import ChunkerFactory
|
|
10
14
|
from memos.configs.mem_reader import SimpleStructMemReaderConfig
|
|
11
15
|
from memos.configs.parser import ParserConfigFactory
|
|
16
|
+
from memos.context.context import ContextThreadPoolExecutor
|
|
12
17
|
from memos.embedders.factory import EmbedderFactory
|
|
13
18
|
from memos.llms.factory import LLMFactory
|
|
14
19
|
from memos.mem_reader.base import BaseMemReader
|
|
@@ -16,12 +21,95 @@ from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemory
|
|
|
16
21
|
from memos.parsers.factory import ParserFactory
|
|
17
22
|
from memos.templates.mem_reader_prompts import (
|
|
18
23
|
SIMPLE_STRUCT_DOC_READER_PROMPT,
|
|
24
|
+
SIMPLE_STRUCT_DOC_READER_PROMPT_ZH,
|
|
19
25
|
SIMPLE_STRUCT_MEM_READER_EXAMPLE,
|
|
26
|
+
SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
|
|
20
27
|
SIMPLE_STRUCT_MEM_READER_PROMPT,
|
|
28
|
+
SIMPLE_STRUCT_MEM_READER_PROMPT_ZH,
|
|
21
29
|
)
|
|
30
|
+
from memos.utils import timed
|
|
22
31
|
|
|
23
32
|
|
|
24
33
|
logger = log.get_logger(__name__)
|
|
34
|
+
# Prompt templates, looked up first by scene kind ("chat" / "doc") and then by
# language code ("en" / "zh"). The "chat" entry additionally carries the
# few-shot example text for each language under "<lang>_example".
PROMPT_DICT = {
    "chat": {
        "en": SIMPLE_STRUCT_MEM_READER_PROMPT,
        "zh": SIMPLE_STRUCT_MEM_READER_PROMPT_ZH,
        "en_example": SIMPLE_STRUCT_MEM_READER_EXAMPLE,
        "zh_example": SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
    },
    "doc": {
        "en": SIMPLE_STRUCT_DOC_READER_PROMPT,
        "zh": SIMPLE_STRUCT_DOC_READER_PROMPT_ZH,
    },
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def detect_lang(text):
    """Classify *text* as Chinese (``"zh"``) or English (``"en"``).

    The text counts as Chinese when more than 30% of its letter-like
    characters (everything left after removing whitespace, digits and
    non-word characters) are CJK ideographs. Anything else, including
    empty, non-string, or letter-free input, is reported as ``"en"``.

    Args:
        text: The value to inspect; expected to be a ``str``.

    Returns:
        ``"zh"`` or ``"en"``. The function never raises.
    """
    # Check the type first: truth-testing an arbitrary object may itself raise.
    if not isinstance(text, str) or not text:
        return "en"

    # Letter-like characters form the denominator of the ratio.
    letters = re.sub(r"[\s\d\W]", "", text)
    if not letters:
        # Only whitespace, digits or punctuation: nothing to measure, and
        # dividing by zero below must be avoided explicitly.
        return "en"

    chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]"
    chinese_chars = re.findall(chinese_pattern, text)
    if len(chinese_chars) / len(letters) > 0.3:
        return "zh"
    return "en"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _build_node(idx, message, info, scene_file, llm, parse_json_result, embedder):
    """Turn one document-chunk prompt into a ``TextualMemoryItem``.

    Runs three stages (LLM generation, JSON parsing, node construction) and
    returns ``None`` after logging as soon as a stage fails or produces
    nothing usable.

    Args:
        idx: Index of the chunk; appended to ``scene_file`` in the source path.
        message: Prompt passed to ``llm.generate``.
        info: Mapping read for ``user_id`` and ``session_id`` (default ``""``).
        scene_file: Label of the originating document.
        llm: Object exposing ``generate(message)``.
        parse_json_result: Callable that converts the raw LLM output.
        embedder: Object exposing ``embed(list_of_texts)``.

    Returns:
        The constructed ``TextualMemoryItem``, or ``None`` on any failure.
    """
    # Stage 1: ask the LLM for a completion.
    try:
        generated = llm.generate(message)
        if not generated:
            logger.warning(f"[LLM] Empty generation for input: {message}")
            return None
    except Exception as e:
        logger.error(f"[LLM] Exception during generation: {e}")
        return None

    # Stage 2: parse the completion into a mapping.
    try:
        parsed = parse_json_result(generated)
        if not parsed:
            logger.warning(f"[Parse] Failed to parse result: {generated}")
            return None
    except Exception as e:
        logger.error(f"[Parse] Exception during JSON parsing: {e}")
        return None

    # Stage 3: assemble the memory node.
    try:
        memory_text = parsed.get("value", "").strip()
        if not memory_text:
            logger.warning("[BuildNode] value is empty")
            return None

        raw_tags = parsed.get("tags", [])
        node_tags = raw_tags if isinstance(raw_tags, list) else []
        node_key = parsed.get("key", None)
        vector = embedder.embed([memory_text])[0]

        node_metadata = TreeNodeTextualMemoryMetadata(
            user_id=info.get("user_id", ""),
            session_id=info.get("session_id", ""),
            memory_type="LongTermMemory",
            status="activated",
            tags=node_tags,
            key=node_key,
            embedding=vector,
            usage=[],
            sources=[{"type": "doc", "doc_path": f"{scene_file}_{idx}"}],
            background="",
            confidence=0.99,
            type="fact",
        )
        return TextualMemoryItem(memory=memory_text, metadata=node_metadata)
    except Exception as e:
        logger.error(f"[BuildNode] Error building node: {e}")
        return None
|
|
25
113
|
|
|
26
114
|
|
|
27
115
|
class SimpleStructMemReader(BaseMemReader, ABC):
|
|
@@ -39,42 +127,77 @@ class SimpleStructMemReader(BaseMemReader, ABC):
|
|
|
39
127
|
self.embedder = EmbedderFactory.from_config(config.embedder)
|
|
40
128
|
self.chunker = ChunkerFactory.from_config(config.chunker)
|
|
41
129
|
|
|
130
|
+
@timed
def _process_chat_data(self, scene_data_info, info):
    """Extract memory nodes from one window of chat messages.

    The messages are flattened into a transcript, the prompt language is
    chosen with ``detect_lang``, and the LLM is asked for a memory list.
    If generation or parsing raises, the whole transcript is stored as a
    single ``UserMemory`` entry instead.

    Args:
        scene_data_info: Chat messages, each a mapping with ``role`` and
            ``content`` and optionally ``chat_time``.
        info: Mapping read for ``user_id`` and ``session_id``.

    Returns:
        A list of ``TextualMemoryItem``; entries that fail to build are
        logged and skipped.
    """
    transcript_lines = [
        msg["role"] + ": " + f"[{msg['chat_time']}]: " + msg["content"]
        if "chat_time" in msg
        else msg["role"] + ":" + msg["content"]
        for msg in scene_data_info
    ]
    conversation = "\n".join(transcript_lines)

    lang = detect_lang(conversation)
    chat_prompts = PROMPT_DICT["chat"]
    template = chat_prompts[lang]
    examples = chat_prompts[f"{lang}_example"]

    prompt = template.replace("${conversation}", conversation)
    if self.config.remove_prompt_example:
        prompt = prompt.replace(examples, "")

    messages = [{"role": "user", "content": prompt}]

    try:
        response_json = self.parse_json_result(self.llm.generate(messages))
    except Exception as e:
        logger.error(f"[LLM] Exception during chat generation: {e}")
        # Fallback: keep the raw transcript as one user memory.
        response_json = {
            "memory list": [
                {
                    "key": conversation[:10],
                    "memory_type": "UserMemory",
                    "value": conversation,
                    "tags": [],
                }
            ],
            "summary": conversation,
        }

    allowed_types = ["LongTermMemory", "UserMemory"]
    chat_read_nodes = []
    for raw_memory in response_json.get("memory list", []):
        try:
            # Map the Chinese labels onto the canonical type names.
            memory_type = raw_memory.get("memory_type", "LongTermMemory")
            memory_type = memory_type.replace("长期记忆", "LongTermMemory")
            memory_type = memory_type.replace("用户记忆", "UserMemory")
            if memory_type not in allowed_types:
                memory_type = "LongTermMemory"

            tags = raw_memory.get("tags", [])
            if type(tags) is not list:
                tags = []

            value = raw_memory.get("value", "")
            metadata = TreeNodeTextualMemoryMetadata(
                user_id=info.get("user_id"),
                session_id=info.get("session_id"),
                memory_type=memory_type,
                status="activated",
                tags=tags,
                key=raw_memory.get("key", ""),
                embedding=self.embedder.embed([value])[0],
                usage=[],
                sources=scene_data_info,
                background=response_json.get("summary", ""),
                confidence=0.99,
                type="fact",
            )
            chat_read_nodes.append(TextualMemoryItem(memory=value, metadata=metadata))
        except Exception as e:
            logger.error(f"[ChatReader] Error parsing memory item: {e}")

    return chat_read_nodes
|
|
80
203
|
|
|
@@ -127,8 +250,8 @@ class SimpleStructMemReader(BaseMemReader, ABC):
|
|
|
127
250
|
else:
|
|
128
251
|
processing_func = self._process_doc_data
|
|
129
252
|
|
|
130
|
-
# Process Q&A pairs concurrently
|
|
131
|
-
with
|
|
253
|
+
# Process Q&A pairs concurrently with context propagation
|
|
254
|
+
with ContextThreadPoolExecutor() as executor:
|
|
132
255
|
futures = [
|
|
133
256
|
executor.submit(processing_func, scene_data_info, info)
|
|
134
257
|
for scene_data_info in list_scene_data_info
|
|
@@ -166,11 +289,9 @@ class SimpleStructMemReader(BaseMemReader, ABC):
|
|
|
166
289
|
for item in items:
|
|
167
290
|
# Convert dictionary to string
|
|
168
291
|
if "chat_time" in item:
|
|
169
|
-
|
|
170
|
-
result.append(mem)
|
|
292
|
+
result.append(item)
|
|
171
293
|
else:
|
|
172
|
-
|
|
173
|
-
result.append(mem)
|
|
294
|
+
result.append(item)
|
|
174
295
|
if len(result) >= 10:
|
|
175
296
|
results.append(result)
|
|
176
297
|
context = copy.deepcopy(result[-2:])
|
|
@@ -180,59 +301,60 @@ class SimpleStructMemReader(BaseMemReader, ABC):
|
|
|
180
301
|
elif type == "doc":
|
|
181
302
|
for item in scene_data:
|
|
182
303
|
try:
|
|
183
|
-
if
|
|
184
|
-
|
|
185
|
-
|
|
304
|
+
if os.path.exists(item):
|
|
305
|
+
try:
|
|
306
|
+
parsed_text = parser.parse(item)
|
|
307
|
+
results.append({"file": item, "text": parsed_text})
|
|
308
|
+
except Exception as e:
|
|
309
|
+
logger.error(f"[SceneParser] Error parsing {item}: {e}")
|
|
310
|
+
continue
|
|
186
311
|
else:
|
|
187
312
|
parsed_text = item
|
|
188
|
-
results.append({"file":
|
|
313
|
+
results.append({"file": "pure_text", "text": parsed_text})
|
|
189
314
|
except Exception as e:
|
|
190
315
|
print(f"Error parsing file {item}: {e!s}")
|
|
191
316
|
|
|
192
317
|
return results
|
|
193
318
|
|
|
194
|
-
def _process_doc_data(self, scene_data_info, info):
|
|
319
|
+
def _process_doc_data(self, scene_data_info, info, **kwargs):
    """Extract memory nodes from one parsed document.

    The document text is chunked, a language-specific prompt is built for
    each chunk, and the chunks are handed to ``_build_node`` on a thread
    pool. Nodes are collected as the tasks complete, so their order is not
    guaranteed to follow chunk order.

    Args:
        scene_data_info: Mapping with the document ``text`` and its ``file``
            label.
        info: Mapping forwarded to ``_build_node``.
        **kwargs: Accepted but not used by this method.

    Returns:
        A list of the nodes that were built successfully.
    """

    def _chunk_message(chunk):
        # One single-turn user message per chunk, in the chunk's language.
        doc_template = PROMPT_DICT["doc"][detect_lang(chunk.text)]
        return [{"role": "user", "content": doc_template.replace("{chunk_text}", chunk.text)}]

    chunks = self.chunker.chunk(scene_data_info["text"])
    messages = [_chunk_message(chunk) for chunk in chunks]

    doc_nodes = []
    scene_file = scene_data_info["file"]

    with ContextThreadPoolExecutor(max_workers=50) as executor:
        futures = {}
        for idx, msg in enumerate(messages):
            task = executor.submit(
                _build_node,
                idx,
                msg,
                info,
                scene_file,
                self.llm,
                self.parse_json_result,
                self.embedder,
            )
            futures[task] = idx

        completed = concurrent.futures.as_completed(futures)
        for future in tqdm(completed, total=len(futures), desc="Processing"):
            try:
                node = future.result()
                if node:
                    doc_nodes.append(node)
            except Exception as e:
                tqdm.write(f"[ERROR] {e}")
                logger.error(f"[DocReader] Future task failed: {e}")
    return doc_nodes
|
|
237
359
|
|
|
238
360
|
def parse_json_result(self, response_text):
|
|
@@ -240,14 +362,14 @@ class SimpleStructMemReader(BaseMemReader, ABC):
|
|
|
240
362
|
json_start = response_text.find("{")
|
|
241
363
|
response_text = response_text[json_start:]
|
|
242
364
|
response_text = response_text.replace("```", "").strip()
|
|
243
|
-
if response_text
|
|
365
|
+
if not response_text.endswith("}"):
|
|
244
366
|
response_text += "}"
|
|
245
|
-
|
|
246
|
-
return response_json
|
|
367
|
+
return json.loads(response_text)
|
|
247
368
|
except json.JSONDecodeError as e:
|
|
248
|
-
logger.
|
|
249
|
-
|
|
250
|
-
|
|
369
|
+
logger.error(f"[JSONParse] Failed to decode JSON: {e}\nRaw:\n{response_text}")
|
|
370
|
+
return {}
|
|
371
|
+
except Exception as e:
|
|
372
|
+
logger.error(f"[JSONParse] Unexpected error: {e}")
|
|
251
373
|
return {}
|
|
252
374
|
|
|
253
375
|
def transform_memreader(self, data: dict) -> list[TextualMemoryItem]:
|
|
File without changes
|