MemoryOS 0.0.1__py3-none-any.whl → 0.1.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic. Click here for more details.
- memoryos-0.1.12.dist-info/METADATA +257 -0
- memoryos-0.1.12.dist-info/RECORD +117 -0
- memos/__init__.py +20 -1
- memos/api/start_api.py +420 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/factory.py +22 -0
- memos/chunkers/sentence_chunker.py +35 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +45 -0
- memos/configs/embedder.py +53 -0
- memos/configs/graph_db.py +45 -0
- memos/configs/llm.py +71 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +89 -0
- memos/configs/mem_os.py +70 -0
- memos/configs/mem_reader.py +53 -0
- memos/configs/mem_scheduler.py +78 -0
- memos/configs/memory.py +190 -0
- memos/configs/parser.py +38 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +64 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/base.py +15 -0
- memos/embedders/factory.py +23 -0
- memos/embedders/ollama.py +74 -0
- memos/embedders/sentence_transformer.py +40 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +215 -0
- memos/graph_dbs/factory.py +21 -0
- memos/graph_dbs/neo4j.py +827 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +16 -0
- memos/llms/factory.py +25 -0
- memos/llms/hf.py +231 -0
- memos/llms/ollama.py +82 -0
- memos/llms/openai.py +34 -0
- memos/llms/utils.py +14 -0
- memos/log.py +78 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +29 -0
- memos/mem_cube/general.py +146 -0
- memos/mem_cube/utils.py +24 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +819 -0
- memos/mem_os/main.py +12 -0
- memos/mem_os/product.py +89 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +27 -0
- memos/mem_reader/factory.py +21 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/simple_struct.py +241 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/base_scheduler.py +164 -0
- memos/mem_scheduler/general_scheduler.py +305 -0
- memos/mem_scheduler/modules/__init__.py +0 -0
- memos/mem_scheduler/modules/base.py +74 -0
- memos/mem_scheduler/modules/dispatcher.py +103 -0
- memos/mem_scheduler/modules/monitor.py +82 -0
- memos/mem_scheduler/modules/redis_service.py +146 -0
- memos/mem_scheduler/modules/retriever.py +41 -0
- memos/mem_scheduler/modules/schemas.py +146 -0
- memos/mem_scheduler/scheduler_factory.py +21 -0
- memos/mem_scheduler/utils.py +26 -0
- memos/mem_user/user_manager.py +478 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +25 -0
- memos/memories/activation/kv.py +232 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +34 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +89 -0
- memos/memories/textual/general.py +286 -0
- memos/memories/textual/item.py +167 -0
- memos/memories/textual/naive.py +185 -0
- memos/memories/textual/tree.py +289 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +305 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +64 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +158 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +13 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +166 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +68 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +48 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +19 -0
- memos/parsers/markitdown.py +22 -0
- memos/settings.py +8 -0
- memos/templates/__init__.py +0 -0
- memos/templates/mem_reader_prompts.py +98 -0
- memos/templates/mem_scheduler_prompts.py +65 -0
- memos/types.py +55 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +105 -0
- memos/vec_dbs/factory.py +21 -0
- memos/vec_dbs/item.py +43 -0
- memos/vec_dbs/qdrant.py +292 -0
- memoryos-0.0.1.dist-info/METADATA +0 -53
- memoryos-0.0.1.dist-info/RECORD +0 -5
- {memoryos-0.0.1.dist-info → memoryos-0.1.12.dist-info}/LICENSE +0 -0
- {memoryos-0.0.1.dist-info → memoryos-0.1.12.dist-info}/WHEEL +0 -0
memos/mem_os/main.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
from memos.configs.mem_os import MOSConfig
|
|
2
|
+
from memos.mem_os.core import MOSCore
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
class MOS(MOSCore):
    """Memory Operating System entry point.

    A thin subclass of ``MOSCore`` that adds no behavior of its own; it is
    kept so that code written against the original ``MOS`` interface keeps
    working.
    """

    def __init__(self, config: MOSConfig):
        """Forward ``config`` unchanged to ``MOSCore.__init__``."""
        super().__init__(config)
|
memos/mem_os/product.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import json
|
|
2
|
+
|
|
3
|
+
from collections.abc import Generator
|
|
4
|
+
from typing import Literal
|
|
5
|
+
|
|
6
|
+
from memos.configs.mem_os import MOSConfig
|
|
7
|
+
from memos.mem_os.core import MOSCore
|
|
8
|
+
from memos.memories.activation.item import ActivationMemoryItem
|
|
9
|
+
from memos.memories.parametric.item import ParametricMemoryItem
|
|
10
|
+
from memos.memories.textual.item import TextualMemoryMetadata, TreeNodeTextualMemoryMetadata
|
|
11
|
+
from memos.types import MessageList
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class MOSProduct(MOSCore):
    """Product-facing variant of ``MOSCore``.

    On top of what ``MOSCore`` provides, this class offers a ``chat`` that
    streams its output as SSE-style frames and a ``get_all`` that returns the
    entry for a single memory type.
    """

    def __init__(self, config: MOSConfig):
        """Forward ``config`` unchanged to ``MOSCore.__init__``."""
        super().__init__(config)

    def get_suggestion_query(self, user_id: str) -> list[str]:
        """Get suggestion queries from the LLM for a user.

        NOTE(review): there is no implementation yet -- the body consists of
        this docstring only, so a call currently returns ``None`` instead of
        the annotated ``list[str]``.

        Args:
            user_id (str): Custom user ID.

        Returns:
            list[str]: The suggestion query list (once implemented).
        """

    def chat(
        self,
        query: str,
        user_id: str,
        cube_id: str | None = None,
        history: MessageList | None = None,
    ) -> Generator[str, None, None]:
        """Chat with the LLM, streaming the reply as SSE-style ``data:`` frames.

        Frames are produced in this order:

        1. one ``metadata`` frame listing the ``content`` of every ``act_mem``
           hit returned by ``self.search(query)``;
        2. one ``text`` frame per item obtained by iterating the result of
           ``MOSCore.chat(query, user_id)``;
        3. one ``reference`` frame;
        4. one ``end`` frame.

        NOTE(review): ``cube_id`` and ``history`` are accepted but never used,
        the search is issued without ``user_id``, and the ``reference`` payload
        is a fixed placeholder -- confirm whether this is intentional.

        Args:
            query (str): Query string.
            user_id (str): Custom user ID.
            cube_id (str, optional): Custom cube ID for user.
            history (list[dict], optional): Chat history.

        Returns:
            Generator[str, None, None]: The response string generator.
        """

        def as_frame(payload: dict) -> str:
            # Each frame is "data: <json>" terminated by a blank line.
            return f"data: {json.dumps(payload)}\n\n"

        activation_hits = self.search(query)["act_mem"]
        yield as_frame({"type": "metadata", "content": [hit.content for hit in activation_hits]})

        # The parent chat is only invoked after the metadata frame was consumed.
        for piece in super().chat(query, user_id):
            yield as_frame({"type": "text", "content": piece})

        yield as_frame({"type": "reference", "content": [{"id": "1234"}]})
        yield as_frame({"type": "end"})

    def get_all(
        self,
        user_id: str,
        memory_type: Literal["text_mem", "act_mem", "param_mem"],
        cube_id: str | None = None,
    ) -> list[
        dict[
            str,
            str
            | list[
                TextualMemoryMetadata
                | TreeNodeTextualMemoryMetadata
                | ActivationMemoryItem
                | ParametricMemoryItem
            ],
        ]
    ]:
        """Get all memory items of one memory type for a user.

        Calls ``MOSCore.get_all(user_id, cube_id)`` and returns the entry stored
        under ``memory_type`` in its result.

        NOTE(review): the positional parameter order expected by
        ``MOSCore.get_all`` is not visible from this class -- verify that
        ``(user_id, cube_id)`` matches it.

        Args:
            user_id (str): The ID of the user.
            memory_type (Literal["text_mem", "act_mem", "param_mem"]): The type
                of memory to get.
            cube_id (str | None, optional): The ID of the cube. Defaults to None.

        Returns:
            The value found under ``memory_type``; see the return annotation
            for the expected shape.
        """
        memories_by_type = super().get_all(user_id, cube_id)
        return memories_by_type[memory_type]
|
|
File without changes
|
memos/mem_reader/base.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
from abc import ABC, abstractmethod
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from memos.configs.mem_reader import BaseMemReaderConfig
|
|
5
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class BaseMemReader(ABC):
    """Abstract interface that every MemReader backend has to implement."""

    @abstractmethod
    def __init__(self, config: BaseMemReaderConfig):
        """Set up the reader from the given configuration object."""

    @abstractmethod
    def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
        """Return the raw information related to the current scene.

        Args:
            scene_data: The scene data to inspect.
            type: Kind of scene data; the accepted values are defined by the
                concrete reader.

        Returns:
            A list of strings holding the raw information.
        """

    @abstractmethod
    def get_memory(
        self, scene_data: list, type: str, info: dict[str, Any]
    ) -> list[list[TextualMemoryItem]]:
        """Extract memories of various types from ``scene_data``.

        Args:
            scene_data: The scene data to extract memories from.
            type: Kind of scene data; the accepted values are defined by the
                concrete reader.
            info: Additional information passed along to the extraction.

        Returns:
            A list of lists of ``TextualMemoryItem`` objects.
        """

    @abstractmethod
    def transform_memreader(self, data: dict) -> list[TextualMemoryItem]:
        """Convert memory data into a list of ``TextualMemoryItem`` objects."""
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from typing import Any, ClassVar
|
|
2
|
+
|
|
3
|
+
from memos.configs.mem_reader import MemReaderConfigFactory
|
|
4
|
+
from memos.mem_reader.base import BaseMemReader
|
|
5
|
+
from memos.mem_reader.simple_struct import SimpleStructMemReader
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class MemReaderFactory(BaseMemReader):
    """Factory that builds the MemReader matching a configured backend."""

    # Maps a backend name to the reader class that implements it.
    backend_to_class: ClassVar[dict[str, Any]] = {
        "simple_struct": SimpleStructMemReader,
    }

    @classmethod
    def from_config(cls, config_factory: MemReaderConfigFactory) -> BaseMemReader:
        """Instantiate the reader selected by ``config_factory.backend``.

        Args:
            config_factory: Provides the backend name (``backend``) and the
                configuration handed to the reader's constructor (``config``).

        Returns:
            A reader instance built from ``config_factory.config``.

        Raises:
            ValueError: If the backend name is not in ``backend_to_class``.
        """
        registry = cls.backend_to_class
        selected = config_factory.backend
        if selected in registry:
            return registry[selected](config_factory.config)
        raise ValueError(f"Invalid backend: {selected}")
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from typing import Any
|
|
3
|
+
|
|
4
|
+
from memos.llms.base import BaseLLM
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Memory:
    """Class representing the memory structure for storing and organizing memory content.

    Three plain dictionaries hold the data:

    * ``objective_memory`` / ``subjective_memory`` map a key to
      ``{"value": ..., "info": {...}}`` entries (see ``update_user_memory``).
    * ``scene_memory`` holds a ``"qa_pair"`` and a ``"document"`` scene, each
      with a list of sections (``"section"``) and scene-level ``"info"``.
    """

    def __init__(
        self,
        user_id: str,
        session_id: str,
        created_at: datetime,
    ):
        """
        Initialize the Memory structure.

        Args:
            user_id: User identifier
            session_id: Session identifier
            created_at: Creation timestamp
        """
        self.objective_memory: dict[str, dict[str, Any]] = {}
        self.subjective_memory: dict[str, dict[str, Any]] = {}
        self.scene_memory = {
            "qa_pair": {
                "section": [],
                "info": {
                    "user_id": user_id,
                    "session_id": session_id,
                    "created_at": created_at,
                    "summary": "",
                    "label": [],
                },
            },
            "document": {
                "section": [],
                "info": {
                    "user_id": user_id,
                    "session_id": session_id,
                    "created_at": created_at,
                    "doc_type": "",  # pdf, txt, etc.
                    "doc_category": "",  # research_paper, news, etc.
                    "doc_name": "",
                    "summary": "",
                    "label": [],
                },
            },
        }

    def to_dict(self) -> dict[str, Any]:
        """
        Convert the Memory object to a dictionary.

        The returned dictionary references the live internal dictionaries; it
        is not a copy.

        Returns:
            Dictionary representation of the Memory object
        """
        return {
            "objective_memory": self.objective_memory,
            "subjective_memory": self.subjective_memory,
            "scene_memory": self.scene_memory,
        }

    def update_user_memory(
        self,
        memory_type: str,
        key: str,
        value: Any,
        origin_data: str,
        confidence_score: float = 1.0,
        timestamp: str | None = None,
    ) -> None:
        """
        Update a memory item in either objective_memory or subjective_memory.
        If a key already exists, the new memory item's info will replace the existing one,
        and the values will be connected with " | ".

        Args:
            memory_type: Type of memory to update ('objective' or 'subjective')
            key: Key for the memory item. Must be one of:

                | Memory Type       | Key                  | Description                                             |
                |-------------------|----------------------|---------------------------------------------------------|
                | objective_memory  | nickname             | User's preferred name or alias                          |
                | objective_memory  | gender               | User's gender (male, female, other)                     |
                | objective_memory  | personality          | User's personality traits or MBTI type                  |
                | objective_memory  | birth                | User's birthdate or age information                     |
                | objective_memory  | education            | User's educational background                           |
                | objective_memory  | work                 | User's professional history                             |
                | objective_memory  | achievement          | User's notable accomplishments                          |
                | objective_memory  | occupation           | User's current job or role                              |
                | objective_memory  | residence            | User's home location or living situation                |
                | objective_memory  | location             | User's current geographical location                    |
                | objective_memory  | income               | User's financial information                            |
                | objective_memory  | preference           | User's likes and dislikes                               |
                | objective_memory  | expertise            | User's skills and knowledge areas                       |
                | objective_memory  | language             | User's language proficiency                             |
                | objective_memory  | hobby                | User's recreational activities                          |
                | objective_memory  | goal                 | User's long-term aspirations                            |
                |-------------------|----------------------|---------------------------------------------------------|
                | subjective_memory | current_mood         | User's current emotional state                          |
                | subjective_memory | response_style       | User's preferred interaction style                      |
                | subjective_memory | language_style       | User's language patterns and preferences                |
                | subjective_memory | information_density  | User's preference for detail level in responses         |
                | subjective_memory | interaction_pace     | User's preferred conversation speed and frequency       |
                | subjective_memory | followed_topic       | Topics the user is currently interested in              |
                | subjective_memory | current_goal         | User's immediate objectives in the conversation         |
                | subjective_memory | content_type         | User's preferred field of interest (e.g., technology, finance, etc.) |
                | subjective_memory | role_preference      | User's preferred assistant role (e.g., domain expert, translation assistant, etc.) |

            value: Value to store
            origin_data: Original data that led to this memory
            confidence_score: Confidence score (0.0 to 1.0)
            timestamp: Timestamp string, if None the current local time in
                ISO-8601 format will be used

        Raises:
            ValueError: If memory_type is neither 'objective' nor 'subjective'.
        """
        if timestamp is None:
            # Store a string, as annotated and documented, so the field has the
            # same type whether or not the caller supplied a timestamp.
            timestamp = datetime.now().isoformat()

        if memory_type == "objective":
            memory_dict = self.objective_memory
        elif memory_type == "subjective":
            memory_dict = self.subjective_memory
        else:
            raise ValueError(
                f"Invalid memory_type: {memory_type}. Must be 'objective' or 'subjective'."
            )

        if key in memory_dict:
            # Keep the history by chaining the old and new values into one string;
            # the info block below always reflects the newest update only.
            value = f"{memory_dict[key]['value']} | {value}"

        memory_dict[key] = {
            "value": value,
            "info": {
                "timestamp": timestamp,
                "confidence_score": confidence_score,
                "origin_data": origin_data,
            },
        }

    def add_qa_batch(
        self, batch_summary: str, pair_summaries: list[dict], themes: list[str], order: int
    ) -> None:
        """
        Add a batch of Q&A pairs to the scene memory as a single subsection.

        Args:
            batch_summary: The summary of the entire batch
            pair_summaries: List of dictionaries, each containing:
                - question: The summarized question for a single pair
                - summary: The original dialogue for a single pair
                - prompt: The prompt used for summarization
                - time: The extracted time information (if any)
            themes: List of themes associated with the batch
            order: Order of the batch in the sequence
        """
        qa_subsection = {
            "subsection": {},
            "info": {
                "summary": batch_summary,
                "label": themes,
                "origin_data": "",
                "order": order,
            },
        }

        for pair in pair_summaries:
            qa_subsection["subsection"][pair["question"]] = {
                "summary": pair["summary"],
                # Keep only what follows the first blank line of the prompt
                # (the whole prompt when it contains no blank line).
                "sources": pair["prompt"].split("\n\n", 1)[-1],
                "time": pair.get("time", ""),  # Add time field with default empty string
            }

        self.scene_memory["qa_pair"]["section"].append(qa_subsection)

    def add_document_chunk_group(
        self, summary: str, label: list[str], order: int, sub_chunks: list
    ) -> None:
        """
        Add a group of document chunks as a single section with multiple facts in the subsection.

        Args:
            summary: The summary of the large chunk
            label: List of theme labels for the large chunk
            order: Order of the large chunk in the sequence
            sub_chunks: List of dictionaries containing small chunks information,
                each with keys: 'question', 'chunk_text', 'prompt'
        """
        doc_section = {
            "subsection": {},
            "info": {
                "summary": summary,
                "label": label,
                "origin_data": "",
                "order": order,
            },
        }

        # Add each small chunk as a fact in the subsection
        for sub_chunk in sub_chunks:
            doc_section["subsection"][sub_chunk["question"]] = {
                "summary": sub_chunk["chunk_text"],
                # Keep only what follows the first blank line of the prompt
                # (the whole prompt when it contains no blank line).
                "sources": sub_chunk["prompt"].split("\n\n", 1)[-1],
            }

        self.scene_memory["document"]["section"].append(doc_section)

    def _summarize_scene(self, scene_key: str, llm: BaseLLM | None) -> str:
        """Aggregate the section summaries and labels of one scene.

        Shared implementation behind ``process_qa_pair_summaries`` and
        ``process_document_summaries``. Writes the result into the scene's
        ``info["summary"]`` and ``info["label"]`` and returns the summary.
        """
        scene = self.scene_memory[scene_key]
        summaries = []
        # A dict is used as an ordered set: labels are de-duplicated but keep
        # their first-seen order, so the stored list is the same on every run.
        labels: dict[Any, None] = {}

        for section in scene["section"]:
            info = section.get("info") or {}
            if "summary" in info:
                summaries.append(info["summary"])
            if "label" in info:
                labels.update(dict.fromkeys(info["label"]))

        if llm is not None:
            # Use LLM to generate a coherent summary
            all_summaries_str = "\n".join(summaries)
            messages = [
                {
                    "role": "user",
                    "content": f"Summarize this text into a concise and objective sentence that captures its main idea. Provide only the required content directly, without including any additional information.\n\n{all_summaries_str}",
                }
            ]
            section_summary = llm.generate(messages)
        else:
            # Simple concatenation of summaries
            section_summary = " ".join(summaries)

        scene["info"]["summary"] = section_summary
        scene["info"]["label"] = list(labels)
        return section_summary

    def process_qa_pair_summaries(self, llm: BaseLLM | None = None) -> str:
        """
        Process all qa_pair subsection summaries to generate a section summary.

        The summary and the de-duplicated labels are also stored in
        ``scene_memory["qa_pair"]["info"]``.

        Args:
            llm: Optional LLM instance to generate summary. If None, concatenates subsection summaries.
        Returns:
            The generated section summary
        """
        return self._summarize_scene("qa_pair", llm)

    def process_document_summaries(self, llm: BaseLLM | None = None) -> str:
        """
        Process all document subsection summaries to generate a section summary.

        The summary and the de-duplicated labels are also stored in
        ``scene_memory["document"]["info"]``.

        Args:
            llm: Optional LLM instance to generate summary. If None, concatenates subsection summaries.
        Returns:
            The generated section summary
        """
        return self._summarize_scene("document", llm)
|