reme-ai 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reme_ai/__init__.py +6 -0
- reme_ai/app.py +17 -0
- reme_ai/config/__init__.py +0 -0
- reme_ai/config/config_parser.py +6 -0
- reme_ai/constants/__init__.py +7 -0
- reme_ai/constants/common_constants.py +48 -0
- reme_ai/constants/language_constants.py +215 -0
- reme_ai/enumeration/__init__.py +0 -0
- reme_ai/enumeration/language_constants.py +215 -0
- reme_ai/react/__init__.py +1 -0
- reme_ai/react/simple_react_op.py +21 -0
- reme_ai/retrieve/__init__.py +2 -0
- reme_ai/retrieve/personal/__init__.py +17 -0
- reme_ai/retrieve/personal/extract_time_op.py +97 -0
- reme_ai/retrieve/personal/fuse_rerank_op.py +180 -0
- reme_ai/retrieve/personal/print_memory_op.py +131 -0
- reme_ai/retrieve/personal/read_message_op.py +52 -0
- reme_ai/retrieve/personal/retrieve_memory_op.py +13 -0
- reme_ai/retrieve/personal/semantic_rank_op.py +170 -0
- reme_ai/retrieve/personal/set_query_op.py +37 -0
- reme_ai/retrieve/task/__init__.py +4 -0
- reme_ai/retrieve/task/build_query_op.py +38 -0
- reme_ai/retrieve/task/merge_memory_op.py +27 -0
- reme_ai/retrieve/task/rerank_memory_op.py +149 -0
- reme_ai/retrieve/task/rewrite_memory_op.py +149 -0
- reme_ai/schema/__init__.py +1 -0
- reme_ai/schema/memory.py +144 -0
- reme_ai/summary/__init__.py +2 -0
- reme_ai/summary/personal/__init__.py +8 -0
- reme_ai/summary/personal/contra_repeat_op.py +143 -0
- reme_ai/summary/personal/get_observation_op.py +147 -0
- reme_ai/summary/personal/get_observation_with_time_op.py +165 -0
- reme_ai/summary/personal/get_reflection_subject_op.py +179 -0
- reme_ai/summary/personal/info_filter_op.py +177 -0
- reme_ai/summary/personal/load_today_memory_op.py +117 -0
- reme_ai/summary/personal/long_contra_repeat_op.py +210 -0
- reme_ai/summary/personal/update_insight_op.py +244 -0
- reme_ai/summary/task/__init__.py +10 -0
- reme_ai/summary/task/comparative_extraction_op.py +233 -0
- reme_ai/summary/task/failure_extraction_op.py +73 -0
- reme_ai/summary/task/memory_deduplication_op.py +163 -0
- reme_ai/summary/task/memory_validation_op.py +108 -0
- reme_ai/summary/task/pdf_preprocess_op_wrapper.py +50 -0
- reme_ai/summary/task/simple_comparative_summary_op.py +71 -0
- reme_ai/summary/task/simple_summary_op.py +67 -0
- reme_ai/summary/task/success_extraction_op.py +73 -0
- reme_ai/summary/task/trajectory_preprocess_op.py +76 -0
- reme_ai/summary/task/trajectory_segmentation_op.py +118 -0
- reme_ai/utils/__init__.py +0 -0
- reme_ai/utils/datetime_handler.py +345 -0
- reme_ai/utils/miner_u_pdf_processor.py +726 -0
- reme_ai/utils/op_utils.py +115 -0
- reme_ai/vector_store/__init__.py +6 -0
- reme_ai/vector_store/delete_memory_op.py +25 -0
- reme_ai/vector_store/recall_vector_store_op.py +36 -0
- reme_ai/vector_store/update_memory_freq_op.py +33 -0
- reme_ai/vector_store/update_memory_utility_op.py +32 -0
- reme_ai/vector_store/update_vector_store_op.py +32 -0
- reme_ai/vector_store/vector_store_action_op.py +55 -0
- reme_ai-0.1.0.dist-info/METADATA +218 -0
- reme_ai-0.1.0.dist-info/RECORD +65 -0
- reme_ai-0.1.0.dist-info/WHEEL +5 -0
- reme_ai-0.1.0.dist-info/entry_points.txt +2 -0
- reme_ai-0.1.0.dist-info/licenses/LICENSE +201 -0
- reme_ai-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,147 @@
|
|
1
|
+
import re
|
2
|
+
from typing import List
|
3
|
+
|
4
|
+
from flowllm import C, BaseLLMOp
|
5
|
+
from flowllm.schema.message import Message
|
6
|
+
from loguru import logger
|
7
|
+
|
8
|
+
from reme_ai.schema.memory import BaseMemory, PersonalMemory
|
9
|
+
from reme_ai.utils.datetime_handler import DatetimeHandler
|
10
|
+
|
11
|
+
|
12
|
+
@C.register_op()
class GetObservationOp(BaseLLMOp):
    """
    Extract personal observations from chat messages using an LLM.

    Messages containing time-related keywords are excluded here (they are
    handled by the companion time-aware op). Parsed observations are stored
    on the context under the standardized key ``observation_memories``.
    """
    file_path: str = __file__

    def execute(self):
        """Extract personal observations from chat messages."""
        # Get messages from context - guaranteed to exist by flow input
        messages: List[Message] = self.context.messages
        if not messages:
            logger.warning("No messages found in context")
            return

        # Filter messages - exclude those with time-related keywords
        filtered_messages = self._filter_messages(messages)
        if not filtered_messages:
            logger.warning("No messages left after filtering")
            self.context.observation_memories = []
            return

        logger.info(f"Extracting observations from {len(filtered_messages)} filtered messages")

        # Extract observations using LLM
        observation_memories = self._extract_observations_from_messages(filtered_messages)

        # Store results in context using standardized key
        self.context.observation_memories = observation_memories
        logger.info(f"Generated {len(observation_memories)} observation memories")

    def _filter_messages(self, messages: List[Message]) -> List[Message]:
        """
        Filter the chat messages to exclude those containing time-related keywords.

        Args:
            messages: List of messages to filter.

        Returns:
            List[Message]: A list of filtered messages without time keywords.
        """
        filtered_messages = [
            msg for msg in messages
            if not DatetimeHandler.has_time_word(query=msg.content, language=self.language)
        ]

        logger.info(f"Filtered messages from {len(messages)} to {len(filtered_messages)}")
        return filtered_messages

    def _extract_observations_from_messages(self, filtered_messages: List[Message]) -> List[BaseMemory]:
        """Extract observations from filtered messages using the LLM."""
        user_name = self.context.get("user_name", "user")

        # Build the numbered message list embedded in the prompt (1-based,
        # matching the indices the LLM is asked to echo back)
        user_query_list = [f"{i + 1} {user_name}: {msg.content}"
                           for i, msg in enumerate(filtered_messages)]

        # Create prompt using the prompt format method
        system_prompt = self.prompt_format(prompt_name="get_observation_system",
                                           num_obs=len(user_query_list),
                                           user_name=user_name)
        few_shot = self.prompt_format(prompt_name="get_observation_few_shot", user_name=user_name)
        user_query = self.prompt_format(prompt_name="get_observation_user_query",
                                        user_query="\n".join(user_query_list),
                                        user_name=user_name)

        full_prompt = f"{system_prompt}\n\n{few_shot}\n\n{user_query}"
        logger.info(f"get_observation_prompt={full_prompt}")

        def parse_observations(message: Message) -> List[BaseMemory]:
            """Parse LLM response and create observation memories."""
            response_text = message.content
            logger.info(f"get_observation_response={response_text}")

            # Parse observations using class method
            parsed_observations = GetObservationOp.parse_observation_response(response_text)

            observation_memories = []
            for obs in parsed_observations:
                idx = obs["index"] - 1  # Convert to 0-based index
                # FIX: also reject negative indices; a model answer of "0"
                # would otherwise wrap around to the *last* message through
                # Python negative indexing.
                if idx < 0 or idx >= len(filtered_messages):
                    logger.warning(f"Invalid index {idx} for messages list of length {len(filtered_messages)}")
                    continue

                # Create observation memory
                observation = PersonalMemory(
                    workspace_id=self.context.workspace_id,
                    when_to_use=obs["keywords"],
                    content=obs["content"],
                    target=user_name,
                    # FIX: consistent with sibling ops - tolerate LLM clients
                    # that do not expose a model_name attribute.
                    author=getattr(self.llm, "model_name", "system"),
                    metadata={
                        "keywords": obs["keywords"],
                        "source_message": filtered_messages[idx].content,
                        "observation_type": "personal_info"
                    }
                )
                observation_memories.append(observation)
                logger.info(f"Created observation: {obs['content'][:50]}...")

            return observation_memories

        # Use LLM chat with callback function
        return self.llm.chat(messages=[Message(content=full_prompt)], callback_fn=parse_observations)

    @staticmethod
    def parse_observation_response(response_text: str) -> List[dict]:
        """Parse an observation response into structured data.

        Returns:
            List[dict]: one dict per kept observation with keys
            ``index`` (1-based int), ``content`` (str) and ``keywords`` (str).
        """
        # Pattern to match both Chinese and English observation formats.
        # Only one alternation side is populated per match: groups 0-2 for
        # Chinese, groups 3-5 for English.
        pattern = r"信息:<(\d+)>\s*<>\s*<([^<>]+)>\s*<([^<>]*)>|Information:\s*<(\d+)>\s*<>\s*<([^<>]+)>\s*<([^<>]*)>"
        matches = re.findall(pattern, response_text, re.IGNORECASE | re.MULTILINE)

        observations = []
        for match in matches:
            # Handle both Chinese and English patterns
            if match[0]:  # Chinese pattern
                idx_str, content, keywords = match[0], match[1], match[2]
            else:  # English pattern
                idx_str, content, keywords = match[3], match[4], match[5]

            try:
                idx = int(idx_str)
            except ValueError:
                logger.warning(f"Invalid index format: {idx_str}")
                continue

            # Skip if content indicates no meaningful observation
            content_lower = content.lower().strip()
            if content_lower not in ['无', 'none', '', 'repeat']:
                observations.append({
                    "index": idx,
                    "content": content.strip(),
                    "keywords": keywords.strip() if keywords else ""
                })

        return observations
@@ -0,0 +1,165 @@
|
|
1
|
+
import re
|
2
|
+
from typing import List
|
3
|
+
|
4
|
+
from flowllm import C, BaseLLMOp
|
5
|
+
from flowllm.schema.message import Message
|
6
|
+
from loguru import logger
|
7
|
+
|
8
|
+
from reme_ai.schema.memory import BaseMemory, PersonalMemory
|
9
|
+
from reme_ai.utils.datetime_handler import DatetimeHandler
|
10
|
+
|
11
|
+
|
12
|
+
@C.register_op()
class GetObservationWithTimeOp(BaseLLMOp):
    """
    Extract observations with explicit time information from chat messages
    using an LLM.

    Only messages containing time-related keywords are considered; results
    are stored on the context under ``observation_memories_with_time``.
    """
    file_path: str = __file__

    def execute(self):
        """Extract personal observations with time information from chat messages."""
        # Get messages from context - guaranteed to exist by flow input
        messages: List[Message] = self.context.messages
        if not messages:
            logger.warning("No messages found in context")
            return

        # Filter messages - only include those with time-related keywords
        filtered_messages = self._filter_messages(messages)
        if not filtered_messages:
            logger.warning("No messages with time keywords found")
            self.context.observation_memories_with_time = []
            return

        logger.info(f"Extracting observations with time from {len(filtered_messages)} filtered messages")

        # Extract observations using LLM
        observation_memories_with_time = self._extract_observations_with_time_from_messages(filtered_messages)

        # Store results in context using standardized key
        self.context.observation_memories_with_time = observation_memories_with_time
        logger.info(f"Generated {len(observation_memories_with_time)} observation memories with time")

    def _filter_messages(self, messages: List[Message]) -> List[Message]:
        """
        Filter the chat messages to only include those containing time-related keywords.

        Args:
            messages: List of messages to filter.

        Returns:
            List[Message]: A list of filtered messages that mention time.
        """
        filtered_messages = [
            msg for msg in messages
            if DatetimeHandler.has_time_word(query=msg.content, language=self.language)
        ]

        logger.info(f"Filtered messages from {len(messages)} to {len(filtered_messages)}")
        return filtered_messages

    def _extract_observations_with_time_from_messages(self, filtered_messages: List[Message]) -> List[BaseMemory]:
        """Extract observations with time information from filtered messages using the LLM."""
        user_name = self.context.get("user_name", "user")

        # FIX: hoist loop-invariant work out of the per-message loop - the
        # time format string and the colon word do not depend on the message.
        time_format = self.prompt_format(prompt_name="time_string_format")
        colon = self._get_colon_word()

        # Build prompt lines: "<index> <formatted timestamp> <user><colon><content>"
        user_query_list = []
        for i, msg in enumerate(filtered_messages):
            # Format each message's creation timestamp for the prompt
            dt_handler = DatetimeHandler(dt=msg.time_created)
            dt = dt_handler.string_format(string_format=time_format, language=self.language)
            user_query_list.append(f"{i + 1} {dt} {user_name}{colon}{msg.content}")

        # Create prompt using the prompt format method
        system_prompt = self.prompt_format(prompt_name="get_observation_with_time_system",
                                           num_obs=len(user_query_list),
                                           user_name=user_name)
        few_shot = self.prompt_format(prompt_name="get_observation_with_time_few_shot", user_name=user_name)
        user_query = self.prompt_format(prompt_name="get_observation_with_time_user_query",
                                        user_query="\n".join(user_query_list),
                                        user_name=user_name)

        full_prompt = f"{system_prompt}\n\n{few_shot}\n\n{user_query}"
        logger.info(f"get_observation_with_time_prompt={full_prompt}")

        def parse_observations(message: Message) -> List[BaseMemory]:
            """Parse LLM response and create observation memories with time."""
            response_text = message.content
            logger.info(f"get_observation_with_time_response={response_text}")

            # Parse observations using class method
            parsed_observations = GetObservationWithTimeOp.parse_observation_with_time_response(response_text)

            observation_memories = []
            for obs in parsed_observations:
                idx = obs["index"] - 1  # Convert to 0-based index
                # FIX: also reject negative indices; a model answer of "0"
                # would otherwise wrap around to the *last* message through
                # Python negative indexing.
                if idx < 0 or idx >= len(filtered_messages):
                    logger.warning(f"Invalid index {idx} for messages list of length {len(filtered_messages)}")
                    continue

                # Create observation memory
                observation = PersonalMemory(
                    workspace_id=self.context.workspace_id,
                    when_to_use=obs["keywords"],
                    content=obs["content"],
                    target=user_name,
                    author=getattr(self.llm, "model_name", "system"),
                    metadata={
                        "keywords": obs["keywords"],
                        "time_info": obs.get("time_info", ""),
                        "source_message": filtered_messages[idx].content,
                        "observation_type": "personal_info_with_time"
                    }
                )
                observation_memories.append(observation)
                logger.info(f"Created observation with time: {obs['content'][:50]}...")

            return observation_memories

        # Use LLM chat with callback function
        return self.llm.chat(messages=[Message(content=full_prompt)], callback_fn=parse_observations)

    def _get_colon_word(self) -> str:
        """Get the language-specific colon word used between speaker and content."""
        colon_dict = {"zh": ":", "cn": ":", "en": ": "}
        return colon_dict.get(self.language, ": ")

    @staticmethod
    def parse_observation_with_time_response(response_text: str) -> List[dict]:
        """Parse an observation-with-time response into structured data.

        Expected line formats (full-width colon in Chinese):
          Chinese: 信息:<1> <time info or omitted> <important info or "无"> <keywords>
          English: Information: <1> <Time information> <Important information or "None"> <Keywords>

        Returns:
            List[dict]: dicts with keys ``index``, ``time_info``, ``content``,
            ``keywords``.
        """
        # Only one alternation side is populated per match: groups 0-3 for
        # Chinese, groups 4-7 for English.
        pattern = r"信息:<(\d+)>\s*<([^<>]*)>\s*<([^<>]+)>\s*<([^<>]*)>|Information:\s*<(\d+)>\s*<([^<>]*)>\s*<([^<>]+)>\s*<([^<>]*)>"
        matches = re.findall(pattern, response_text, re.IGNORECASE | re.MULTILINE)

        observations = []
        for match in matches:
            # Handle both Chinese and English patterns
            if match[0]:  # Chinese pattern
                idx_str, time_info, content, keywords = match[0], match[1], match[2], match[3]
            else:  # English pattern
                idx_str, time_info, content, keywords = match[4], match[5], match[6], match[7]

            try:
                idx = int(idx_str)
            except ValueError:
                logger.warning(f"Invalid index format: {idx_str}")
                continue

            # Skip if content indicates no meaningful observation
            content_lower = content.lower().strip()
            if content_lower not in ['无', 'none', '', 'repeat']:
                observations.append({
                    "index": idx,
                    "time_info": time_info.strip() if time_info else "",
                    "content": content.strip(),
                    "keywords": keywords.strip() if keywords else ""
                })

        return observations
@@ -0,0 +1,179 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from flowllm import C, BaseLLMOp
|
4
|
+
from flowllm.schema.message import Message
|
5
|
+
from loguru import logger
|
6
|
+
|
7
|
+
from reme_ai.schema.memory import BaseMemory, PersonalMemory
|
8
|
+
|
9
|
+
|
10
|
+
@C.register_op()
class GetReflectionSubjectOp(BaseLLMOp):
    """
    A specialized operation class responsible for retrieving unreflected memory nodes,
    generating reflection prompts with current insights, invoking an LLM for fresh insights,
    parsing the LLM responses, forming new insight nodes, and updating memory statuses accordingly.

    Inputs (read from ``self.context.response.metadata``):
        personal_memories: memories produced by a preceding op.
        existing_insights: previously generated insight memories.
    Output (written to ``self.context.response.metadata``):
        insight_memories: newly created PersonalMemory subject nodes (may be empty).
    """
    file_path: str = __file__

    def new_insight_memory(self, insight_content: str, target: str) -> PersonalMemory:
        """
        Creates a new PersonalMemory for an insight with the given content.

        Args:
            insight_content (str): The content of the insight.
            target (str): The target person the insight is about.

        Returns:
            PersonalMemory: A new PersonalMemory instance representing the insight.
        """
        return PersonalMemory(
            workspace_id=self.context.get("workspace_id", ""),
            content=insight_content,
            target=target,
            reflection_subject=insight_content,  # Store the subject in the dedicated field
            # Tolerate LLM clients without a model_name attribute
            author=getattr(self.llm, "model_name", "system"),
            metadata={
                "insight_type": "reflection_subject",
                "memory_type": "personal_topic"
            }
        )

    def execute(self):
        """
        Generate reflection subjects (topics) from personal memories for insight extraction.

        Process:
        1. Retrieve personal memories and existing insights from context
        2. Check if sufficient memories exist for reflection
        3. Generate new reflection subjects using LLM
        4. Create insight memory objects for new subjects
        5. Store results in context for next operation
        """
        # Get memories from previous operation
        personal_memories = self.context.response.metadata.get("personal_memories", [])
        existing_insights = self.context.response.metadata.get("existing_insights", [])

        # Get operation parameters
        reflect_obs_cnt_threshold = self.op_params.get("reflect_obs_cnt_threshold", 10)
        reflect_num_questions = self.op_params.get("reflect_num_questions", 3)
        user_name = self.context.get("user_name", "user")

        # Validate sufficient memories for reflection; below the threshold the
        # op is a no-op that still publishes an empty result list.
        if len(personal_memories) < reflect_obs_cnt_threshold:
            logger.info(f"Insufficient memories for reflection: {len(personal_memories)} < {reflect_obs_cnt_threshold}")
            self.context.response.metadata["insight_memories"] = []
            return

        # Extract existing insight subjects to avoid duplication
        existing_subjects = []
        if existing_insights:
            existing_subjects = [memory.content for memory in existing_insights if
                                 hasattr(memory, 'content') and memory.content]

        logger.info(f"Found {len(existing_subjects)} existing insight subjects")

        # Prepare memory content for LLM analysis (skip blank/whitespace-only)
        memory_contents = []
        for memory in personal_memories:
            if hasattr(memory, 'content') and memory.content.strip():
                memory_contents.append(memory.content.strip())

        if not memory_contents:
            logger.warning("No valid memory content found for reflection")
            self.context.response.metadata["insight_memories"] = []
            return

        # Generate reflection subjects using LLM
        insight_memories = self._generate_reflection_subjects(
            memory_contents, existing_subjects, user_name, reflect_num_questions
        )

        # Store results in context
        self.context.response.metadata["insight_memories"] = insight_memories
        logger.info(f"Generated {len(insight_memories)} new reflection subject memories")

    def _generate_reflection_subjects(self, memory_contents: List[str], existing_subjects: List[str],
                                      user_name: str, num_questions: int) -> List[BaseMemory]:
        """
        Generate new reflection subjects using LLM analysis of memory contents.

        Args:
            memory_contents: List of memory content strings
            existing_subjects: List of already existing subject strings
            user_name: Target username
            num_questions: Maximum number of new subjects to generate

        Returns:
            List of PersonalMemory objects representing new reflection subjects
        """
        # Build LLM prompt from the three configured prompt sections
        system_prompt = self.prompt_format(
            prompt_name="get_reflection_subject_system",
            user_name=user_name,
            num_questions=num_questions
        )
        few_shot = self.prompt_format(
            prompt_name="get_reflection_subject_few_shot",
            user_name=user_name
        )
        user_query = self.prompt_format(
            prompt_name="get_reflection_subject_user_query",
            user_name=user_name,
            exist_keys=", ".join(existing_subjects) if existing_subjects else "None",
            user_query="\n".join(memory_contents)
        )

        full_prompt = f"{system_prompt}\n\n{few_shot}\n\n{user_query}"
        logger.info(f"Reflection subject prompt length: {len(full_prompt)} chars")

        def parse_reflection_response(message: Message) -> List[BaseMemory]:
            """Parse LLM response and create insight memories"""
            response_text = message.content
            logger.info(f"Reflection subjects response: {response_text}")

            # Parse new subjects using class method
            new_subjects = GetReflectionSubjectOp.parse_reflection_subjects_response(response_text, existing_subjects)

            # Create insight memory objects
            insight_memories = []
            for subject in new_subjects:
                insight_memory = self.new_insight_memory(
                    insight_content=subject,
                    target=user_name
                )
                insight_memories.append(insight_memory)
                logger.info(f"Created reflection subject: {subject}")

            return insight_memories

        # Generate subjects using LLM; the callback's return value is
        # propagated as the result of llm.chat.
        return self.llm.chat(messages=[Message(content=full_prompt)], callback_fn=parse_reflection_response)

    def get_language_value(self, value_dict: dict):
        """Get language-specific value from dictionary, falling back to English."""
        return value_dict.get(self.language, value_dict.get("en"))

    @staticmethod
    def parse_reflection_subjects_response(response_text: str, existing_subjects: List[str] = None) -> List[str]:
        """Parse reflection subjects response to extract new subject attributes.

        Each non-empty response line is treated as one candidate subject;
        header lines, "None" answers, duplicates of existing subjects and
        single-character lines are dropped.
        """
        if existing_subjects is None:
            existing_subjects = []

        # Split response into lines and clean up
        lines = response_text.strip().split('\n')
        subjects = []

        for line in lines:
            line = line.strip()
            # Skip empty lines, "None" responses, and existing subjects
            if (line and
                    line not in ['无', 'None', ''] and
                    line not in existing_subjects and
                    not line.startswith('新增') and  # Skip Chinese header
                    not line.startswith('New ') and  # Skip English header
                    len(line) > 1):  # Skip single character responses
                subjects.append(line)

        logger.info(f"Parsed {len(subjects)} new reflection subjects from response")
        return subjects
@@ -0,0 +1,177 @@
|
|
1
|
+
import re
|
2
|
+
from typing import List
|
3
|
+
|
4
|
+
from flowllm import C, BaseLLMOp
|
5
|
+
from flowllm.schema.message import Message
|
6
|
+
from loguru import logger
|
7
|
+
|
8
|
+
from reme_ai.schema.memory import PersonalMemory
|
9
|
+
|
10
|
+
|
11
|
+
@C.register_op()
class InfoFilterOp(BaseLLMOp):
    """
    Filter messages based on LLM-scored information content.

    Retains only chat messages that include significant information about the
    user, converting them into PersonalMemory objects that replace
    ``context.messages``.
    """
    file_path: str = __file__

    def execute(self):
        """Filter messages based on information content scores."""
        # Normalize context messages: flow input may deliver raw dicts
        self.context.messages = [Message(**x) if isinstance(x, dict) else x for x in self.context.messages]
        messages: List[Message] = self.context.messages
        if not messages:
            logger.warning("No messages found in context")
            return

        # Get operation parameters
        preserved_scores = self.op_params.get("preserved_scores", "2,3")
        info_filter_msg_max_size = self.op_params.get("info_filter_msg_max_size", 200)
        user_name = self.context.get("user_name", "user")

        # Filter and process messages
        info_messages = self._filter_and_process_messages(messages, user_name, info_filter_msg_max_size)
        if not info_messages:
            logger.warning("No messages left after filtering")
            self.context.messages = []
            return

        logger.info(f"Filtering {len(info_messages)} messages for information content")

        # Filter messages using LLM
        filtered_memories = self._filter_messages_with_llm(info_messages, user_name, preserved_scores)

        # Store results in context using standardized key
        self.context.messages = filtered_memories
        logger.info(f"Filtered to {len(filtered_memories)} high-information messages")

    @staticmethod
    def _filter_and_process_messages(messages: List[Message], user_name: str, max_size: int) -> List[Message]:
        """Pre-filter messages before LLM scoring.

        Drops already-memorized and non-user messages and truncates long
        contents to roughly ``max_size`` characters (head + tail halves).
        Note: truncation mutates the Message objects in place.
        """
        info_messages = []

        for msg in messages:
            # Skip messages that were already turned into memories
            if msg.metadata.get('memorized', False):
                continue

            # Only process messages authored by the user role
            if msg.role.value != "user":
                continue

            # Truncate long messages, keeping the beginning and the end
            if len(msg.content) >= max_size:
                half_size = int(max_size * 0.5 + 0.5)
                msg.content = msg.content[:half_size] + msg.content[-half_size:]

            info_messages.append(msg)

        logger.info(f"Filtered messages from {len(messages)} to {len(info_messages)}")
        return info_messages

    def _filter_messages_with_llm(self, info_messages: List[Message], user_name: str, preserved_scores: str) -> List[
        PersonalMemory]:
        """Score messages with the LLM and keep those whose score is preserved.

        Args:
            info_messages: Pre-filtered user messages.
            user_name: Target username for the prompt and memory objects.
            preserved_scores: Comma-separated score values to keep (e.g. "2,3").
        """
        # FIX: parse the preserved-score spec into a set instead of relying on
        # a substring test ("2" in "2,3"), which is fragile for multi-character
        # scores. Also tolerate a list/iterable configuration value.
        if isinstance(preserved_scores, str):
            preserved = {s.strip() for s in preserved_scores.split(",") if s.strip()}
        else:
            preserved = {str(s) for s in preserved_scores}

        # Build prompt for information filtering
        user_query_list = []
        colon = self._get_colon_word()
        for i, msg in enumerate(info_messages):
            user_query_list.append(f"{i + 1} {user_name}{colon} {msg.content}")

        # Create prompt using the prompt format method
        system_prompt = self.prompt_format(prompt_name="info_filter_system",
                                           batch_size=len(info_messages),
                                           user_name=user_name)
        few_shot = self.prompt_format(prompt_name="info_filter_few_shot", user_name=user_name)
        user_query = self.prompt_format(prompt_name="info_filter_user_query",
                                        user_query="\n".join(user_query_list))

        full_prompt = f"{system_prompt}\n\n{few_shot}\n\n{user_query}"
        logger.info(f"info_filter_prompt={full_prompt}")

        def parse_and_filter(message: Message) -> List[PersonalMemory]:
            """Parse LLM response and create filtered memories."""
            response_text = message.content
            logger.info(f"info_filter_response={response_text}")

            # Parse scores using class method
            info_scores = InfoFilterOp.parse_info_filter_response(response_text)

            if len(info_scores) != len(info_messages):
                logger.warning(f"score_size != messages_size, {len(info_scores)} vs {len(info_messages)}")

            filtered_memories = []
            for idx, score in info_scores:
                # Convert to 0-based index
                msg_idx = idx - 1
                # FIX: also reject negative indices; a model-returned index of
                # "0" would otherwise wrap around to the last message.
                if msg_idx < 0 or msg_idx >= len(info_messages):
                    logger.warning(f"Invalid index {msg_idx} for messages list of length {len(info_messages)}")
                    continue

                # Check if score should be preserved
                if score in preserved:
                    message_kept = info_messages[msg_idx]

                    # Create memory from filtered message with combined metadata.
                    # FIX: spread the original metadata FIRST so the explicit
                    # keys below (e.g. "memorized": True) cannot be clobbered
                    # by stale values carried on the source message.
                    memory = PersonalMemory(
                        workspace_id=self.context.get("workspace_id", ""),
                        content=message_kept.content,
                        target=user_name,
                        author=getattr(self.llm, "model_name", "system"),
                        metadata={
                            **message_kept.metadata,  # Include all original metadata
                            "info_score": score,
                            "filter_type": "info_content",
                            "original_message_time": getattr(message_kept, 'time_created', None),
                            "role_name": message_kept.metadata.pop("role_name", user_name),
                            "memorized": True,
                        }
                    )
                    filtered_memories.append(memory)
                    logger.info(f"Info filter: kept message with score {score}: {message_kept.content[:50]}...")

            return filtered_memories

        # Use LLM chat with callback function
        return self.llm.chat(messages=[Message(content=full_prompt)], callback_fn=parse_and_filter)

    def _get_colon_word(self) -> str:
        """Get the language-specific colon word used between speaker and content."""
        colon_dict = {"zh": ":", "cn": ":", "en": ": "}
        return colon_dict.get(self.language, ": ")

    @staticmethod
    def parse_info_filter_response(response_text: str) -> List[tuple]:
        """Parse info filter response to extract message scores.

        Expected line formats (full-width colon in Chinese):
          Chinese: 结果:<index> <score>
          English: Result: <Index> <Score>

        Returns:
            List[tuple]: (index, score) pairs; index is a 1-based int, score a
            single-character string in "0".."3".
        """
        # Only one alternation side is populated per match: groups 0-1 for
        # Chinese, groups 2-3 for English.
        pattern = r"结果:<(\d+)>\s*<([0-3])>|Result:\s*<(\d+)>\s*<([0-3])>"
        matches = re.findall(pattern, response_text, re.IGNORECASE | re.MULTILINE)

        scores = []
        for match in matches:
            # Handle both Chinese and English patterns
            if match[0]:  # Chinese pattern
                idx_str, score_str = match[0], match[1]
            else:  # English pattern
                idx_str, score_str = match[2], match[3]

            try:
                idx = int(idx_str)
            except ValueError:
                logger.warning(f"Invalid index or score format: {idx_str}, {score_str}")
                continue
            scores.append((idx, score_str))

        logger.info(f"Parsed {len(scores)} info filter scores from response")
        return scores