reme-ai 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reme_ai/__init__.py +6 -0
- reme_ai/app.py +17 -0
- reme_ai/config/__init__.py +0 -0
- reme_ai/config/config_parser.py +6 -0
- reme_ai/constants/__init__.py +7 -0
- reme_ai/constants/common_constants.py +48 -0
- reme_ai/constants/language_constants.py +215 -0
- reme_ai/enumeration/__init__.py +0 -0
- reme_ai/enumeration/language_constants.py +215 -0
- reme_ai/react/__init__.py +1 -0
- reme_ai/react/simple_react_op.py +21 -0
- reme_ai/retrieve/__init__.py +2 -0
- reme_ai/retrieve/personal/__init__.py +17 -0
- reme_ai/retrieve/personal/extract_time_op.py +97 -0
- reme_ai/retrieve/personal/fuse_rerank_op.py +180 -0
- reme_ai/retrieve/personal/print_memory_op.py +131 -0
- reme_ai/retrieve/personal/read_message_op.py +52 -0
- reme_ai/retrieve/personal/retrieve_memory_op.py +13 -0
- reme_ai/retrieve/personal/semantic_rank_op.py +170 -0
- reme_ai/retrieve/personal/set_query_op.py +37 -0
- reme_ai/retrieve/task/__init__.py +4 -0
- reme_ai/retrieve/task/build_query_op.py +38 -0
- reme_ai/retrieve/task/merge_memory_op.py +27 -0
- reme_ai/retrieve/task/rerank_memory_op.py +149 -0
- reme_ai/retrieve/task/rewrite_memory_op.py +149 -0
- reme_ai/schema/__init__.py +1 -0
- reme_ai/schema/memory.py +144 -0
- reme_ai/summary/__init__.py +2 -0
- reme_ai/summary/personal/__init__.py +8 -0
- reme_ai/summary/personal/contra_repeat_op.py +143 -0
- reme_ai/summary/personal/get_observation_op.py +147 -0
- reme_ai/summary/personal/get_observation_with_time_op.py +165 -0
- reme_ai/summary/personal/get_reflection_subject_op.py +179 -0
- reme_ai/summary/personal/info_filter_op.py +177 -0
- reme_ai/summary/personal/load_today_memory_op.py +117 -0
- reme_ai/summary/personal/long_contra_repeat_op.py +210 -0
- reme_ai/summary/personal/update_insight_op.py +244 -0
- reme_ai/summary/task/__init__.py +10 -0
- reme_ai/summary/task/comparative_extraction_op.py +233 -0
- reme_ai/summary/task/failure_extraction_op.py +73 -0
- reme_ai/summary/task/memory_deduplication_op.py +163 -0
- reme_ai/summary/task/memory_validation_op.py +108 -0
- reme_ai/summary/task/pdf_preprocess_op_wrapper.py +50 -0
- reme_ai/summary/task/simple_comparative_summary_op.py +71 -0
- reme_ai/summary/task/simple_summary_op.py +67 -0
- reme_ai/summary/task/success_extraction_op.py +73 -0
- reme_ai/summary/task/trajectory_preprocess_op.py +76 -0
- reme_ai/summary/task/trajectory_segmentation_op.py +118 -0
- reme_ai/utils/__init__.py +0 -0
- reme_ai/utils/datetime_handler.py +345 -0
- reme_ai/utils/miner_u_pdf_processor.py +726 -0
- reme_ai/utils/op_utils.py +115 -0
- reme_ai/vector_store/__init__.py +6 -0
- reme_ai/vector_store/delete_memory_op.py +25 -0
- reme_ai/vector_store/recall_vector_store_op.py +36 -0
- reme_ai/vector_store/update_memory_freq_op.py +33 -0
- reme_ai/vector_store/update_memory_utility_op.py +32 -0
- reme_ai/vector_store/update_vector_store_op.py +32 -0
- reme_ai/vector_store/vector_store_action_op.py +55 -0
- reme_ai-0.1.0.dist-info/METADATA +218 -0
- reme_ai-0.1.0.dist-info/RECORD +65 -0
- reme_ai-0.1.0.dist-info/WHEEL +5 -0
- reme_ai-0.1.0.dist-info/entry_points.txt +2 -0
- reme_ai-0.1.0.dist-info/licenses/LICENSE +201 -0
- reme_ai-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,117 @@
|
|
1
|
+
from typing import List
|
2
|
+
|
3
|
+
from flowllm import C, BaseLLMOp
|
4
|
+
from flowllm.schema.vector_node import VectorNode
|
5
|
+
from loguru import logger
|
6
|
+
|
7
|
+
from reme_ai.schema.memory import BaseMemory, vector_node_to_memory
|
8
|
+
from reme_ai.utils.datetime_handler import DatetimeHandler
|
9
|
+
|
10
|
+
|
11
|
+
@C.register_op()
class LoadTodayMemoryOp(BaseLLMOp):
    """Fetch the current day's memories from the vector store.

    Retrieves the memories created today for a given user so that downstream
    operations can deduplicate them, and publishes the resulting list on
    ``self.context.today_memories``.
    """
    file_path: str = __file__

    def execute(self):
        """Load today's memories and store them on the context.

        Steps:
            1. Read ``top_k`` from op params and user/workspace info from context.
            2. Query the vector store, filtered to today's date.
            3. Convert the returned vector nodes into memory objects.
            4. Publish the (possibly empty) list on ``context.today_memories``.
        """
        top_k = self.op_params.get("top_k", 50)

        workspace_id = self.context.workspace_id
        user_name = self.context.get("user_name", "user")

        logger.info(f"Loading today's memories for user: {user_name} (top_k: {top_k})")

        today_memories = self._retrieve_today_memories(workspace_id, user_name, top_k)

        if today_memories:
            logger.info(f"Retrieved {len(today_memories)} memories from today")
            self.context.today_memories = today_memories
            logger.info(f"Final today's memory list size: {len(today_memories)}")
        else:
            logger.info("No memories found for today")
            self.context.today_memories = []

    def _retrieve_today_memories(self, workspace_id: str, user_name: str, top_k: int) -> List[BaseMemory]:
        """Query the vector store for memories created today.

        Args:
            workspace_id: Workspace identifier used to scope the search.
            user_name: Target username used as the ``target`` filter value.
            top_k: Maximum number of nodes to request from the store.

        Returns:
            Today's memories, or an empty list when the lookup fails.
        """
        try:
            # DatetimeHandler.datetime_format() presumably yields "YYYY-MM-DD HH:MM:SS";
            # keep only the leading date token for filtering — TODO confirm format.
            today_date = DatetimeHandler().datetime_format().split()[0]

            logger.info(f"Searching for memories from date: {today_date}")

            nodes: List[VectorNode] = self.vector_store.search(
                query="",  # Empty query to get all results for today
                workspace_id=workspace_id,
                top_k=top_k,
                filter_dict={
                    "memory_type": "personal",
                    "target": user_name,
                    "created_date": today_date,
                },
            )

            logger.info(f"Vector store returned {len(nodes)} nodes for today")

            memories = self._convert_nodes_to_memories(nodes)
            logger.info(f"Successfully converted {len(memories)} nodes to memories")

            return memories

        except Exception as e:
            # Best-effort: a failed lookup yields no memories rather than aborting the flow.
            logger.error(f"Error retrieving today's memories: {e}")
            return []

    @staticmethod
    def _convert_nodes_to_memories(nodes: List[VectorNode]) -> List[BaseMemory]:
        """Convert vector nodes into memory objects, skipping nodes that fail.

        Args:
            nodes: Vector nodes returned by the vector store.

        Returns:
            Successfully converted memory objects, in input order.
        """
        converted: List[BaseMemory] = []

        for i, node in enumerate(nodes):
            try:
                converted.append(vector_node_to_memory(node))
            except Exception as e:
                # A single malformed node should not discard the whole batch.
                logger.warning(f"Failed to convert node {i} to memory: {e}")

        return converted
|
@@ -0,0 +1,210 @@
|
|
1
|
+
import re
|
2
|
+
from typing import List
|
3
|
+
|
4
|
+
from flowllm import C, BaseLLMOp
|
5
|
+
from flowllm.enumeration.role import Role
|
6
|
+
from flowllm.schema.message import Message
|
7
|
+
from loguru import logger
|
8
|
+
|
9
|
+
from reme_ai.schema.memory import BaseMemory, PersonalMemory
|
10
|
+
|
11
|
+
|
12
|
+
@C.register_op()
class LongContraRepeatOp(BaseLLMOp):
    """
    Manages and updates memory entries within a conversation scope by identifying
    and handling contradictions or redundancies. It extends BaseLLMOp to provide
    specialized functionality for long conversations with potential contradictory
    or repetitive statements.
    """
    file_path: str = __file__

    def execute(self):
        """
        Analyze memories for contradictions and redundancies, resolving conflicts.

        Process:
        1. Get updated insight memories from previous operation
        2. Check for contradictions and redundancies among memories
        3. Resolve conflicts by keeping most recent/accurate information
        4. Filter out redundant memories
        5. Store cleaned memory list in context ("memory_list" metadata key)
        """
        # Get memories from previous operation (produced by UpdateInsightOp)
        updated_insights = self.context.response.metadata.get("updated_insight_memories", [])

        if not updated_insights:
            logger.info("No updated insight memories to process for contradictions")
            self.context.response.metadata["memory_list"] = []
            return

        # Get operation parameters
        max_memories_to_process = self.op_params.get("long_contra_repeat_max_count", 50)
        enable_processing = self.op_params.get("enable_long_contra_repeat", True)

        if not enable_processing:
            # Feature flag off: pass the input through untouched.
            logger.info("Long contradiction/repeat processing is disabled")
            self.context.response.metadata["memory_list"] = updated_insights
            return

        # Sort memories by creation time (most recent first) and limit count.
        # Memories beyond the cap are dropped from the output, not merely skipped.
        sorted_memories = sorted(
            updated_insights,
            key=lambda x: x.time_created,
            reverse=True
        )[:max_memories_to_process]

        if len(sorted_memories) <= 1:
            # Nothing to compare against; contradiction analysis needs >= 2 memories.
            logger.info("Only one memory to process, skipping contradiction analysis")
            self.context.response.metadata["memory_list"] = sorted_memories
            return

        logger.info(f"Processing {len(sorted_memories)} memories for contradictions and redundancies")

        # Analyze and resolve contradictions via the LLM
        filtered_memories = self._analyze_and_resolve_conflicts(sorted_memories)

        # Store results in context
        self.context.response.metadata["memory_list"] = filtered_memories
        logger.info(f"Conflict resolution: {len(sorted_memories)} -> {len(filtered_memories)} memories")

    def _analyze_and_resolve_conflicts(self, memories: List[BaseMemory]) -> List[BaseMemory]:
        """
        Analyze memories for contradictions and redundancies using LLM.

        Args:
            memories: List of memories to analyze

        Returns:
            List of filtered memories with conflicts resolved. On an empty
            LLM response the input list is returned unchanged (fail-open).
        """
        user_name = self.context.get("user_name", "user")

        # Prepare memory content for LLM analysis; entries are 1-based-numbered
        # so the LLM's judgment indices can be mapped back to this list.
        memory_texts = []
        for i, memory in enumerate(memories):
            memory_texts.append(f"{i + 1} {memory.content}")

        # Build LLM prompt from the op's prompt templates
        system_prompt = self.prompt_format(
            prompt_name="long_contra_repeat_system",
            num_obs=len(memory_texts),
            user_name=user_name
        )
        few_shot = self.prompt_format(
            prompt_name="long_contra_repeat_few_shot",
            user_name=user_name
        )
        user_query = self.prompt_format(
            prompt_name="long_contra_repeat_user_query",
            user_query="\n".join(memory_texts)
        )

        full_prompt = f"{system_prompt}\n\n{few_shot}\n\n{user_query}"
        logger.info(f"Contradiction analysis prompt length: {len(full_prompt)} chars")

        # Get LLM analysis (single-turn chat)
        response = self.llm.chat([Message(role=Role.USER, content=full_prompt)])

        if not response or not response.content:
            # Fail-open: keep everything rather than lose memories on an LLM hiccup.
            logger.warning("Empty response from LLM, keeping all memories")
            return memories

        # Parse response and filter memories
        return self._parse_and_filter_memories(response.content, memories, user_name)

    @staticmethod
    def _parse_and_filter_memories(response_text: str, memories: List[BaseMemory], user_name: str) -> List[BaseMemory]:
        """Parse LLM response and filter memories based on contradiction/containment analysis.

        Judgment semantics per memory index:
          - contradiction with replacement text -> emit a rewritten PersonalMemory
          - contradiction without replacement  -> drop the memory
          - contained (redundant)              -> drop the memory
          - anything else ("none")             -> keep as-is
        Memories the LLM never mentioned are appended unchanged at the end.
        """

        # Use class method to parse the response
        judgments = LongContraRepeatOp.parse_long_contra_repeat_response(response_text)

        if not judgments:
            # Fail-open, same as an empty LLM response.
            logger.warning("No valid judgments found in response")
            return memories

        # Process each judgment
        filtered_memories = []
        processed_indices = set()

        for idx, judgment, modified_content in judgments:
            try:
                memory_idx = idx - 1  # Convert to 0-based index
                # NOTE(review): only the upper bound is checked; idx == 0 would give
                # memory_idx == -1 and silently hit the last element — confirm the
                # prompt guarantees 1-based indices.
                if memory_idx >= len(memories):
                    logger.warning(f"Invalid index {memory_idx} for memories list of length {len(memories)}")
                    continue

                processed_indices.add(memory_idx)
                memory = memories[memory_idx]
                judgment_lower = judgment.lower()

                if judgment_lower in ['矛盾', 'contradiction']:
                    # For contradictory memories, either modify content or mark for removal
                    if modified_content.strip():
                        # Create new memory with modified content; identity fields
                        # (workspace_id, memory_id) are carried over so it replaces
                        # the original in the store.
                        modified_memory = PersonalMemory(
                            workspace_id=memory.workspace_id,
                            memory_id=memory.memory_id,
                            content=modified_content.strip(),
                            target=memory.target if hasattr(memory, 'target') else user_name,
                            author=memory.author,
                            metadata={**memory.metadata, 'modified_by': 'long_contra_repeat'}
                        )
                        modified_memory.update_time_modified()
                        filtered_memories.append(modified_memory)
                        logger.info(f"Modified contradictory memory {idx}: {modified_content.strip()[:50]}...")
                    else:
                        # Remove contradictory memory without modification (log only; not appended)
                        logger.info(f"Removing contradictory memory {idx}: {memory.content[:50]}...")

                elif judgment_lower in ['被包含', 'contained']:
                    # Remove contained/redundant memories (log only; not appended)
                    logger.info(f"Removing contained memory {idx}: {memory.content[:50]}...")

                else:  # 'none' case
                    # Keep the memory as is
                    filtered_memories.append(memory)

            except Exception as e:
                # A malformed judgment should not abort the remaining ones.
                logger.warning(f"Error processing judgment for index {idx}: {e}")
                continue

        # Add any memories that weren't processed (shouldn't happen with correct LLM response)
        for i, memory in enumerate(memories):
            if i not in processed_indices:
                filtered_memories.append(memory)
                logger.warning(f"Memory {i + 1} was not processed by LLM, keeping as is")

        return filtered_memories

    def get_language_value(self, value_dict: dict):
        """Get language-specific value from dictionary, falling back to English."""
        return value_dict.get(self.language, value_dict.get("en"))

    @staticmethod
    def parse_long_contra_repeat_response(response_text: str) -> List[tuple]:
        """Parse long contra repeat response to extract (index, judgment, modified_content) tuples."""
        # Pattern to match both Chinese and English judgment formats
        # Chinese: 判断:<序号> <矛盾|被包含|无> <修改后的内容>  (judgment: <index> <verdict> <rewrite>)
        # English: Judgment: <Index> <Contradiction|Contained|None> <Modified content>
        # The two alternatives fill disjoint capture-group triplets (1-3 vs 4-6).
        pattern = r"判断:<(\d+)>\s*<(矛盾|被包含|无)>\s*<([^<>]*)>|Judgment:\s*<(\d+)>\s*<(Contradiction|Contained|None)>\s*<([^<>]*)>"
        matches = re.findall(pattern, response_text, re.IGNORECASE | re.MULTILINE)

        judgments = []
        for match in matches:
            # Handle both Chinese and English patterns: unmatched alternative's
            # groups come back as empty strings, so group 0 discriminates.
            if match[0]:  # Chinese pattern
                idx_str, judgment, modified_content = match[0], match[1], match[2]
            else:  # English pattern
                idx_str, judgment, modified_content = match[3], match[4], match[5]

            try:
                idx = int(idx_str)
                judgments.append((idx, judgment, modified_content))
            except ValueError:
                logger.warning(f"Invalid index format: {idx_str}")
                continue

        logger.info(f"Parsed {len(judgments)} long contra repeat judgments from response")
        return judgments
|
@@ -0,0 +1,244 @@
|
|
1
|
+
import re
|
2
|
+
from typing import List
|
3
|
+
|
4
|
+
from flowllm import C, BaseLLMOp
|
5
|
+
from flowllm.schema.message import Message
|
6
|
+
from loguru import logger
|
7
|
+
|
8
|
+
from reme_ai.schema.memory import PersonalMemory
|
9
|
+
|
10
|
+
|
11
|
+
@C.register_op()
class UpdateInsightOp(BaseLLMOp):
    """
    This class is responsible for updating insight value in a memory system. It filters insight nodes
    based on their association with observed nodes, utilizes a ranking model to prioritize them,
    generates refreshed insights via an LLM, and manages node statuses and content updates.
    """
    file_path: str = __file__

    def execute(self):
        """
        Update insight values based on new observation memories.

        Process:
        1. Get insight subjects and personal memories from context
        2. Find relevant observations for each insight subject
        3. Update insight values using LLM integration
        4. Store updated insights in context ("updated_insight_memories" metadata key)
        """
        # Get memories from previous operations
        insight_memories = self.context.response.metadata.get("insight_memories", [])
        personal_memories = self.context.response.metadata.get("personal_memories", [])

        if not insight_memories:
            logger.info("No insight memories to update")
            self.context.response.metadata["updated_insight_memories"] = []
            return

        if not personal_memories:
            logger.info("No observation memories available for insight updates")
            self.context.response.metadata["updated_insight_memories"] = []
            return

        # Get operation parameters
        update_insight_threshold = self.op_params.get("update_insight_threshold", 0.3)
        update_insight_max_count = self.op_params.get("update_insight_max_count", 5)
        user_name = self.context.get("user_name", "user")

        logger.info(f"Updating {len(insight_memories)} insights with {len(personal_memories)} observations")

        # Score and filter insights based on relevance to observations
        scored_insights = self._score_insights_by_relevance(
            insight_memories, personal_memories, update_insight_threshold
        )

        if not scored_insights:
            logger.info("No insights meet relevance threshold for updating")
            self.context.response.metadata["updated_insight_memories"] = []
            return

        # Select top insights for updating (highest relevance first, capped by max count)
        top_insights = sorted(scored_insights, key=lambda x: x[1], reverse=True)[:update_insight_max_count]
        logger.info(f"Selected {len(top_insights)} insights for updating")

        # Update each selected insight; relevance_score is carried in the tuple
        # but not needed here beyond the sort above.
        updated_insights = []
        for insight_memory, relevance_score, relevant_observations in top_insights:
            updated_insight = self._update_insight_with_observations(
                insight_memory, relevant_observations, user_name
            )
            if updated_insight:
                updated_insights.append(updated_insight)

        # Store results in context
        self.context.response.metadata["updated_insight_memories"] = updated_insights
        logger.info(f"Successfully updated {len(updated_insights)} insight memories")

    def _score_insights_by_relevance(self, insight_memories: List[PersonalMemory],
                                     observation_memories: List[PersonalMemory],
                                     threshold: float) -> List[tuple]:
        """
        Score insight memories based on relevance to observation memories.

        Args:
            insight_memories: List of insight memories to score
            observation_memories: List of observation memories for comparison
            threshold: Minimum relevance score threshold

        Returns:
            List[tuple]: List of (insight_memory, relevance_score, relevant_observations)
            where relevance_score is the MAXIMUM score over the matched observations.
            Insights with no observation at or above the threshold are excluded.
        """
        scored_insights = []

        for insight_memory in insight_memories:
            relevant_observations = []
            max_relevance = 0.0

            # Subject used only for logging; falls back to content when absent.
            insight_subject = getattr(insight_memory, 'reflection_subject', '') or insight_memory.content
            # Keyword set is computed once per insight and reused for every observation.
            insight_keywords = set(insight_memory.content.lower().split())

            # Find observations relevant to this insight
            for obs_memory in observation_memories:
                relevance_score = self._calculate_relevance_score(
                    insight_memory, obs_memory, insight_keywords
                )

                if relevance_score >= threshold:
                    relevant_observations.append(obs_memory)
                    max_relevance = max(max_relevance, relevance_score)

            # Include insight if it has relevant observations
            if relevant_observations:
                scored_insights.append((insight_memory, max_relevance, relevant_observations))
                logger.info(
                    f"Insight '{insight_subject[:40]}...' scored {max_relevance:.3f} with {len(relevant_observations)} observations"
                )

        return scored_insights

    @staticmethod
    def _calculate_relevance_score(insight_memory: PersonalMemory,
                                   obs_memory: PersonalMemory, insight_keywords: set) -> float:
        """Calculate relevance score between insight and observation memory.

        Returns 0.9 when both share the same non-empty reflection_subject;
        otherwise the Jaccard similarity of whitespace-tokenized, lowercased
        content keywords (0.0 when both keyword sets are empty).
        """
        # High relevance for same reflection subject
        insight_subject = getattr(insight_memory, 'reflection_subject', '')
        obs_subject = getattr(obs_memory, 'reflection_subject', '')

        if insight_subject and obs_subject and insight_subject == obs_subject:
            return 0.9

        # Medium relevance for keyword overlap (Jaccard index)
        obs_keywords = set(obs_memory.content.lower().split())
        intersection = len(insight_keywords.intersection(obs_keywords))
        union = len(insight_keywords.union(obs_keywords))

        return intersection / union if union > 0 else 0.0

    def _update_insight_with_observations(self, insight_memory: PersonalMemory,
                                          relevant_observations: List[PersonalMemory],
                                          user_name: str) -> PersonalMemory:
        """
        Update a single insight memory based on relevant observations using LLM.

        Args:
            insight_memory: The insight memory to update
            relevant_observations: List of relevant observation memories
            user_name: The target username

        Returns:
            PersonalMemory: Updated insight memory; the ORIGINAL memory is
            returned unchanged when the LLM indicates no update is needed,
            produces identical content, or the call raises.
        """
        logger.info(
            f"Updating insight: {insight_memory.content[:50]}... with {len(relevant_observations)} observations")

        # Build observation context
        observation_texts = [obs.content for obs in relevant_observations]

        # Create prompt using the prompt format method.
        # NOTE: accesses reflection_subject directly (unlike the getattr() used in
        # scoring) — assumes PersonalMemory always defines the attribute.
        insight_key = insight_memory.reflection_subject or "personal_info"
        insight_key_value = f"{insight_key}: {insight_memory.content}"

        system_prompt = self.prompt_format(prompt_name="update_insight_system", user_name=user_name)
        few_shot = self.prompt_format(prompt_name="update_insight_few_shot", user_name=user_name)
        user_query = self.prompt_format(prompt_name="update_insight_user_query",
                                        user_query="\n".join(observation_texts),
                                        insight_key=insight_key,
                                        insight_key_value=insight_key_value)

        full_prompt = f"{system_prompt}\n\n{few_shot}\n\n{user_query}"
        logger.info(f"update_insight_prompt={full_prompt}")

        def parse_update_response(message: Message) -> PersonalMemory:
            """Parse LLM response and create updated insight memory.

            Closure over insight_memory/self; invoked by llm.chat as callback_fn.
            """
            response_text = message.content
            logger.info(f"update_insight_response={response_text}")

            # Parse the response to extract updated insight
            updated_content = UpdateInsightOp.parse_update_insight_response(response_text, self.language)

            if not updated_content or updated_content.lower() in ['无', 'none', '']:
                # LLM signalled "no change" (Chinese 无 or English none)
                logger.info(f"No update needed for insight: {insight_memory.content[:50]}...")
                return insight_memory

            if updated_content == insight_memory.content:
                logger.info(f"Insight content unchanged: {insight_memory.content[:50]}...")
                return insight_memory

            # Create updated insight memory; keeps the original memory_id so the
            # new content replaces the old record, and preserves the old content
            # in metadata for traceability.
            updated_insight = PersonalMemory(
                workspace_id=insight_memory.workspace_id,
                memory_id=insight_memory.memory_id,
                memory_type="personal_insight",
                content=updated_content,
                target=insight_memory.target,
                reflection_subject=insight_memory.reflection_subject,
                author=getattr(self.llm, "model_name", "system"),
                metadata={
                    **insight_memory.metadata,
                    "updated_by": "update_insight_op",
                    "original_content": insight_memory.content,
                    "update_reason": "integrated_new_observations"
                }
            )
            updated_insight.update_time_modified()

            logger.info(f"Updated insight: {updated_content[:50]}...")
            return updated_insight

        # Use LLM chat with callback function; on failure keep the original insight.
        try:
            return self.llm.chat(messages=[Message(content=full_prompt)], callback_fn=parse_update_response)
        except Exception as e:
            logger.error(f"Error updating insight: {e}")
            return insight_memory

    @staticmethod
    def parse_update_insight_response(response_text: str, language: str = "en") -> str:
        """Parse update insight response to extract updated insight content.

        Args:
            response_text: Raw LLM response text.
            language: "zh"/"cn" selects the Chinese pattern; anything else English.

        Returns:
            The extracted insight string, or "" when nothing matches.
        """
        # Pattern to match both Chinese and English insight formats
        # Chinese: {user_name}的资料: <信息>          ({user_name}'s profile: <info>)
        # English: {user_name}'s profile: <Information>
        if language in ["zh", "cn"]:
            pattern = r"的资料[::]\s*<([^<>]+)>"
        else:
            pattern = r"profile[::]\s*<([^<>]+)>"

        matches = re.findall(pattern, response_text, re.IGNORECASE | re.MULTILINE)

        if matches:
            # First labelled match wins.
            insight_content = matches[0].strip()
            logger.info(f"Parsed insight content: {insight_content}")
            return insight_content

        # Fallback: try to find content between angle brackets
        fallback_pattern = r"<([^<>]+)>"
        fallback_matches = re.findall(fallback_pattern, response_text)
        if fallback_matches:
            # Get the last match as it's likely the final answer
            insight_content = fallback_matches[-1].strip()
            logger.info(f"Parsed insight content (fallback): {insight_content}")
            return insight_content

        logger.warning("No insight content found in response")
        return ""
|
@@ -0,0 +1,10 @@
|
|
1
|
+
from .comparative_extraction_op import ComparativeExtractionOp
|
2
|
+
from .failure_extraction_op import FailureExtractionOp
|
3
|
+
from .memory_deduplication_op import MemoryDeduplicationOp
|
4
|
+
from .memory_validation_op import MemoryValidationOp
|
5
|
+
from .pdf_preprocess_op_wrapper import PDFPreprocessOp
|
6
|
+
from .simple_comparative_summary_op import SimpleComparativeSummaryOp
|
7
|
+
from .simple_summary_op import SimpleSummaryOp
|
8
|
+
from .success_extraction_op import SuccessExtractionOp
|
9
|
+
from .trajectory_preprocess_op import TrajectoryPreprocessOp
|
10
|
+
from .trajectory_segmentation_op import TrajectorySegmentationOp
|