reme-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. reme_ai/__init__.py +6 -0
  2. reme_ai/app.py +17 -0
  3. reme_ai/config/__init__.py +0 -0
  4. reme_ai/config/config_parser.py +6 -0
  5. reme_ai/constants/__init__.py +7 -0
  6. reme_ai/constants/common_constants.py +48 -0
  7. reme_ai/constants/language_constants.py +215 -0
  8. reme_ai/enumeration/__init__.py +0 -0
  9. reme_ai/enumeration/language_constants.py +215 -0
  10. reme_ai/react/__init__.py +1 -0
  11. reme_ai/react/simple_react_op.py +21 -0
  12. reme_ai/retrieve/__init__.py +2 -0
  13. reme_ai/retrieve/personal/__init__.py +17 -0
  14. reme_ai/retrieve/personal/extract_time_op.py +97 -0
  15. reme_ai/retrieve/personal/fuse_rerank_op.py +180 -0
  16. reme_ai/retrieve/personal/print_memory_op.py +131 -0
  17. reme_ai/retrieve/personal/read_message_op.py +52 -0
  18. reme_ai/retrieve/personal/retrieve_memory_op.py +13 -0
  19. reme_ai/retrieve/personal/semantic_rank_op.py +170 -0
  20. reme_ai/retrieve/personal/set_query_op.py +37 -0
  21. reme_ai/retrieve/task/__init__.py +4 -0
  22. reme_ai/retrieve/task/build_query_op.py +38 -0
  23. reme_ai/retrieve/task/merge_memory_op.py +27 -0
  24. reme_ai/retrieve/task/rerank_memory_op.py +149 -0
  25. reme_ai/retrieve/task/rewrite_memory_op.py +149 -0
  26. reme_ai/schema/__init__.py +1 -0
  27. reme_ai/schema/memory.py +144 -0
  28. reme_ai/summary/__init__.py +2 -0
  29. reme_ai/summary/personal/__init__.py +8 -0
  30. reme_ai/summary/personal/contra_repeat_op.py +143 -0
  31. reme_ai/summary/personal/get_observation_op.py +147 -0
  32. reme_ai/summary/personal/get_observation_with_time_op.py +165 -0
  33. reme_ai/summary/personal/get_reflection_subject_op.py +179 -0
  34. reme_ai/summary/personal/info_filter_op.py +177 -0
  35. reme_ai/summary/personal/load_today_memory_op.py +117 -0
  36. reme_ai/summary/personal/long_contra_repeat_op.py +210 -0
  37. reme_ai/summary/personal/update_insight_op.py +244 -0
  38. reme_ai/summary/task/__init__.py +10 -0
  39. reme_ai/summary/task/comparative_extraction_op.py +233 -0
  40. reme_ai/summary/task/failure_extraction_op.py +73 -0
  41. reme_ai/summary/task/memory_deduplication_op.py +163 -0
  42. reme_ai/summary/task/memory_validation_op.py +108 -0
  43. reme_ai/summary/task/pdf_preprocess_op_wrapper.py +50 -0
  44. reme_ai/summary/task/simple_comparative_summary_op.py +71 -0
  45. reme_ai/summary/task/simple_summary_op.py +67 -0
  46. reme_ai/summary/task/success_extraction_op.py +73 -0
  47. reme_ai/summary/task/trajectory_preprocess_op.py +76 -0
  48. reme_ai/summary/task/trajectory_segmentation_op.py +118 -0
  49. reme_ai/utils/__init__.py +0 -0
  50. reme_ai/utils/datetime_handler.py +345 -0
  51. reme_ai/utils/miner_u_pdf_processor.py +726 -0
  52. reme_ai/utils/op_utils.py +115 -0
  53. reme_ai/vector_store/__init__.py +6 -0
  54. reme_ai/vector_store/delete_memory_op.py +25 -0
  55. reme_ai/vector_store/recall_vector_store_op.py +36 -0
  56. reme_ai/vector_store/update_memory_freq_op.py +33 -0
  57. reme_ai/vector_store/update_memory_utility_op.py +32 -0
  58. reme_ai/vector_store/update_vector_store_op.py +32 -0
  59. reme_ai/vector_store/vector_store_action_op.py +55 -0
  60. reme_ai-0.1.0.dist-info/METADATA +218 -0
  61. reme_ai-0.1.0.dist-info/RECORD +65 -0
  62. reme_ai-0.1.0.dist-info/WHEEL +5 -0
  63. reme_ai-0.1.0.dist-info/entry_points.txt +2 -0
  64. reme_ai-0.1.0.dist-info/licenses/LICENSE +201 -0
  65. reme_ai-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,147 @@
1
+ import re
2
+ from typing import List
3
+
4
+ from flowllm import C, BaseLLMOp
5
+ from flowllm.schema.message import Message
6
+ from loguru import logger
7
+
8
+ from reme_ai.schema.memory import BaseMemory, PersonalMemory
9
+ from reme_ai.utils.datetime_handler import DatetimeHandler
10
+
11
+
12
@C.register_op()
class GetObservationOp(BaseLLMOp):
    """
    Extract personal observations from chat messages using an LLM.

    Messages containing time-related keywords are excluded here (they are
    handled by the time-aware sibling op); the remaining messages are sent to
    the LLM, whose response is parsed into ``PersonalMemory`` objects stored
    in ``context.observation_memories``.
    """
    file_path: str = __file__

    def execute(self):
        """Extract personal observations from chat messages."""
        # Get messages from context - guaranteed to exist by flow input.
        messages: List[Message] = self.context.messages
        if not messages:
            logger.warning("No messages found in context")
            return

        # Filter messages - exclude those with time-related keywords.
        filtered_messages = self._filter_messages(messages)
        if not filtered_messages:
            logger.warning("No messages left after filtering")
            self.context.observation_memories = []
            return

        logger.info(f"Extracting observations from {len(filtered_messages)} filtered messages")

        # Extract observations using the LLM.
        observation_memories = self._extract_observations_from_messages(filtered_messages)

        # Store results in context using the standardized key.
        self.context.observation_memories = observation_memories
        logger.info(f"Generated {len(observation_memories)} observation memories")

    def _filter_messages(self, messages: List[Message]) -> List[Message]:
        """
        Filter the chat messages to exclude those containing time-related keywords.

        Args:
            messages: List of messages to filter.

        Returns:
            List[Message]: Filtered messages without time keywords.
        """
        filtered_messages = [
            msg for msg in messages
            if not DatetimeHandler.has_time_word(query=msg.content, language=self.language)
        ]
        logger.info(f"Filtered messages from {len(messages)} to {len(filtered_messages)}")
        return filtered_messages

    def _extract_observations_from_messages(self, filtered_messages: List[Message]) -> List[BaseMemory]:
        """Extract observations from filtered messages using the LLM."""
        user_name = self.context.get("user_name", "user")

        # Build the numbered message list for the prompt.
        user_query_list = [f"{i + 1} {user_name}: {msg.content}"
                           for i, msg in enumerate(filtered_messages)]

        # Create the prompt from the configured prompt templates.
        system_prompt = self.prompt_format(prompt_name="get_observation_system",
                                           num_obs=len(user_query_list),
                                           user_name=user_name)
        few_shot = self.prompt_format(prompt_name="get_observation_few_shot", user_name=user_name)
        user_query = self.prompt_format(prompt_name="get_observation_user_query",
                                        user_query="\n".join(user_query_list),
                                        user_name=user_name)

        full_prompt = f"{system_prompt}\n\n{few_shot}\n\n{user_query}"
        logger.info(f"get_observation_prompt={full_prompt}")

        def parse_observations(message: Message) -> List[BaseMemory]:
            """Parse the LLM response and create observation memories."""
            response_text = message.content
            logger.info(f"get_observation_response={response_text}")

            # Parse observations using the class's static parser.
            parsed_observations = GetObservationOp.parse_observation_response(response_text)

            observation_memories = []
            for obs in parsed_observations:
                idx = obs["index"] - 1  # Convert to 0-based index
                # Bounds check: also reject negative indices (an LLM emitting
                # index "0" would otherwise silently select the LAST message).
                if not 0 <= idx < len(filtered_messages):
                    logger.warning(f"Invalid index {idx} for messages list of length {len(filtered_messages)}")
                    continue

                # Create the observation memory.
                observation = PersonalMemory(
                    workspace_id=self.context.workspace_id,
                    when_to_use=obs["keywords"],
                    content=obs["content"],
                    target=user_name,
                    # getattr keeps this robust when the LLM client exposes no
                    # model_name — consistent with the sibling observation ops.
                    author=getattr(self.llm, "model_name", "system"),
                    metadata={
                        "keywords": obs["keywords"],
                        "source_message": filtered_messages[idx].content,
                        "observation_type": "personal_info"
                    }
                )
                observation_memories.append(observation)
                logger.info(f"Created observation: {obs['content'][:50]}...")

            return observation_memories

        # Use LLM chat with a callback that parses the response.
        return self.llm.chat(messages=[Message(content=full_prompt)], callback_fn=parse_observations)

    @staticmethod
    def parse_observation_response(response_text: str) -> List[dict]:
        """Parse an observation response into dicts with index/content/keywords keys."""
        # Pattern matches both Chinese and English observation formats.
        pattern = r"信息:<(\d+)>\s*<>\s*<([^<>]+)>\s*<([^<>]*)>|Information:\s*<(\d+)>\s*<>\s*<([^<>]+)>\s*<([^<>]*)>"
        matches = re.findall(pattern, response_text, re.IGNORECASE | re.MULTILINE)

        observations = []
        for match in matches:
            # Groups 0-2 belong to the Chinese alternative, 3-5 to the English one.
            if match[0]:  # Chinese pattern
                idx_str, content, keywords = match[0], match[1], match[2]
            else:  # English pattern
                idx_str, content, keywords = match[3], match[4], match[5]

            try:
                idx = int(idx_str)
                # Skip entries that indicate no meaningful observation.
                content_lower = content.lower().strip()
                if content_lower not in ['无', 'none', '', 'repeat']:
                    observations.append({
                        "index": idx,
                        "content": content.strip(),
                        "keywords": keywords.strip() if keywords else ""
                    })
            except ValueError:
                logger.warning(f"Invalid index format: {idx_str}")
                continue

        return observations
@@ -0,0 +1,165 @@
1
+ import re
2
+ from typing import List
3
+
4
+ from flowllm import C, BaseLLMOp
5
+ from flowllm.schema.message import Message
6
+ from loguru import logger
7
+
8
+ from reme_ai.schema.memory import BaseMemory, PersonalMemory
9
+ from reme_ai.utils.datetime_handler import DatetimeHandler
10
+
11
+
12
@C.register_op()
class GetObservationWithTimeOp(BaseLLMOp):
    """
    Extract personal observations WITH time information from chat messages.

    Only messages containing time-related keywords are kept; each is prefixed
    with its formatted creation time before being sent to the LLM, whose
    response is parsed into ``PersonalMemory`` objects stored in
    ``context.observation_memories_with_time``.
    """
    file_path: str = __file__

    def execute(self):
        """Extract personal observations with time information from chat messages."""
        # Get messages from context - guaranteed to exist by flow input.
        messages: List[Message] = self.context.messages
        if not messages:
            logger.warning("No messages found in context")
            return

        # Filter messages - only keep those with time-related keywords.
        filtered_messages = self._filter_messages(messages)
        if not filtered_messages:
            logger.warning("No messages with time keywords found")
            self.context.observation_memories_with_time = []
            return

        logger.info(f"Extracting observations with time from {len(filtered_messages)} filtered messages")

        # Extract observations using the LLM.
        observation_memories_with_time = self._extract_observations_with_time_from_messages(filtered_messages)

        # Store results in context using the standardized key.
        self.context.observation_memories_with_time = observation_memories_with_time
        logger.info(f"Generated {len(observation_memories_with_time)} observation memories with time")

    def _filter_messages(self, messages: List[Message]) -> List[Message]:
        """
        Filter the chat messages to only include those containing time-related keywords.

        Args:
            messages: List of messages to filter.

        Returns:
            List[Message]: Messages that mention time.
        """
        filtered_messages = [
            msg for msg in messages
            if DatetimeHandler.has_time_word(query=msg.content, language=self.language)
        ]
        logger.info(f"Filtered messages from {len(messages)} to {len(filtered_messages)}")
        return filtered_messages

    def _extract_observations_with_time_from_messages(self, filtered_messages: List[Message]) -> List[BaseMemory]:
        """Extract observations with time information from filtered messages using the LLM."""
        user_name = self.context.get("user_name", "user")

        # The time format and colon word are loop-invariant: resolve them ONCE
        # instead of re-reading the prompt configuration for every message.
        time_format = self.prompt_format(prompt_name="time_string_format")
        colon = self._get_colon_word()

        # Build "<n> <time> <user><colon><content>" lines for the prompt.
        user_query_list = []
        for i, msg in enumerate(filtered_messages):
            # Format each message's creation timestamp.
            dt_handler = DatetimeHandler(dt=msg.time_created)
            dt = dt_handler.string_format(string_format=time_format, language=self.language)
            user_query_list.append(f"{i + 1} {dt} {user_name}{colon}{msg.content}")

        # Create the prompt from the configured prompt templates.
        system_prompt = self.prompt_format(prompt_name="get_observation_with_time_system",
                                           num_obs=len(user_query_list),
                                           user_name=user_name)
        few_shot = self.prompt_format(prompt_name="get_observation_with_time_few_shot", user_name=user_name)
        user_query = self.prompt_format(prompt_name="get_observation_with_time_user_query",
                                        user_query="\n".join(user_query_list),
                                        user_name=user_name)

        full_prompt = f"{system_prompt}\n\n{few_shot}\n\n{user_query}"
        logger.info(f"get_observation_with_time_prompt={full_prompt}")

        def parse_observations(message: Message) -> List[BaseMemory]:
            """Parse the LLM response and create observation memories with time."""
            response_text = message.content
            logger.info(f"get_observation_with_time_response={response_text}")

            # Parse observations using the class's static parser.
            parsed_observations = GetObservationWithTimeOp.parse_observation_with_time_response(response_text)

            observation_memories = []
            for obs in parsed_observations:
                idx = obs["index"] - 1  # Convert to 0-based index
                # Bounds check: also reject negative indices (an LLM emitting
                # index "0" would otherwise silently select the LAST message).
                if not 0 <= idx < len(filtered_messages):
                    logger.warning(f"Invalid index {idx} for messages list of length {len(filtered_messages)}")
                    continue

                # Create the observation memory.
                observation = PersonalMemory(
                    workspace_id=self.context.workspace_id,
                    when_to_use=obs["keywords"],
                    content=obs["content"],
                    target=user_name,
                    author=getattr(self.llm, "model_name", "system"),
                    metadata={
                        "keywords": obs["keywords"],
                        "time_info": obs.get("time_info", ""),
                        "source_message": filtered_messages[idx].content,
                        "observation_type": "personal_info_with_time"
                    }
                )
                observation_memories.append(observation)
                logger.info(f"Created observation with time: {obs['content'][:50]}...")

            return observation_memories

        # Use LLM chat with a callback that parses the response.
        return self.llm.chat(messages=[Message(content=full_prompt)], callback_fn=parse_observations)

    def _get_colon_word(self) -> str:
        """Get the language-specific colon word."""
        colon_dict = {"zh": ":", "cn": ":", "en": ": "}
        return colon_dict.get(self.language, ": ")

    @staticmethod
    def parse_observation_with_time_response(response_text: str) -> List[dict]:
        """Parse an observation-with-time response into structured dicts."""
        # Pattern matches both Chinese and English observation formats with time information:
        # Chinese: 信息:<1> <时间信息或不输出> <明确的重要信息或"无"> <关键词>
        # English: Information: <1> <Time information or do not output> <Clear important information or "None"> <Keywords>
        pattern = r"信息:<(\d+)>\s*<([^<>]*)>\s*<([^<>]+)>\s*<([^<>]*)>|Information:\s*<(\d+)>\s*<([^<>]*)>\s*<([^<>]+)>\s*<([^<>]*)>"
        matches = re.findall(pattern, response_text, re.IGNORECASE | re.MULTILINE)

        observations = []
        for match in matches:
            # Groups 0-3 belong to the Chinese alternative, 4-7 to the English one.
            if match[0]:  # Chinese pattern
                idx_str, time_info, content, keywords = match[0], match[1], match[2], match[3]
            else:  # English pattern
                idx_str, time_info, content, keywords = match[4], match[5], match[6], match[7]

            try:
                idx = int(idx_str)
                # Skip entries that indicate no meaningful observation.
                content_lower = content.lower().strip()
                if content_lower not in ['无', 'none', '', 'repeat']:
                    observations.append({
                        "index": idx,
                        "time_info": time_info.strip() if time_info else "",
                        "content": content.strip(),
                        "keywords": keywords.strip() if keywords else ""
                    })
            except ValueError:
                logger.warning(f"Invalid index format: {idx_str}")
                continue

        return observations
@@ -0,0 +1,179 @@
1
+ from typing import List
2
+
3
+ from flowllm import C, BaseLLMOp
4
+ from flowllm.schema.message import Message
5
+ from loguru import logger
6
+
7
+ from reme_ai.schema.memory import BaseMemory, PersonalMemory
8
+
9
+
10
@C.register_op()
class GetReflectionSubjectOp(BaseLLMOp):
    """
    Derive reflection subjects (topics) from accumulated personal memories.

    The op pulls personal memories and existing insights from the response
    metadata, asks the LLM for fresh subjects that are not already covered,
    wraps each new subject as an insight memory, and publishes the result
    back on the response metadata.
    """
    file_path: str = __file__

    def new_insight_memory(self, insight_content: str, target: str) -> PersonalMemory:
        """
        Build a PersonalMemory that represents a single insight subject.

        Args:
            insight_content (str): The content of the insight.
            target (str): The target person the insight is about.

        Returns:
            PersonalMemory: The newly created insight memory.
        """
        return PersonalMemory(
            workspace_id=self.context.get("workspace_id", ""),
            content=insight_content,
            target=target,
            reflection_subject=insight_content,  # subject kept in its dedicated field
            author=getattr(self.llm, "model_name", "system"),
            metadata={
                "insight_type": "reflection_subject",
                "memory_type": "personal_topic"
            }
        )

    def execute(self):
        """
        Generate reflection subjects from personal memories.

        Steps: read memories and insights from context, bail out when there is
        not enough material to reflect on, ask the LLM for new subjects, and
        store the resulting insight memories for the next operation.
        """
        response_metadata = self.context.response.metadata
        personal_memories = response_metadata.get("personal_memories", [])
        existing_insights = response_metadata.get("existing_insights", [])

        # Operation parameters.
        reflect_obs_cnt_threshold = self.op_params.get("reflect_obs_cnt_threshold", 10)
        reflect_num_questions = self.op_params.get("reflect_num_questions", 3)
        user_name = self.context.get("user_name", "user")

        # Not enough observations yet -> nothing to reflect on.
        if len(personal_memories) < reflect_obs_cnt_threshold:
            logger.info(f"Insufficient memories for reflection: {len(personal_memories)} < {reflect_obs_cnt_threshold}")
            response_metadata["insight_memories"] = []
            return

        # Collect subjects that already exist so the LLM does not repeat them.
        existing_subjects = []
        if existing_insights:
            existing_subjects = [mem.content for mem in existing_insights
                                 if hasattr(mem, 'content') and mem.content]

        logger.info(f"Found {len(existing_subjects)} existing insight subjects")

        # Gather the non-empty memory texts that feed the LLM.
        memory_contents = [mem.content.strip() for mem in personal_memories
                           if hasattr(mem, 'content') and mem.content.strip()]

        if not memory_contents:
            logger.warning("No valid memory content found for reflection")
            response_metadata["insight_memories"] = []
            return

        # Ask the LLM for new reflection subjects.
        insight_memories = self._generate_reflection_subjects(
            memory_contents, existing_subjects, user_name, reflect_num_questions
        )

        response_metadata["insight_memories"] = insight_memories
        logger.info(f"Generated {len(insight_memories)} new reflection subject memories")

    def _generate_reflection_subjects(self, memory_contents: List[str], existing_subjects: List[str],
                                      user_name: str, num_questions: int) -> List[BaseMemory]:
        """
        Ask the LLM for new reflection subjects based on the memory contents.

        Args:
            memory_contents: Memory content strings to analyse.
            existing_subjects: Subjects that must not be repeated.
            user_name: Target username.
            num_questions: Maximum number of new subjects to generate.

        Returns:
            List of PersonalMemory objects, one per generated subject.
        """
        # Assemble the prompt from its three configured templates.
        exist_keys = ", ".join(existing_subjects) if existing_subjects else "None"
        prompt_parts = [
            self.prompt_format(prompt_name="get_reflection_subject_system",
                               user_name=user_name,
                               num_questions=num_questions),
            self.prompt_format(prompt_name="get_reflection_subject_few_shot",
                               user_name=user_name),
            self.prompt_format(prompt_name="get_reflection_subject_user_query",
                               user_name=user_name,
                               exist_keys=exist_keys,
                               user_query="\n".join(memory_contents)),
        ]
        full_prompt = "\n\n".join(prompt_parts)
        logger.info(f"Reflection subject prompt length: {len(full_prompt)} chars")

        def parse_reflection_response(message: Message) -> List[BaseMemory]:
            """Turn the raw LLM response into insight memories."""
            response_text = message.content
            logger.info(f"Reflection subjects response: {response_text}")

            # Extract new subjects, skipping any that already exist.
            new_subjects = GetReflectionSubjectOp.parse_reflection_subjects_response(response_text, existing_subjects)

            insight_memories = []
            for subject in new_subjects:
                insight_memories.append(self.new_insight_memory(insight_content=subject, target=user_name))
                logger.info(f"Created reflection subject: {subject}")
            return insight_memories

        # Generate subjects using the LLM.
        return self.llm.chat(messages=[Message(content=full_prompt)], callback_fn=parse_reflection_response)

    def get_language_value(self, value_dict: dict):
        """Return the value for the current language, falling back to English."""
        return value_dict.get(self.language, value_dict.get("en"))

    @staticmethod
    def parse_reflection_subjects_response(response_text: str, existing_subjects: List[str] = None) -> List[str]:
        """Extract new subject lines from the response, skipping headers, blanks and known subjects."""
        if existing_subjects is None:
            existing_subjects = []

        subjects = []
        for raw_line in response_text.strip().split('\n'):
            candidate = raw_line.strip()
            # Guard clauses: blank / single-char noise.
            if not candidate or len(candidate) <= 1:
                continue
            # "No subject" markers and already-known subjects.
            if candidate in ('无', 'None') or candidate in existing_subjects:
                continue
            # Chinese / English section headers.
            if candidate.startswith('新增') or candidate.startswith('New '):
                continue
            subjects.append(candidate)

        logger.info(f"Parsed {len(subjects)} new reflection subjects from response")
        return subjects
@@ -0,0 +1,177 @@
1
+ import re
2
+ from typing import List
3
+
4
+ from flowllm import C, BaseLLMOp
5
+ from flowllm.schema.message import Message
6
+ from loguru import logger
7
+
8
+ from reme_ai.schema.memory import PersonalMemory
9
+
10
+
11
@C.register_op()
class InfoFilterOp(BaseLLMOp):
    """
    Filter chat messages by information-content score using an LLM.

    User messages are scored 0-3 by the LLM; only messages whose score is in
    the configured ``preserved_scores`` set are kept, converted into
    ``PersonalMemory`` objects and written back to ``context.messages``.
    """
    file_path: str = __file__

    def execute(self):
        """Filter messages based on information content scores."""
        # Normalize dict payloads into Message objects - guaranteed to exist by flow input.
        self.context.messages = [Message(**x) if isinstance(x, dict) else x for x in self.context.messages]
        messages: List[Message] = self.context.messages
        if not messages:
            logger.warning("No messages found in context")
            return

        # Operation parameters.
        preserved_scores = self.op_params.get("preserved_scores", "2,3")
        info_filter_msg_max_size = self.op_params.get("info_filter_msg_max_size", 200)
        user_name = self.context.get("user_name", "user")

        # Pre-filter: drop memorized / non-user messages and truncate long ones.
        info_messages = self._filter_and_process_messages(messages, user_name, info_filter_msg_max_size)
        if not info_messages:
            logger.warning("No messages left after filtering")
            self.context.messages = []
            return

        logger.info(f"Filtering {len(info_messages)} messages for information content")

        # Score and filter messages using the LLM.
        filtered_memories = self._filter_messages_with_llm(info_messages, user_name, preserved_scores)

        # Store results in context using the standardized key.
        self.context.messages = filtered_memories
        logger.info(f"Filtered to {len(filtered_memories)} high-information messages")

    @staticmethod
    def _filter_and_process_messages(messages: List[Message], user_name: str, max_size: int) -> List[Message]:
        """Drop memorized and non-user messages; truncate overlong content to max_size."""
        info_messages = []

        for msg in messages:
            # Skip messages that were already memorized.
            if msg.metadata.get('memorized', False):
                continue

            # Only user messages carry personal information worth scoring.
            # NOTE(review): a role_name-based filter was previously considered
            # here; role is used instead.
            if msg.role.value != "user":
                continue

            # Truncate long messages by keeping the head and the tail halves.
            if len(msg.content) >= max_size:
                half_size = int(max_size * 0.5 + 0.5)
                msg.content = msg.content[:half_size] + msg.content[-half_size:]

            info_messages.append(msg)

        logger.info(f"Filtered messages from {len(messages)} to {len(info_messages)}")
        return info_messages

    def _filter_messages_with_llm(self, info_messages: List[Message], user_name: str, preserved_scores: str) -> List[
        PersonalMemory]:
        """Score each message with the LLM and keep those whose score is preserved."""
        # Parse the "2,3"-style config into an explicit set of score strings.
        # This is safer than substring-matching the raw config string.
        preserved_score_set = {s.strip() for s in preserved_scores.split(",") if s.strip()}

        # Build the numbered message list for the prompt.
        user_query_list = []
        colon = self._get_colon_word()
        for i, msg in enumerate(info_messages):
            user_query_list.append(f"{i + 1} {user_name}{colon} {msg.content}")

        # Create the prompt from the configured prompt templates.
        system_prompt = self.prompt_format(prompt_name="info_filter_system",
                                           batch_size=len(info_messages),
                                           user_name=user_name)
        few_shot = self.prompt_format(prompt_name="info_filter_few_shot", user_name=user_name)
        user_query = self.prompt_format(prompt_name="info_filter_user_query",
                                        user_query="\n".join(user_query_list))

        full_prompt = f"{system_prompt}\n\n{few_shot}\n\n{user_query}"
        logger.info(f"info_filter_prompt={full_prompt}")

        def parse_and_filter(message: Message) -> List[PersonalMemory]:
            """Parse the LLM response and create memories for preserved messages."""
            response_text = message.content
            logger.info(f"info_filter_response={response_text}")

            # Parse (index, score) pairs using the class's static parser.
            info_scores = InfoFilterOp.parse_info_filter_response(response_text)

            if len(info_scores) != len(info_messages):
                logger.warning(f"score_size != messages_size, {len(info_scores)} vs {len(info_messages)}")

            filtered_memories = []
            for idx, score in info_scores:
                # Convert to 0-based index; also reject negative indices
                # (an LLM emitting "0" would otherwise select the LAST message).
                msg_idx = idx - 1
                if not 0 <= msg_idx < len(info_messages):
                    logger.warning(f"Invalid index {msg_idx} for messages list of length {len(info_messages)}")
                    continue

                # Only keep messages whose score is configured as preserved.
                if score not in preserved_score_set:
                    continue

                # Named distinctly so it does not shadow the callback's `message` param.
                kept_message = info_messages[msg_idx]

                # Spread the original metadata FIRST so the explicit keys below
                # (notably "memorized": True) always win; spreading last could
                # clobber them with stale values from the source message.
                memory = PersonalMemory(
                    workspace_id=self.context.get("workspace_id", ""),
                    content=kept_message.content,
                    target=user_name,
                    author=getattr(self.llm, "model_name", "system"),
                    metadata={
                        **kept_message.metadata,  # include all original metadata
                        "info_score": score,
                        "filter_type": "info_content",
                        "original_message_time": getattr(kept_message, 'time_created', None),
                        # .get (not .pop) avoids mutating the source message's metadata.
                        "role_name": kept_message.metadata.get("role_name", user_name),
                        "memorized": True,
                    }
                )
                filtered_memories.append(memory)
                logger.info(f"Info filter: kept message with score {score}: {kept_message.content[:50]}...")

            return filtered_memories

        # Use LLM chat with a callback that parses the response.
        return self.llm.chat(messages=[Message(content=full_prompt)], callback_fn=parse_and_filter)

    def _get_colon_word(self) -> str:
        """Get the language-specific colon word."""
        colon_dict = {"zh": ":", "cn": ":", "en": ": "}
        return colon_dict.get(self.language, ": ")

    @staticmethod
    def parse_info_filter_response(response_text: str) -> List[tuple]:
        """Parse the info-filter response into (index, score) tuples."""
        # Pattern matches both Chinese and English result formats:
        # Chinese: 结果:<序号> <分数>
        # English: Result: <Index> <Score>
        pattern = r"结果:<(\d+)>\s*<([0-3])>|Result:\s*<(\d+)>\s*<([0-3])>"
        matches = re.findall(pattern, response_text, re.IGNORECASE | re.MULTILINE)

        scores = []
        for match in matches:
            # Groups 0-1 belong to the Chinese alternative, 2-3 to the English one.
            if match[0]:  # Chinese pattern
                idx_str, score_str = match[0], match[1]
            else:  # English pattern
                idx_str, score_str = match[2], match[3]

            try:
                # Score stays a string ("0".."3") to match the preserved-score config.
                scores.append((int(idx_str), score_str))
            except ValueError:
                logger.warning(f"Invalid index or score format: {idx_str}, {score_str}")
                continue

        logger.info(f"Parsed {len(scores)} info filter scores from response")
        return scores