reme_ai-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. reme_ai/__init__.py +6 -0
  2. reme_ai/app.py +17 -0
  3. reme_ai/config/__init__.py +0 -0
  4. reme_ai/config/config_parser.py +6 -0
  5. reme_ai/constants/__init__.py +7 -0
  6. reme_ai/constants/common_constants.py +48 -0
  7. reme_ai/constants/language_constants.py +215 -0
  8. reme_ai/enumeration/__init__.py +0 -0
  9. reme_ai/enumeration/language_constants.py +215 -0
  10. reme_ai/react/__init__.py +1 -0
  11. reme_ai/react/simple_react_op.py +21 -0
  12. reme_ai/retrieve/__init__.py +2 -0
  13. reme_ai/retrieve/personal/__init__.py +17 -0
  14. reme_ai/retrieve/personal/extract_time_op.py +97 -0
  15. reme_ai/retrieve/personal/fuse_rerank_op.py +180 -0
  16. reme_ai/retrieve/personal/print_memory_op.py +131 -0
  17. reme_ai/retrieve/personal/read_message_op.py +52 -0
  18. reme_ai/retrieve/personal/retrieve_memory_op.py +13 -0
  19. reme_ai/retrieve/personal/semantic_rank_op.py +170 -0
  20. reme_ai/retrieve/personal/set_query_op.py +37 -0
  21. reme_ai/retrieve/task/__init__.py +4 -0
  22. reme_ai/retrieve/task/build_query_op.py +38 -0
  23. reme_ai/retrieve/task/merge_memory_op.py +27 -0
  24. reme_ai/retrieve/task/rerank_memory_op.py +149 -0
  25. reme_ai/retrieve/task/rewrite_memory_op.py +149 -0
  26. reme_ai/schema/__init__.py +1 -0
  27. reme_ai/schema/memory.py +144 -0
  28. reme_ai/summary/__init__.py +2 -0
  29. reme_ai/summary/personal/__init__.py +8 -0
  30. reme_ai/summary/personal/contra_repeat_op.py +143 -0
  31. reme_ai/summary/personal/get_observation_op.py +147 -0
  32. reme_ai/summary/personal/get_observation_with_time_op.py +165 -0
  33. reme_ai/summary/personal/get_reflection_subject_op.py +179 -0
  34. reme_ai/summary/personal/info_filter_op.py +177 -0
  35. reme_ai/summary/personal/load_today_memory_op.py +117 -0
  36. reme_ai/summary/personal/long_contra_repeat_op.py +210 -0
  37. reme_ai/summary/personal/update_insight_op.py +244 -0
  38. reme_ai/summary/task/__init__.py +10 -0
  39. reme_ai/summary/task/comparative_extraction_op.py +233 -0
  40. reme_ai/summary/task/failure_extraction_op.py +73 -0
  41. reme_ai/summary/task/memory_deduplication_op.py +163 -0
  42. reme_ai/summary/task/memory_validation_op.py +108 -0
  43. reme_ai/summary/task/pdf_preprocess_op_wrapper.py +50 -0
  44. reme_ai/summary/task/simple_comparative_summary_op.py +71 -0
  45. reme_ai/summary/task/simple_summary_op.py +67 -0
  46. reme_ai/summary/task/success_extraction_op.py +73 -0
  47. reme_ai/summary/task/trajectory_preprocess_op.py +76 -0
  48. reme_ai/summary/task/trajectory_segmentation_op.py +118 -0
  49. reme_ai/utils/__init__.py +0 -0
  50. reme_ai/utils/datetime_handler.py +345 -0
  51. reme_ai/utils/miner_u_pdf_processor.py +726 -0
  52. reme_ai/utils/op_utils.py +115 -0
  53. reme_ai/vector_store/__init__.py +6 -0
  54. reme_ai/vector_store/delete_memory_op.py +25 -0
  55. reme_ai/vector_store/recall_vector_store_op.py +36 -0
  56. reme_ai/vector_store/update_memory_freq_op.py +33 -0
  57. reme_ai/vector_store/update_memory_utility_op.py +32 -0
  58. reme_ai/vector_store/update_vector_store_op.py +32 -0
  59. reme_ai/vector_store/vector_store_action_op.py +55 -0
  60. reme_ai-0.1.0.dist-info/METADATA +218 -0
  61. reme_ai-0.1.0.dist-info/RECORD +65 -0
  62. reme_ai-0.1.0.dist-info/WHEEL +5 -0
  63. reme_ai-0.1.0.dist-info/entry_points.txt +2 -0
  64. reme_ai-0.1.0.dist-info/licenses/LICENSE +201 -0
  65. reme_ai-0.1.0.dist-info/top_level.txt +1 -0
reme_ai/summary/task/comparative_extraction_op.py
@@ -0,0 +1,233 @@
+from typing import List, Tuple, Optional
+
+from flowllm import C, BaseLLMOp
+from loguru import logger
+
+from reme_ai.schema import Message, Trajectory
+from reme_ai.schema.memory import BaseMemory, TaskMemory
+from reme_ai.utils.op_utils import merge_messages_content, parse_json_experience_response
+
+
+@C.register_op()
+class ComparativeExtractionOp(BaseLLMOp):
+    file_path: str = __file__
+
+    def execute(self):
+        """Extract comparative task memories by comparing trajectories with different scores"""
+        all_trajectories: List[Trajectory] = self.context.get("all_trajectories", [])
+        success_trajectories: List[Trajectory] = self.context.get("success_trajectories", [])
+        failure_trajectories: List[Trajectory] = self.context.get("failure_trajectories", [])
+
+        comparative_task_memories = []
+
+        # Soft comparison: highest score vs lowest score
+        if len(all_trajectories) >= 2 and self.op_params.get("enable_soft_comparison", True):
+            highest_traj, lowest_traj = self._find_highest_lowest_scoring_trajectories(all_trajectories)
+            if highest_traj and lowest_traj and highest_traj.score > lowest_traj.score:
+                logger.info(
+                    f"Extracting soft comparative task memories: highest ({highest_traj.score:.2f}) vs lowest ({lowest_traj.score:.2f})")
+                soft_task_memories = self._extract_soft_comparative_task_memory(highest_traj, lowest_traj)
+                comparative_task_memories.extend(soft_task_memories)
+
+        # Hard comparison: success vs failure (if similarity search is enabled)
+        if (success_trajectories and failure_trajectories and
+                self.op_params.get("enable_similarity_comparison", False)):
+
+            similar_pairs = self._find_similar_step_sequences(success_trajectories, failure_trajectories)
+            logger.info(f"Found {len(similar_pairs)} similar pairs for hard comparison")
+
+            for success_steps, failure_steps, similarity_score in similar_pairs:
+                hard_task_memories = self._extract_hard_comparative_task_memory(success_steps, failure_steps,
+                                                                                similarity_score)
+                comparative_task_memories.extend(hard_task_memories)
+
+        logger.info(f"Extracted {len(comparative_task_memories)} comparative task memories")
+
+        # Add task memories to context
+        self.context.comparative_task_memories = comparative_task_memories
+
+    @staticmethod
+    def _find_highest_lowest_scoring_trajectories(trajectories: List[Trajectory]) -> Tuple[
+            Optional[Trajectory], Optional[Trajectory]]:
+        """Find the highest and lowest scoring trajectories"""
+        if len(trajectories) < 2:
+            return None, None
+
+        # Filter trajectories with valid scores
+        valid_trajectories = [traj for traj in trajectories if traj.score is not None]
+
+        if len(valid_trajectories) < 2:
+            logger.warning("Not enough trajectories with valid scores for comparison")
+            return None, None
+
+        # Sort by score
+        sorted_trajectories = sorted(valid_trajectories, key=lambda x: x.score, reverse=True)
+
+        highest_traj = sorted_trajectories[0]
+        lowest_traj = sorted_trajectories[-1]
+
+        return highest_traj, lowest_traj
+
+    @staticmethod
+    def _get_trajectory_score(trajectory: Trajectory) -> Optional[float]:
+        """Get trajectory score"""
+        return trajectory.score
+
+    def _extract_soft_comparative_task_memory(self, higher_traj: Trajectory, lower_traj: Trajectory) -> List[
+            BaseMemory]:
+        """Extract soft comparative task memory (high score vs low score)"""
+        higher_steps = self._get_trajectory_steps(higher_traj)
+        lower_steps = self._get_trajectory_steps(lower_traj)
+        higher_score = self._get_trajectory_score(higher_traj)
+        lower_score = self._get_trajectory_score(lower_traj)
+
+        prompt = self.prompt_format(
+            prompt_name="soft_comparative_step_task_memory_prompt",
+            higher_steps=merge_messages_content(higher_steps),
+            lower_steps=merge_messages_content(lower_steps),
+            higher_score=f"{higher_score:.2f}",
+            lower_score=f"{lower_score:.2f}"
+        )
+
+        def parse_task_memories(message: Message) -> List[BaseMemory]:
+            task_memories_data = parse_json_experience_response(message.content)
+            task_memories = []
+
+            for tm_data in task_memories_data:
+                task_memory = TaskMemory(
+                    workspace_id=self.context.get("workspace_id", ""),
+                    when_to_use=tm_data.get("when_to_use", tm_data.get("condition", "")),
+                    content=tm_data.get("experience", ""),
+                    author=getattr(self.llm, 'model_name', 'system'),
+                    metadata=tm_data
+                )
+                task_memories.append(task_memory)
+
+            return task_memories
+
+        return self.llm.chat(messages=[Message(content=prompt)], callback_fn=parse_task_memories)
+
+    def _extract_hard_comparative_task_memory(self, success_steps: List[Message],
+                                              failure_steps: List[Message], similarity_score: float) -> List[
+            BaseMemory]:
+        """Extract hard comparative task memory (success vs failure)"""
+        prompt = self.prompt_format(
+            prompt_name="hard_comparative_step_task_memory_prompt",
+            success_steps=merge_messages_content(success_steps),
+            failure_steps=merge_messages_content(failure_steps),
+            similarity_score=similarity_score
+        )
+
+        def parse_task_memories(message: Message) -> List[BaseMemory]:
+            task_memories_data = parse_json_experience_response(message.content)
+            task_memories = []
+
+            for tm_data in task_memories_data:
+                task_memory = TaskMemory(
+                    workspace_id=self.context.get("workspace_id", ""),
+                    when_to_use=tm_data.get("when_to_use", tm_data.get("condition", "")),
+                    content=tm_data.get("experience", ""),
+                    author=getattr(self.llm, 'model_name', 'system'),
+                    metadata=tm_data
+                )
+                task_memories.append(task_memory)
+
+            return task_memories
+
+        return self.llm.chat(messages=[Message(content=prompt)], callback_fn=parse_task_memories)
+
+    @staticmethod
+    def _get_trajectory_steps(trajectory: Trajectory) -> List[Message]:
+        """Get trajectory steps, prioritizing segmented steps"""
+        if hasattr(trajectory, 'segments') and trajectory.segments:
+            # If there are segments, merge all segments
+            all_steps = []
+            for segment in trajectory.segments:
+                all_steps.extend(segment)
+            return all_steps
+        else:
+            return trajectory.messages
+
+    def _find_similar_step_sequences(self, success_trajectories: List[Trajectory],
+                                     failure_trajectories: List[Trajectory]) -> List[
+            Tuple[List[Message], List[Message], float]]:
+        """Find similar step sequences for comparison"""
+        if not self.op_params.get("enable_similarity_comparison", False):
+            return []
+
+        try:
+            similar_pairs = []
+
+            # Get step sequences (metadata is a dict, so check for the key rather than an attribute)
+            success_step_sequences = []
+            for traj in success_trajectories:
+                if isinstance(traj.metadata, dict) and traj.metadata.get("segments"):
+                    success_step_sequences.extend(traj.metadata["segments"])
+                else:
+                    success_step_sequences.append(traj.messages)
+
+            failure_step_sequences = []
+            for traj in failure_trajectories:
+                if isinstance(traj.metadata, dict) and traj.metadata.get("segments"):
+                    failure_step_sequences.extend(traj.metadata["segments"])
+                else:
+                    failure_step_sequences.append(traj.messages)
+
+            # Limit comparison count to avoid computational overload
+            max_sequences = self.op_params.get("max_similarity_sequences", 5)
+            success_step_sequences = success_step_sequences[:max_sequences]
+            failure_step_sequences = failure_step_sequences[:max_sequences]
+
+            if not success_step_sequences or not failure_step_sequences:
+                return []
+
+            # Generate text representation for embedding
+            success_texts = [merge_messages_content(seq) for seq in success_step_sequences]
+            failure_texts = [merge_messages_content(seq) for seq in failure_step_sequences]
+
+            # Get embedding vectors
+            if hasattr(self.context, 'vector_store') and self.context.vector_store and hasattr(
+                    self.context.vector_store, 'embedding_model'):
+                success_embeddings = self.context.vector_store.embedding_model.get_embeddings(success_texts)
+                failure_embeddings = self.context.vector_store.embedding_model.get_embeddings(failure_texts)
+
+                # Calculate similarity and find most similar pairs
+                similarity_threshold = self.op_params.get("similarity_threshold", 0.3)
+
+                for i, s_emb in enumerate(success_embeddings):
+                    for j, f_emb in enumerate(failure_embeddings):
+                        similarity = self._calculate_cosine_similarity(s_emb, f_emb)
+
+                        if similarity > similarity_threshold:
+                            similar_pairs.append((
+                                success_step_sequences[i],
+                                failure_step_sequences[j],
+                                similarity
+                            ))
+
+            # Return top most similar pairs
+            max_pairs = self.op_params.get("max_similarity_pairs", 3)
+            return sorted(similar_pairs, key=lambda x: x[2], reverse=True)[:max_pairs]
+
+        except Exception as e:
+            logger.error(f"Error finding similar step sequences: {e}")
+
+        return []
+
+    @staticmethod
+    def _calculate_cosine_similarity(embedding1: List[float], embedding2: List[float]) -> float:
+        """Calculate cosine similarity"""
+        import numpy as np
+
+        vec1 = np.array(embedding1)
+        vec2 = np.array(embedding2)
+
+        # Calculate cosine similarity
+        dot_product = np.dot(vec1, vec2)
+        norm1 = np.linalg.norm(vec1)
+        norm2 = np.linalg.norm(vec2)
+
+        if norm1 == 0 or norm2 == 0:
+            return 0.0
+
+        return dot_product / (norm1 * norm2)
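
Note: the nested loop above scores one (success, failure) pair per call to _calculate_cosine_similarity. A minimal vectorized sketch of the same computation, assuming numpy and the same list-of-floats embeddings (the helper name is illustrative, not part of the package):

    import numpy as np

    def cosine_similarity_matrix(success_embeddings, failure_embeddings):
        # Rows are L2-normalized, so the matrix product gives cosine similarities
        s = np.asarray(success_embeddings, dtype=float)  # shape (n_success, dim)
        f = np.asarray(failure_embeddings, dtype=float)  # shape (n_failure, dim)
        s_norm = np.linalg.norm(s, axis=1, keepdims=True)
        f_norm = np.linalg.norm(f, axis=1, keepdims=True)
        s_norm[s_norm == 0] = 1.0  # guard all-zero vectors, matching the 0.0 fallback above
        f_norm[f_norm == 0] = 1.0
        return (s / s_norm) @ (f / f_norm).T

Thresholding the result with np.argwhere(sims > similarity_threshold) then yields every candidate pair in one pass instead of one Python-level call per pair.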
reme_ai/summary/task/failure_extraction_op.py
@@ -0,0 +1,73 @@
+from typing import List
+
+from flowllm import C, BaseLLMOp
+from loguru import logger
+
+from reme_ai.schema import Message, Trajectory
+from reme_ai.schema.memory import BaseMemory, TaskMemory
+from reme_ai.utils.op_utils import merge_messages_content, parse_json_experience_response, get_trajectory_context
+
+
+@C.register_op()
+class FailureExtractionOp(BaseLLMOp):
+    file_path: str = __file__
+
+    def execute(self):
+        """Extract task memories from failed trajectories"""
+        failure_trajectories: List[Trajectory] = self.context.get("failure_trajectories", [])
+
+        if not failure_trajectories:
+            logger.info("No failure trajectories found for extraction")
+            return
+
+        logger.info(f"Extracting task memories from {len(failure_trajectories)} failed trajectories")
+
+        failure_task_memories = []
+
+        # Process trajectories
+        for trajectory in failure_trajectories:
+            if hasattr(trajectory, 'segments') and trajectory.segments:
+                # Process segmented step sequences
+                for segment in trajectory.segments:
+                    task_memories = self._extract_failure_task_memory_from_steps(segment, trajectory)
+                    failure_task_memories.extend(task_memories)
+            else:
+                # Process entire trajectory
+                task_memories = self._extract_failure_task_memory_from_steps(trajectory.messages, trajectory)
+                failure_task_memories.extend(task_memories)
+
+        logger.info(f"Extracted {len(failure_task_memories)} failure task memories")
+
+        # Add task memories to context
+        self.context.failure_task_memories = failure_task_memories
+
+    def _extract_failure_task_memory_from_steps(self, steps: List[Message], trajectory: Trajectory) -> List[BaseMemory]:
+        """Extract task memory from failed step sequences"""
+        step_content = merge_messages_content(steps)
+        context = get_trajectory_context(trajectory, steps)
+
+        prompt = self.prompt_format(
+            prompt_name="failure_step_task_memory_prompt",
+            query=trajectory.metadata.get('query', ''),
+            step_sequence=step_content,
+            context=context,
+            outcome="failed"
+        )
+
+        def parse_task_memories(message: Message) -> List[BaseMemory]:
+            task_memories_data = parse_json_experience_response(message.content)
+            task_memories = []
+
+            for tm_data in task_memories_data:
+                task_memory = TaskMemory(
+                    workspace_id=self.context.get("workspace_id", ""),
+                    when_to_use=tm_data.get("when_to_use", tm_data.get("condition", "")),
+                    content=tm_data.get("experience", ""),
+                    author=getattr(self.llm, 'model_name', 'system'),
+                    metadata=tm_data
+                )
+                task_memories.append(task_memory)
+
+            return task_memories
+
+        return self.llm.chat(messages=[Message(content=prompt)], callback_fn=parse_task_memories)
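
For reference, the parse_task_memories callbacks in this file and in comparative_extraction_op.py read each parsed item's when_to_use key (falling back to condition) and its experience key. A hypothetical example of the list shape parse_json_experience_response is expected to yield (field values are illustrative):

    # Hypothetical parsed LLM output, matching the keys read by parse_task_memories
    task_memories_data = [
        {
            "when_to_use": "When a tool call fails with a timeout",
            "experience": "Retry once with a smaller payload before escalating",
        },
        {
            "condition": "When a task needs multi-step file edits",  # fallback key
            "experience": "Verify each edit with a read-back before continuing",
        },
    ]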
reme_ai/summary/task/memory_deduplication_op.py
@@ -0,0 +1,163 @@
+from typing import List
+
+from flowllm import C, BaseOp
+from loguru import logger
+
+from reme_ai.schema.memory import BaseMemory
+
+
+@C.register_op()
+class MemoryDeduplicationOp(BaseOp):
+    file_path: str = __file__
+
+    def execute(self):
+        """Remove duplicate task memories"""
+        # Get task memories to deduplicate
+        task_memories: List[BaseMemory] = self.context.memory_list
+
+        if not task_memories:
+            logger.info("No task memories found for deduplication")
+            return
+
+        logger.info(f"Starting deduplication for {len(task_memories)} task memories")
+
+        # Perform deduplication
+        deduplicated_task_memories = self._deduplicate_task_memories(task_memories)
+
+        logger.info(
+            f"Deduplication complete: kept {len(deduplicated_task_memories)} of {len(task_memories)} task memories")
+
+        # Update context
+        self.context.memory_list = deduplicated_task_memories
+
+    def _deduplicate_task_memories(self, task_memories: List[BaseMemory]) -> List[BaseMemory]:
+        """Remove duplicate task memories"""
+        if not task_memories:
+            return task_memories
+
+        similarity_threshold = self.op_params.get("similarity_threshold", 0.5)
+        workspace_id = self.context.get("workspace_id")
+
+        unique_task_memories = []
+
+        # Get existing task memory embeddings
+        existing_embeddings = self._get_existing_task_memory_embeddings(workspace_id)
+
+        for task_memory in task_memories:
+            # Generate embedding for current task memory
+            current_embedding = self._get_task_memory_embedding(task_memory)
+
+            if current_embedding is None:
+                logger.warning(f"Failed to generate embedding for task memory: {str(task_memory.when_to_use)[:50]}...")
+                continue
+
+            # Check similarity with existing task memories
+            if self._is_similar_to_existing_task_memories(current_embedding, existing_embeddings, similarity_threshold):
+                logger.debug(f"Skipping similar task memory: {str(task_memory.when_to_use)[:50]}...")
+                continue
+
+            # Check similarity with current batch task memories
+            if self._is_similar_to_current_task_memories(current_embedding, unique_task_memories, similarity_threshold):
+                logger.debug(f"Skipping duplicate in current batch: {str(task_memory.when_to_use)[:50]}...")
+                continue
+
+            # Add to unique task memories list
+            unique_task_memories.append(task_memory)
+            logger.debug(f"Added unique task memory: {str(task_memory.when_to_use)[:50]}...")
+
+        return unique_task_memories
+
+    def _get_existing_task_memory_embeddings(self, workspace_id: str) -> List[List[float]]:
+        """Get embeddings of existing task memories"""
+        try:
+            if not hasattr(self.context, 'vector_store') or not self.context.vector_store or not workspace_id:
+                return []
+
+            # Query existing task memory nodes
+            existing_nodes = self.context.vector_store.search(
+                query="...",  # Placeholder query; retrieval is capped by top_k below
+                workspace_id=workspace_id,
+                top_k=self.op_params.get("max_existing_task_memories", 1000)
+            )
+
+            # Extract embeddings
+            existing_embeddings = []
+            for node in existing_nodes:
+                if hasattr(node, 'embedding') and node.embedding:
+                    existing_embeddings.append(node.embedding)
+
+            logger.debug(
+                f"Retrieved {len(existing_embeddings)} existing task memory embeddings from workspace {workspace_id}")
+            return existing_embeddings
+
+        except Exception as e:
+            logger.warning(f"Failed to retrieve existing task memory embeddings: {e}")
+            return []
+
+    def _get_task_memory_embedding(self, task_memory: BaseMemory) -> List[float] | None:
+        """Generate embedding for task memory"""
+        try:
+            if not hasattr(self.context, 'vector_store') or not self.context.vector_store:
+                return None
+
+            # Combine task memory description and content for embedding
+            text_for_embedding = f"{task_memory.when_to_use} {task_memory.content}"
+            embeddings = self.context.vector_store.embedding_model.get_embeddings([text_for_embedding])
+
+            if embeddings and len(embeddings) > 0:
+                return embeddings[0]
+            else:
+                logger.warning("Empty embedding generated for task memory")
+                return None
+
+        except Exception as e:
+            logger.error(f"Error generating embedding for task memory: {e}")
+            return None
+
+    def _is_similar_to_existing_task_memories(self, current_embedding: List[float],
+                                              existing_embeddings: List[List[float]],
+                                              threshold: float) -> bool:
+        """Check if current embedding is similar to existing embeddings"""
+        for existing_embedding in existing_embeddings:
+            similarity = self._calculate_cosine_similarity(current_embedding, existing_embedding)
+            if similarity > threshold:
+                logger.debug(f"Found similar existing task memory with similarity: {similarity:.3f}")
+                return True
+        return False
+
+    def _is_similar_to_current_task_memories(self, current_embedding: List[float],
+                                             current_task_memories: List[BaseMemory],
+                                             threshold: float) -> bool:
+        for existing_task_memory in current_task_memories:
+            existing_embedding = self._get_task_memory_embedding(existing_task_memory)
+            if existing_embedding is None:
+                continue
+
+            similarity = self._calculate_cosine_similarity(current_embedding, existing_embedding)
+            if similarity > threshold:
+                logger.debug(f"Found similar task memory in current batch with similarity: {similarity:.3f}")
+                return True
+        return False
+
+    @staticmethod
+    def _calculate_cosine_similarity(embedding1: List[float], embedding2: List[float]) -> float:
+        """Calculate cosine similarity"""
+        try:
+            import numpy as np
+
+            vec1 = np.array(embedding1)
+            vec2 = np.array(embedding2)
+
+            # Calculate cosine similarity
+            dot_product = np.dot(vec1, vec2)
+            norm1 = np.linalg.norm(vec1)
+            norm2 = np.linalg.norm(vec2)
+
+            if norm1 == 0 or norm2 == 0:
+                return 0.0
+
+            return dot_product / (norm1 * norm2)
+
+        except Exception as e:
+            logger.error(f"Error calculating cosine similarity: {e}")
+            return 0.0
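
Note: _is_similar_to_current_task_memories re-embeds every already-kept memory on each call, so a batch of n memories can trigger O(n^2) embedding requests. A minimal sketch of one way around that, caching each kept embedding next to its memory (illustrative only, not the package's implementation):

    def deduplicate_with_cached_embeddings(op, task_memories, existing_embeddings, threshold):
        # Single-pass dedup that embeds each candidate exactly once
        kept, kept_embeddings = [], []
        for memory in task_memories:
            embedding = op._get_task_memory_embedding(memory)
            if embedding is None:
                continue
            # Compare against workspace embeddings and the embeddings kept so far
            if any(op._calculate_cosine_similarity(embedding, other) > threshold
                   for other in existing_embeddings + kept_embeddings):
                continue
            kept.append(memory)
            kept_embeddings.append(embedding)
        return kept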
reme_ai/summary/task/memory_validation_op.py
@@ -0,0 +1,108 @@
+import json
+import re
+from typing import List, Dict, Any
+
+from flowllm import C, BaseLLMOp
+from loguru import logger
+
+from reme_ai.schema import Message
+from reme_ai.schema.memory import BaseMemory
+
+
+@C.register_op()
+class MemoryValidationOp(BaseLLMOp):
+    file_path: str = __file__
+
+    def execute(self):
+        """Validate quality of extracted task memories"""
+
+        task_memories: List[BaseMemory] = []
+        task_memories.extend(self.context.get("success_task_memories", []))
+        task_memories.extend(self.context.get("failure_task_memories", []))
+        task_memories.extend(self.context.get("comparative_task_memories", []))
+
+        if not task_memories:
+            logger.info("No task memories found for validation")
+            return
+
+        logger.info(f"Validating {len(task_memories)} extracted task memories")
+
+        # Validate task memories
+        validated_task_memories = []
+
+        for task_memory in task_memories:
+            validation_result = self._validate_single_task_memory(task_memory)
+            if validation_result and validation_result.get("is_valid", False):
+                task_memory.score = validation_result.get("score", 0.0)
+                validated_task_memories.append(task_memory)
+            else:
+                reason = validation_result.get("reason", "Unknown reason") if validation_result else "Validation failed"
+                logger.warning(f"Task memory validation failed: {reason}")
+
+        logger.info(f"Validated {len(validated_task_memories)} out of {len(task_memories)} task memories")
+
+        # Update context
+        self.context.response.answer = json.dumps([x.model_dump() for x in validated_task_memories])
+        self.context.response.metadata["memory_list"] = validated_task_memories
+
+    def _validate_single_task_memory(self, task_memory: BaseMemory) -> Dict[str, Any]:
+        """Validate single task memory"""
+        validation_info = self._llm_validate_task_memory(task_memory)
+        logger.info(f"Validation result: {validation_info}")
+        return validation_info
+
+    def _llm_validate_task_memory(self, task_memory: BaseMemory) -> Dict[str, Any]:
+        """Validate task memory using LLM"""
+        try:
+            prompt = self.prompt_format(
+                prompt_name="task_memory_validation_prompt",
+                condition=task_memory.when_to_use,
+                task_memory_content=task_memory.content)
+
+            def parse_validation(message: Message) -> Dict[str, Any]:
+                try:
+                    response_content = message.content
+
+                    # Parse validation result:
+                    # extract fenced JSON blocks from the response
+                    json_pattern = r'```json\s*([\s\S]*?)\s*```'
+                    json_blocks = re.findall(json_pattern, response_content)
+
+                    if json_blocks:
+                        parsed = json.loads(json_blocks[0])
+                    else:
+                        parsed = {}
+
+                    is_valid = parsed.get("is_valid", True)
+                    score = parsed.get("score", 0.5)
+
+                    # Set validation threshold
+                    validation_threshold = self.op_params.get("validation_threshold", 0.5)
+
+                    return {
+                        "is_valid": is_valid and score >= validation_threshold,
+                        "score": score,
+                        "feedback": response_content,
+                        "reason": "" if (
+                                is_valid and score >= validation_threshold) else f"Low validation score ({score:.2f}) or marked as invalid"
+                    }
+
+                except Exception as e_inner:
+                    logger.exception(f"Error parsing validation response: {e_inner}")
+                    return {
+                        "is_valid": False,
+                        "score": 0.0,
+                        "feedback": "",
+                        "reason": f"Parse error: {str(e_inner)}"
+                    }
+
+            return self.llm.chat(messages=[Message(content=prompt)], callback_fn=parse_validation)
+
+        except Exception as e:
+            logger.error(f"LLM validation failed: {e}")
+            return {
+                "is_valid": False,
+                "score": 0.0,
+                "feedback": "",
+                "reason": f"LLM validation error: {str(e)}"
            }
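
The parse_validation callback accepts a reply only if it carries a fenced json block with is_valid and score; missing or unparseable JSON falls back to is_valid=True and score=0.5, which the threshold check then filters. A small self-contained check of that parsing path, using a hypothetical LLM reply:

    import json
    import re

    # Hypothetical LLM reply wrapping its verdict in a fenced json block
    response_content = 'Assessment below.\n```json\n{"is_valid": true, "score": 0.8}\n```'

    json_blocks = re.findall(r'```json\s*([\s\S]*?)\s*```', response_content)
    parsed = json.loads(json_blocks[0]) if json_blocks else {}
    assert parsed == {"is_valid": True, "score": 0.8}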
reme_ai/summary/task/pdf_preprocess_op_wrapper.py
@@ -0,0 +1,50 @@
+from flowllm import C, BaseOp
+from loguru import logger
+
+from reme_ai.utils.miner_u_pdf_processor import MinerUPDFProcessor, chunk_pdf_content
+
+
+@C.register_op()
+class PDFPreprocessOp(BaseOp):
+    file_path: str = __file__
+
+    def execute(self):
+        """Process PDF files using MinerU and chunk content"""
+        pdf_path = self.context.get("pdf_path")
+        output_dir = self.context.get("output_dir")
+
+        if not pdf_path:
+            logger.error("No PDF path provided in context")
+            return
+
+        # Process PDF
+        processor = MinerUPDFProcessor(log_level="INFO")
+
+        try:
+            content_list, markdown_content = processor.process_pdf(
+                pdf_path=pdf_path,
+                output_dir=output_dir,
+                method=self.op_params.get("method", "auto"),
+                lang=self.op_params.get("lang"),
+                backend=self.op_params.get("backend", "pipeline")
+            )
+
+            # Create chunks if requested
+            chunks = []
+            if self.op_params.get("create_chunks", True):
+                max_length = self.op_params.get("max_chunk_length", 4000)
+                chunks = chunk_pdf_content(content_list, max_length=max_length)
+
+            # Store results in context
+            self.context.pdf_content_list = content_list
+            self.context.pdf_markdown_content = markdown_content
+            self.context.pdf_chunks = chunks
+
+            logger.info(f"PDF processing completed: {len(content_list)} content blocks, "
+                        f"{len(chunks)} chunks, {len(markdown_content)} characters of markdown")
+
+        except Exception as e:
+            logger.error(f"PDF processing failed: {e}")
+            self.context.pdf_content_list = []
+            self.context.pdf_markdown_content = ""
+            self.context.pdf_chunks = []
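
For orientation: PDFPreprocessOp reads pdf_path and output_dir from the flow context and its tuning knobs from op_params. A hypothetical sketch of the inputs it consumes, with plain dicts as stand-ins (the real objects are wired up by flowllm's flow configuration):

    # Hypothetical stand-ins for the flowllm context and op_params this op reads
    context = {
        "pdf_path": "docs/report.pdf",  # required; the op logs an error and returns if missing
        "output_dir": "output/",        # optional directory for MinerU artifacts
    }
    op_params = {
        "method": "auto",               # MinerU parsing method
        "lang": None,                   # optional OCR language hint
        "backend": "pipeline",          # MinerU backend
        "create_chunks": True,          # chunk content_list after parsing
        "max_chunk_length": 4000,       # max characters per chunk passed to chunk_pdf_content
    }

On success the op stores pdf_content_list, pdf_markdown_content, and pdf_chunks back on the context; on failure it stores empty values for all three.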