reme-ai 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. reme_ai/__init__.py +6 -0
  2. reme_ai/app.py +17 -0
  3. reme_ai/config/__init__.py +0 -0
  4. reme_ai/config/config_parser.py +6 -0
  5. reme_ai/constants/__init__.py +7 -0
  6. reme_ai/constants/common_constants.py +48 -0
  7. reme_ai/constants/language_constants.py +215 -0
  8. reme_ai/enumeration/__init__.py +0 -0
  9. reme_ai/enumeration/language_constants.py +215 -0
  10. reme_ai/react/__init__.py +1 -0
  11. reme_ai/react/simple_react_op.py +21 -0
  12. reme_ai/retrieve/__init__.py +2 -0
  13. reme_ai/retrieve/personal/__init__.py +17 -0
  14. reme_ai/retrieve/personal/extract_time_op.py +97 -0
  15. reme_ai/retrieve/personal/fuse_rerank_op.py +180 -0
  16. reme_ai/retrieve/personal/print_memory_op.py +131 -0
  17. reme_ai/retrieve/personal/read_message_op.py +52 -0
  18. reme_ai/retrieve/personal/retrieve_memory_op.py +13 -0
  19. reme_ai/retrieve/personal/semantic_rank_op.py +170 -0
  20. reme_ai/retrieve/personal/set_query_op.py +37 -0
  21. reme_ai/retrieve/task/__init__.py +4 -0
  22. reme_ai/retrieve/task/build_query_op.py +38 -0
  23. reme_ai/retrieve/task/merge_memory_op.py +27 -0
  24. reme_ai/retrieve/task/rerank_memory_op.py +149 -0
  25. reme_ai/retrieve/task/rewrite_memory_op.py +149 -0
  26. reme_ai/schema/__init__.py +1 -0
  27. reme_ai/schema/memory.py +144 -0
  28. reme_ai/summary/__init__.py +2 -0
  29. reme_ai/summary/personal/__init__.py +8 -0
  30. reme_ai/summary/personal/contra_repeat_op.py +143 -0
  31. reme_ai/summary/personal/get_observation_op.py +147 -0
  32. reme_ai/summary/personal/get_observation_with_time_op.py +165 -0
  33. reme_ai/summary/personal/get_reflection_subject_op.py +179 -0
  34. reme_ai/summary/personal/info_filter_op.py +177 -0
  35. reme_ai/summary/personal/load_today_memory_op.py +117 -0
  36. reme_ai/summary/personal/long_contra_repeat_op.py +210 -0
  37. reme_ai/summary/personal/update_insight_op.py +244 -0
  38. reme_ai/summary/task/__init__.py +10 -0
  39. reme_ai/summary/task/comparative_extraction_op.py +233 -0
  40. reme_ai/summary/task/failure_extraction_op.py +73 -0
  41. reme_ai/summary/task/memory_deduplication_op.py +163 -0
  42. reme_ai/summary/task/memory_validation_op.py +108 -0
  43. reme_ai/summary/task/pdf_preprocess_op_wrapper.py +50 -0
  44. reme_ai/summary/task/simple_comparative_summary_op.py +71 -0
  45. reme_ai/summary/task/simple_summary_op.py +67 -0
  46. reme_ai/summary/task/success_extraction_op.py +73 -0
  47. reme_ai/summary/task/trajectory_preprocess_op.py +76 -0
  48. reme_ai/summary/task/trajectory_segmentation_op.py +118 -0
  49. reme_ai/utils/__init__.py +0 -0
  50. reme_ai/utils/datetime_handler.py +345 -0
  51. reme_ai/utils/miner_u_pdf_processor.py +726 -0
  52. reme_ai/utils/op_utils.py +115 -0
  53. reme_ai/vector_store/__init__.py +6 -0
  54. reme_ai/vector_store/delete_memory_op.py +25 -0
  55. reme_ai/vector_store/recall_vector_store_op.py +36 -0
  56. reme_ai/vector_store/update_memory_freq_op.py +33 -0
  57. reme_ai/vector_store/update_memory_utility_op.py +32 -0
  58. reme_ai/vector_store/update_vector_store_op.py +32 -0
  59. reme_ai/vector_store/vector_store_action_op.py +55 -0
  60. reme_ai-0.1.0.dist-info/METADATA +218 -0
  61. reme_ai-0.1.0.dist-info/RECORD +65 -0
  62. reme_ai-0.1.0.dist-info/WHEEL +5 -0
  63. reme_ai-0.1.0.dist-info/entry_points.txt +2 -0
  64. reme_ai-0.1.0.dist-info/licenses/LICENSE +201 -0
  65. reme_ai-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,71 @@
1
+ import json
2
+ from typing import List, Dict
3
+
4
+ from flowllm import C, BaseLLMOp
5
+ from loguru import logger
6
+
7
+ from reme_ai.schema import Message, Trajectory
8
+ from reme_ai.schema.memory import BaseMemory, TaskMemory
9
+ from reme_ai.utils.op_utils import merge_messages_content
10
+
11
+
12
@C.register_op()
class SimpleComparativeSummaryOp(BaseLLMOp):
    """Summarize task memories by comparing best and worst trajectories per task.

    Trajectories sharing a ``task_id`` are grouped; when the highest score
    strictly exceeds the lowest, the LLM contrasts the two execution processes
    and emits reusable ``TaskMemory`` entries. Results are written to
    ``context.response`` (JSON answer plus ``memory_list`` metadata).
    """

    file_path: str = __file__

    def compare_summary_trajectory(self, trajectory_a: Trajectory, trajectory_b: Trajectory) -> List[BaseMemory]:
        """Ask the LLM to contrast two trajectories and parse the returned memories.

        Args:
            trajectory_a: The higher-scoring trajectory.
            trajectory_b: The lower-scoring trajectory.

        Returns:
            Parsed ``TaskMemory`` objects (possibly empty).

        Raises:
            Exception: Re-raised after logging when the LLM reply cannot be parsed.
        """
        summary_prompt = self.prompt_format(prompt_name="summary_prompt",
                                            execution_process_a=merge_messages_content(trajectory_a.messages),
                                            execution_process_b=merge_messages_content(trajectory_b.messages),
                                            summary_example=self.get_prompt("summary_example"))

        def parse_content(message: Message):
            content = message.content
            task_memory_list = []
            try:
                # Guard against replies without a fenced code block: the previous
                # unconditional split("```")[1] raised IndexError on plain-text
                # replies. (Matches the guard already used in SimpleSummaryOp.)
                if "```" in content:
                    content = content.split("```")[1].strip()

                # Remove a leading "json" language tag with an explicit prefix
                # slice. str.strip("json") would strip any of the characters
                # j/s/o/n from BOTH ends and can corrupt the payload.
                if content.startswith("json"):
                    content = content[len("json"):].strip()

                for tm_dict in json.loads(content):
                    when_to_use = tm_dict.get("when_to_use", "").strip()
                    task_memory_content = tm_dict.get("experience", "").strip()
                    # Keep only complete entries; skip ones missing either field.
                    if when_to_use and task_memory_content:
                        task_memory_list.append(TaskMemory(workspace_id=self.context.get("workspace_id", ""),
                                                           when_to_use=when_to_use,
                                                           content=task_memory_content,
                                                           author=getattr(self.llm, 'model_name', 'system')))

                return task_memory_list

            except Exception as e:
                logger.exception(f"parse content failed!\n{content}")
                raise e

        return self.llm.chat(messages=[Message(content=summary_prompt)], callback_fn=parse_content)

    def execute(self):
        """Group trajectories by task id and summarize each best-vs-worst pair."""
        trajectories: list = self.context.get("trajectories", [])
        trajectories: List[Trajectory] = [Trajectory(**x) if isinstance(x, dict) else x for x in trajectories]

        # Group trajectories that belong to the same task.
        task_id_dict: Dict[str, List[Trajectory]] = {}
        for trajectory in trajectories:
            task_id_dict.setdefault(trajectory.task_id, []).append(trajectory)

        memory_list = []
        for task_id, task_trajectories in task_id_dict.items():
            task_trajectories: List[Trajectory] = sorted(task_trajectories, key=lambda x: x.score, reverse=True)
            # Need at least two trajectories to form a comparison pair.
            if len(task_trajectories) < 2:
                continue

            # Only compare when there is an actual score gap between best and worst.
            if task_trajectories[0].score > task_trajectories[-1].score:
                task_memories = self.compare_summary_trajectory(trajectory_a=task_trajectories[0],
                                                                trajectory_b=task_trajectories[-1])
                memory_list.extend(task_memories)

        self.context.response.answer = json.dumps([x.model_dump() for x in memory_list])
        self.context.response.metadata["memory_list"] = memory_list
        for tm in memory_list:
            logger.info(f"add task memory when_to_use={tm.when_to_use}\ncontent={tm.content}")
@@ -0,0 +1,67 @@
1
+ import json
2
+ from typing import List
3
+
4
+ from flowllm import C, BaseLLMOp
5
+ from loguru import logger
6
+
7
+ from reme_ai.schema import Message, Trajectory
8
+ from reme_ai.schema.memory import BaseMemory, TaskMemory
9
+ from reme_ai.utils.op_utils import merge_messages_content
10
+
11
+
12
@C.register_op()
class SimpleSummaryOp(BaseLLMOp):
    """Summarize each trajectory into reusable task memories via the LLM.

    Each trajectory is labeled "success" or "fail" against a configurable score
    threshold, summarized by the LLM, and the resulting ``TaskMemory`` entries
    are written to ``context.response`` (JSON answer plus ``memory_list``).
    """

    file_path: str = __file__

    def summary_trajectory(self, trajectory: Trajectory) -> List[BaseMemory]:
        """Summarize one trajectory into zero or more ``TaskMemory`` entries.

        Args:
            trajectory: The trajectory whose messages are summarized.

        Returns:
            Parsed ``TaskMemory`` objects (possibly empty).

        Raises:
            Exception: Re-raised after logging when the LLM reply cannot be parsed.
        """
        execution_process = merge_messages_content(trajectory.messages)
        # Scores at or above this threshold count as success (default 0.9).
        success_score_threshold: float = self.op_params.get("success_score_threshold", 0.9)
        logger.info(f"success_score_threshold={success_score_threshold}")

        execution_result = "success" if trajectory.score >= success_score_threshold else "fail"
        summary_prompt = self.prompt_format(prompt_name="summary_prompt",
                                            execution_process=execution_process,
                                            execution_result=execution_result,
                                            summary_example=self.get_prompt("summary_example"))

        def parse_content(message: Message):
            content = message.content
            memory_list = []
            try:
                if "```" in content:
                    content = content.split("```")[1].strip()

                # Remove a leading "json" language tag with an explicit prefix
                # slice. str.strip("json") would strip any of the characters
                # j/s/o/n from BOTH ends and can corrupt the payload.
                if content.startswith("json"):
                    content = content[len("json"):].strip()

                for exp_dict in json.loads(content):
                    when_to_use = exp_dict.get("when_to_use", "").strip()
                    memory = exp_dict.get("memory", "").strip()
                    # Keep only complete entries; skip ones missing either field.
                    if when_to_use and memory:
                        memory_list.append(TaskMemory(workspace_id=self.context.get("workspace_id", ""),
                                                      when_to_use=when_to_use,
                                                      content=memory,
                                                      author=getattr(self.llm, "model_name", "system")))

                return memory_list

            except Exception as e:
                logger.exception(f"parse content failed!\n{content}")
                raise e

        return self.llm.chat(messages=[Message(content=summary_prompt)], callback_fn=parse_content)

    def execute(self):
        """Summarize every trajectory in the context and publish the memories."""
        # .get with a default keeps this consistent with sibling ops and avoids
        # failing when the key is absent from the context.
        trajectories: list = self.context.get("trajectories", [])
        trajectories: List[Trajectory] = [Trajectory(**x) if isinstance(x, dict) else x for x in trajectories]

        memory_list: List[BaseMemory] = []
        for trajectory in trajectories:
            memories = self.summary_trajectory(trajectory)
            if memories:
                memory_list.extend(memories)

        self.context.response.answer = json.dumps([x.model_dump() for x in memory_list])
        self.context.response.metadata["memory_list"] = memory_list
        for memory in memory_list:
            logger.info(f"add memory: when_to_use={memory.when_to_use}\ncontent={memory.content}")
@@ -0,0 +1,73 @@
1
+ from typing import List
2
+
3
+ from flowllm import C, BaseLLMOp
4
+ from loguru import logger
5
+
6
+ from reme_ai.schema import Message, Trajectory
7
+ from reme_ai.schema.memory import BaseMemory, TaskMemory
8
+ from reme_ai.utils.op_utils import merge_messages_content, parse_json_experience_response, get_trajectory_context
9
+
10
+
11
@C.register_op()
class SuccessExtractionOp(BaseLLMOp):
    """Extract reusable task memories from successful trajectories."""

    file_path: str = __file__

    def execute(self):
        """Extract task memories from successful trajectories"""
        success_trajectories: List[Trajectory] = self.context.success_trajectories

        if not success_trajectories:
            logger.info("No success trajectories found for extraction")
            return

        logger.info(f"Extracting task memories from {len(success_trajectories)} successful trajectories")

        success_task_memories = []
        for traj in success_trajectories:
            # Segmented trajectories are summarized per segment; otherwise the
            # whole message list is summarized at once.
            if "segments" in traj.metadata:
                step_groups = traj.metadata["segments"]
            else:
                step_groups = [traj.messages]

            for steps in step_groups:
                success_task_memories.extend(self._extract_success_task_memory_from_steps(steps, traj))

        logger.info(f"Extracted {len(success_task_memories)} success task memories")

        # Publish for downstream operators.
        self.context.success_task_memories = success_task_memories

    def _extract_success_task_memory_from_steps(self, steps: List[Message], trajectory: Trajectory) -> List[BaseMemory]:
        """Extract task memory from successful step sequences"""
        prompt = self.prompt_format(
            prompt_name="success_step_task_memory_prompt",
            query=trajectory.metadata.get('query', ''),
            step_sequence=merge_messages_content(steps),
            context=get_trajectory_context(trajectory, steps),
            outcome="successful"
        )

        def parse_task_memories(message: Message) -> List[BaseMemory]:
            # Each parsed JSON entry becomes one TaskMemory; the raw entry is
            # kept in the memory's metadata.
            return [
                TaskMemory(
                    workspace_id=self.context.get("workspace_id", ""),
                    when_to_use=item.get("when_to_use", item.get("condition", "")),
                    content=item.get("experience", ""),
                    author=getattr(self.llm, 'model_name', 'system'),
                    metadata=item,
                )
                for item in parse_json_experience_response(message.content)
            ]

        return self.llm.chat(messages=[Message(content=prompt)], callback_fn=parse_task_memories)
@@ -0,0 +1,76 @@
1
+ import json
2
+ from typing import List, Dict
3
+
4
+ from flowllm import C, BaseOp
5
+ from loguru import logger
6
+
7
+ from reme_ai.schema import Trajectory
8
+
9
+
10
@C.register_op()
class TrajectoryPreprocessOp(BaseOp):
    """Preprocess raw trajectories: normalize tool calls, then classify by score.

    Downstream operators read ``success_trajectories``, ``failure_trajectories``
    and ``all_trajectories`` from the context.
    """

    file_path: str = __file__

    def execute(self):
        """Preprocess trajectories: validate and classify"""
        trajectories: list = self.context.get("trajectories", [])

        # Dict inputs get their tool calls flattened before model validation;
        # already-constructed Trajectory objects pass through untouched.
        new_trajectories: List[Trajectory] = []
        for x in trajectories:
            if isinstance(x, dict):
                x["messages"] = self._modify_tool_calls(x["messages"])
                new_trajectories.append(Trajectory(**x))
            else:
                new_trajectories.append(x)
        trajectories = new_trajectories

        # Classify trajectories
        classified = self._classify_trajectories(trajectories)
        logger.info(f"Classified trajectories - Success: {len(classified['success'])}, "
                    f"Failure: {len(classified['failure'])}, All: {len(classified['all'])}")

        # Set context for downstream operators
        self.context.success_trajectories = classified['success']
        self.context.failure_trajectories = classified['failure']
        self.context.all_trajectories = classified['all']

    def _classify_trajectories(self, trajectories: List[Trajectory]) -> Dict[str, List[Trajectory]]:
        """Classify trajectories based on score threshold"""
        success_trajectories = []
        failure_trajectories = []

        # A trajectory counts as a success when its score reaches the threshold
        # (default 1.0, i.e. only perfect scores are successes).
        success_threshold = self.op_params.get("success_threshold", 1.0)

        for traj in trajectories:
            if traj.score >= success_threshold:
                success_trajectories.append(traj)
            else:
                failure_trajectories.append(traj)

        return {
            'success': success_trajectories,
            'failure': failure_trajectories,
            'all': trajectories
        }

    def _modify_tool_calls(self, messages: List[Dict]) -> List[Dict]:
        """Flatten nested tool-call payloads in each message, in place.

        ``{"type": "function", "function": {"name": ..., "arguments": "<json>"}}``
        becomes ``{"name": ..., "arguments": <parsed dict>, ...}``.
        """
        new_messages = []

        for msg in messages:
            if 'tool_calls' in msg:
                processed_tool_calls = []
                for tool_call in msg['tool_calls']:
                    tool_type = tool_call.get("type", "function")
                    nested_data = tool_call.get(tool_type, {})
                    # Default to "{}" so a missing or empty arguments string
                    # parses to an empty dict instead of raising
                    # json.JSONDecodeError (json.loads("") is invalid).
                    tool_call.update({
                        "arguments": json.loads(nested_data.get("arguments") or "{}"),
                        "name": nested_data.get("name", "")
                    })
                    # pop with a default: the nested key may be absent, which
                    # previously raised KeyError even though nested_data was
                    # safely defaulted above.
                    tool_call.pop(tool_type, None)
                    processed_tool_calls.append(tool_call)
                msg['tool_calls'] = processed_tool_calls
            new_messages.append(msg)

        return new_messages
@@ -0,0 +1,118 @@
1
+ import json
2
+ import re
3
+ from typing import List
4
+
5
+ from flowllm import C, BaseLLMOp
6
+ from loguru import logger
7
+
8
+ from reme_ai.schema import Message, Trajectory
9
+
10
+
11
@C.register_op()
class TrajectorySegmentationOp(BaseLLMOp):
    """Split trajectories into coherent step segments with LLM assistance."""

    file_path: str = __file__

    def execute(self):
        """Segment trajectories into meaningful steps"""
        all_trajectories: List[Trajectory] = self.context.get("all_trajectories", [])
        success_trajectories: List[Trajectory] = self.context.get("success_trajectories", [])
        failure_trajectories: List[Trajectory] = self.context.get("failure_trajectories", [])

        if not all_trajectories:
            logger.warning("No trajectories found in context")
            return

        # Select which subset to segment, per configuration.
        targets = self._get_target_trajectories(all_trajectories, success_trajectories,
                                                failure_trajectories)

        # Attach the computed segments to each trajectory's metadata in place.
        segmented_count = 0
        for traj in targets:
            traj.metadata["segments"] = self._llm_segment_trajectory(traj)
            segmented_count += 1

        logger.info(f"Segmented {segmented_count} trajectories")

    def _get_target_trajectories(self, all_trajectories: List[Trajectory],
                                 success_trajectories: List[Trajectory],
                                 failure_trajectories: List[Trajectory]) -> List[Trajectory]:
        """Determine which trajectories to segment based on configuration"""
        # "success" / "failure" pick the corresponding subset; anything else
        # (including the default "all") selects every trajectory.
        subset_by_target = {
            "success": success_trajectories,
            "failure": failure_trajectories,
        }
        return subset_by_target.get(self.op_params.get("segment_target", "all"), all_trajectories)

    def _llm_segment_trajectory(self, trajectory: Trajectory) -> List[List[Message]]:
        """Use LLM for trajectory segmentation"""
        prompt = self.prompt_format(
            prompt_name="step_segmentation_prompt",
            query=trajectory.metadata.get('query', ''),
            trajectory_content=self._format_trajectory_content(trajectory),
            total_steps=len(trajectory.messages))

        steps = trajectory.messages

        def parse_segmentation(message: Message) -> List[List[Message]]:
            boundaries = self._parse_segmentation_response(message.content)

            segments: List[List[Message]] = []
            cursor = 0
            for boundary in boundaries:
                # Accept only boundaries that move forward and stay in range.
                if cursor < boundary <= len(steps):
                    segments.append(steps[cursor:boundary])
                    cursor = boundary

            # Whatever remains after the last boundary forms the final segment.
            if cursor < len(steps):
                segments.append(steps[cursor:])

            # Fall back to one segment spanning the whole trajectory.
            return segments if segments else [steps]

        return self.llm.chat(messages=[Message(content=prompt)], callback_fn=parse_segmentation,
                             default_value=[steps])

    @staticmethod
    def _format_trajectory_content(trajectory: Trajectory) -> str:
        """Format trajectory content for LLM processing"""
        return "".join(f"Step {idx + 1} ({msg.role.value}):\n{msg.content}\n\n"
                       for idx, msg in enumerate(trajectory.messages))

    @staticmethod
    def _parse_segmentation_response(response: str) -> List[int]:
        """Parse segmentation response from LLM"""
        points: List[int] = []

        # Preferred path: a fenced ```json``` block holding either a list of
        # indices or an object with a "segment_points" key.
        fenced = re.findall(r'```json\s*([\s\S]*?)\s*```', response)
        if fenced:
            try:
                parsed = json.loads(fenced[0])
                if isinstance(parsed, dict) and "segment_points" in parsed:
                    points = parsed["segment_points"]
                elif isinstance(parsed, list):
                    points = parsed
            except json.JSONDecodeError:
                pass

        # Fallback: harvest every positive integer in the raw response.
        if not points:
            points = [int(token) for token in re.findall(r'\b\d+\b', response) if int(token) > 0]

        return sorted(set(points))
File without changes