noesium 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noesium/core/__init__.py +4 -0
- noesium/core/agent/__init__.py +14 -0
- noesium/core/agent/base.py +227 -0
- noesium/core/consts.py +6 -0
- noesium/core/goalith/conflict/conflict.py +104 -0
- noesium/core/goalith/conflict/detector.py +53 -0
- noesium/core/goalith/decomposer/__init__.py +6 -0
- noesium/core/goalith/decomposer/base.py +46 -0
- noesium/core/goalith/decomposer/callable_decomposer.py +65 -0
- noesium/core/goalith/decomposer/llm_decomposer.py +326 -0
- noesium/core/goalith/decomposer/prompts.py +140 -0
- noesium/core/goalith/decomposer/simple_decomposer.py +61 -0
- noesium/core/goalith/errors.py +22 -0
- noesium/core/goalith/goalgraph/graph.py +526 -0
- noesium/core/goalith/goalgraph/node.py +179 -0
- noesium/core/goalith/replanner/base.py +31 -0
- noesium/core/goalith/replanner/replanner.py +36 -0
- noesium/core/goalith/service.py +26 -0
- noesium/core/llm/__init__.py +154 -0
- noesium/core/llm/base.py +152 -0
- noesium/core/llm/litellm.py +528 -0
- noesium/core/llm/llamacpp.py +487 -0
- noesium/core/llm/message.py +184 -0
- noesium/core/llm/ollama.py +459 -0
- noesium/core/llm/openai.py +520 -0
- noesium/core/llm/openrouter.py +89 -0
- noesium/core/llm/prompt.py +551 -0
- noesium/core/memory/__init__.py +11 -0
- noesium/core/memory/base.py +464 -0
- noesium/core/memory/memu/__init__.py +24 -0
- noesium/core/memory/memu/config/__init__.py +26 -0
- noesium/core/memory/memu/config/activity/config.py +46 -0
- noesium/core/memory/memu/config/event/config.py +46 -0
- noesium/core/memory/memu/config/markdown_config.py +241 -0
- noesium/core/memory/memu/config/profile/config.py +48 -0
- noesium/core/memory/memu/llm_adapter.py +129 -0
- noesium/core/memory/memu/memory/__init__.py +31 -0
- noesium/core/memory/memu/memory/actions/__init__.py +40 -0
- noesium/core/memory/memu/memory/actions/add_activity_memory.py +299 -0
- noesium/core/memory/memu/memory/actions/base_action.py +342 -0
- noesium/core/memory/memu/memory/actions/cluster_memories.py +262 -0
- noesium/core/memory/memu/memory/actions/generate_suggestions.py +198 -0
- noesium/core/memory/memu/memory/actions/get_available_categories.py +66 -0
- noesium/core/memory/memu/memory/actions/link_related_memories.py +515 -0
- noesium/core/memory/memu/memory/actions/run_theory_of_mind.py +254 -0
- noesium/core/memory/memu/memory/actions/update_memory_with_suggestions.py +514 -0
- noesium/core/memory/memu/memory/embeddings.py +130 -0
- noesium/core/memory/memu/memory/file_manager.py +306 -0
- noesium/core/memory/memu/memory/memory_agent.py +578 -0
- noesium/core/memory/memu/memory/recall_agent.py +376 -0
- noesium/core/memory/memu/memory_store.py +628 -0
- noesium/core/memory/models.py +149 -0
- noesium/core/msgbus/__init__.py +12 -0
- noesium/core/msgbus/base.py +395 -0
- noesium/core/orchestrix/__init__.py +0 -0
- noesium/core/py.typed +0 -0
- noesium/core/routing/__init__.py +20 -0
- noesium/core/routing/base.py +66 -0
- noesium/core/routing/router.py +241 -0
- noesium/core/routing/strategies/__init__.py +9 -0
- noesium/core/routing/strategies/dynamic_complexity.py +361 -0
- noesium/core/routing/strategies/self_assessment.py +147 -0
- noesium/core/routing/types.py +38 -0
- noesium/core/toolify/__init__.py +39 -0
- noesium/core/toolify/base.py +360 -0
- noesium/core/toolify/config.py +138 -0
- noesium/core/toolify/mcp_integration.py +275 -0
- noesium/core/toolify/registry.py +214 -0
- noesium/core/toolify/toolkits/__init__.py +1 -0
- noesium/core/tracing/__init__.py +37 -0
- noesium/core/tracing/langgraph_hooks.py +308 -0
- noesium/core/tracing/opik_tracing.py +144 -0
- noesium/core/tracing/token_tracker.py +166 -0
- noesium/core/utils/__init__.py +10 -0
- noesium/core/utils/logging.py +172 -0
- noesium/core/utils/statistics.py +12 -0
- noesium/core/utils/typing.py +17 -0
- noesium/core/vector_store/__init__.py +79 -0
- noesium/core/vector_store/base.py +94 -0
- noesium/core/vector_store/pgvector.py +304 -0
- noesium/core/vector_store/weaviate.py +383 -0
- noesium-0.1.0.dist-info/METADATA +525 -0
- noesium-0.1.0.dist-info/RECORD +86 -0
- noesium-0.1.0.dist-info/WHEEL +5 -0
- noesium-0.1.0.dist-info/licenses/LICENSE +21 -0
- noesium-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,514 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Update Memory with Suggestions Action
|
|
3
|
+
|
|
4
|
+
Updates memory categories based on new memory items and suggestions, supporting different operation types:
|
|
5
|
+
- ADD: Add new content
|
|
6
|
+
- UPDATE: Modify existing content
|
|
7
|
+
- DELETE: Delete specific content
|
|
8
|
+
- TOUCH: Use current content but don't update (mark as accessed)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import logging
|
|
13
|
+
from datetime import datetime
|
|
14
|
+
from typing import Any, Dict, List
|
|
15
|
+
|
|
16
|
+
from .base_action import BaseAction
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class UpdateMemoryWithSuggestionsAction(BaseAction):
    """
    Update memory categories based on new memory items and suggestions,
    supporting different operation types (ADD, UPDATE, DELETE, TOUCH).

    The flow implemented by ``execute`` is: read the stored category content,
    parse it into memory items, ask the LLM which operations the suggestion
    implies, parse that answer, apply the operations, and persist the result.
    """

    @property
    def action_name(self) -> str:
        """Name under which this action is exposed in the function schema."""
        return "update_memory_with_suggestions"

    def get_schema(self) -> Dict[str, Any]:
        """Return OpenAI-compatible function schema"""
        return {
            "name": self.action_name,
            "description": "Update memory categories with different operation types (ADD, UPDATE, DELETE, TOUCH)",
            "parameters": {
                "type": "object",
                "properties": {
                    "character_name": {
                        "type": "string",
                        "description": "Name of the character",
                    },
                    "category": {
                        "type": "string",
                        "description": "Memory category to update",
                    },
                    "suggestion": {
                        "type": "string",
                        "description": "Suggestion for what content should be processed in this category",
                    },
                    "session_date": {
                        "type": "string",
                        "description": "Session date for the memory items (format: YYYY-MM-DD)",
                        "default": None,
                    },
                    "generate_embeddings": {
                        "type": "boolean",
                        "default": True,
                        "description": "Whether to generate embeddings for the new content",
                    },
                },
                "required": ["character_name", "category", "suggestion"],
            },
        }

    def execute(
        self,
        character_name: str,
        category: str,
        suggestion: str,
        session_date: str = None,
        generate_embeddings: bool = True,
    ) -> Dict[str, Any]:
        """
        Update memory category with different operation types based on suggestions

        Args:
            character_name: Name of the character
            category: Memory category to update; must be a key of ``self.basic_memory_types``
            suggestion: Suggestion for what content should be processed
            session_date: Session date for the memory items (format: YYYY-MM-DD);
                today's date is used when it is empty or None
            generate_embeddings: Whether to generate embeddings

        Returns:
            Dict containing the operations performed in structured format.
            Failures are reported as ``{"success": False, "error": ...}``; any
            exception is converted by ``self._handle_error``.
        """
        try:
            # Validate category
            if category not in self.basic_memory_types:
                return self._add_metadata(
                    {
                        "success": False,
                        "error": f"Invalid category '{category}'. Available: {list(self.basic_memory_types.keys())}",
                    }
                )

            if not session_date:
                session_date = datetime.now().strftime("%Y-%m-%d")

            # Load existing content
            existing_content = self._read_memory_content(character_name, category)
            existing_memory_items = self._extract_memory_items_from_content(existing_content)
            formatted_existing_content = self._format_existing_content(existing_memory_items)

            operation_response = self._analyze_memory_operation_from_suggestion(
                category, character_name, formatted_existing_content, suggestion
            )

            # A None response is treated like an empty one instead of raising on .strip().
            if not operation_response or not operation_response.strip():
                # Routed through _add_metadata like every other result of this method.
                return self._add_metadata(
                    {
                        "success": False,
                        "error": f"LLM returned empty operation analysis for {category}",
                    }
                )

            # Parse operation response
            operation_list = self._parse_operation_response(operation_response)
            operation_executed, new_items = self._execute_operations(
                character_name,
                category,
                operation_list,
                session_date,
                existing_memory_items,
                generate_embeddings,
            )

            return self._add_metadata(
                {
                    "success": True,
                    "character_name": character_name,
                    "category": category,
                    "operation_executed": operation_executed,
                    "new_memory_items": new_items,
                    "message": f"Successfully performed {len(operation_executed)} memory operations for {category}",
                }
            )

        except Exception as e:
            return self._handle_error(e)

    def _format_existing_content(self, existing_memory_items: List[Dict[str, str]]) -> str:
        """Render memory items as ``[Memory ID: <id>] <content>`` lines for the LLM prompt."""
        return "\n".join(f"[Memory ID: {item['memory_id']}] {item['content']}" for item in existing_memory_items)

    def _analyze_memory_operation_from_suggestion(
        self,
        category: str,
        character_name: str,
        existing_content: str,
        suggestion: str,
    ) -> str:
        """
        Ask the LLM which memory operations the suggestion implies.

        Args:
            category: Memory category being updated
            character_name: Name of the character
            existing_content: Existing items as formatted by ``_format_existing_content``
            suggestion: Free-text update suggestion

        Returns:
            The raw LLM response; ``_parse_operation_response`` understands the
            ``**OPERATION:**`` block format requested by the prompt.
        """

        operation_prompt = f"""You are an expert in analyzing the following memory update scenario and determining the memory operations that should be performed.

Character: {character_name}
Memory Category: {category}

Existing Memory Items in {category}:
{existing_content if existing_content else "No existing content"}

Memory Update Suggestion:
{suggestion}

**CRITICAL REQUIREMENT: The object of memory operations must be SELF-CONTAINED MEMORY ITEMS**

**SELF-CONTAINED MEMORY REQUIREMENTS:**
- EVERY activity item must be complete and standalone
- ALWAYS include the full subject (do not use "she/he/they/it")
- NEVER use pronouns that depend on context (no "she", "he", "they", "it")
- Include specific names, places, dates, and full context in each item
- Each activity should be understandable without reading other items
- Include all relevant details, emotions, and outcomes in the activity description

**OPERATION TYPES:**
1. **ADD**: Add completely new memory items that doesn't exist in Existing Memory Items
2. **UPDATE**: Modify or enhance existing memory items with new details
3. **DELETE**: Remove outdated, incorrect, or irrelevant memory items
4. **TOUCH**: Touch memory items that already exists in current content (only for updating last-mentioned timestamp)

**ANALYSIS GUIDELINES:**
- Read the Memory Update Suggestion carefully to determine what new memory items are offered
- Read the Existing Memory Items to view all memory items that are already present
- Determine the most appropriate operation type FOR EACH NEW MEMORY ITEM based on the new information and existing content
- **Use ADD for:** New memory items that are not covered in existing content
- **Use UPDATE for:** New memory items that provide updated details for existing memory items
- **Use DELETE for:** Existing memory items that are outdated/incorrect based on new memory items
- **Use TOUCH for:** Existing memory items that already covers the new memory items adequately

**OUTPUT INSTRUCTIONS:**
- **IMPORTANT** Output ALL necessary memory operations. It is common that you should perform different operations for different specific memory items
- For ADD and UPDATE operations, provide the content of the new memory items following the self-contained memory requirements
- For UPDATE, DELETE, and TOUCH operations, provide the target memory IDs associated with the memory items
- If there are multiple actions for an operation type (e.g, multiple ADDs), output them separately, do not put them in a single **OPERATION:** block
- **IMPORTANT** If a memory item in suggestion uses modal adverbs (perhaps, probably, likely, etc.) to indicate an uncertain inference, keep the modal adverbs as-is in your output

**OUTPUT FORMAT:**

**OPERATION:** [ADD/UPDATE/DELETE/TOUCH]
- Target Memory ID: [Only if operation is UPDATE, DELETE, or TOUCH][Memory ID of the memory item that is the target of the operation]
- Memory Item Content: [Only if operation is ADD or UPDATE][Content of the new memory item]

**OPERATION:** [ADD/UPDATE/DELETE/TOUCH]
- Target Memory ID: [Only if operation is UPDATE, DELETE, or TOUCH][Memory ID of the memory item that is the target of the operation]
- Memory Item Content: [Only if operation is ADD or UPDATE][Content of the new memory item]

... other operations ...
"""

        # Call LLM to determine operation type and content
        return self.llm_client.simple_chat(operation_prompt)

    def _parse_operation_response(self, response: str) -> List[Dict[str, Any]]:
        """
        Parse LLM response to extract operation info

        Args:
            response: Text made of ``**OPERATION:** <TYPE>`` headers, each optionally
                followed by ``- Target Memory ID:`` and ``- Memory Item Content:`` lines

        Returns:
            One dict per recognised header with the keys ``operation``,
            ``target_id`` and ``memory_content`` (the latter two are None when
            the corresponding line is absent). Completeness is not checked
            here; ``_execute_operations`` skips incomplete operations.
        """
        header_prefix = "**OPERATION:**"
        target_prefix = "- Target Memory ID:"
        content_prefix = "- Memory Item Content:"
        valid_operations = ("ADD", "UPDATE", "DELETE", "TOUCH")

        operation_list: List[Dict[str, Any]] = []
        current_operation = None

        for raw_line in response.strip().split("\n"):
            line = raw_line.strip()

            if line.startswith(header_prefix):
                # Every header closes the block before it. Resetting here also
                # keeps the fields of an unrecognised block from overwriting
                # the previous valid operation.
                if current_operation:
                    operation_list.append(current_operation)
                    current_operation = None

                operation = line[len(header_prefix):].strip()
                if operation in valid_operations:
                    current_operation = {
                        "operation": operation,
                        "target_id": None,
                        "memory_content": None,
                    }
                continue

            if current_operation is None:
                # Field line outside any recognised block: nothing to attach it to.
                continue

            if line.startswith(target_prefix):
                current_operation["target_id"] = line[len(target_prefix):].strip()
            elif line.startswith(content_prefix):
                current_operation["memory_content"] = line[len(content_prefix):].strip()

        if current_operation:
            operation_list.append(current_operation)

        return operation_list

    def _execute_operations(
        self,
        character_name: str,
        category: str,
        operation_list: List[Dict[str, Any]],
        session_date: str,
        existing_items: List[Dict[str, str]],
        generate_embeddings: bool,
    ) -> tuple[List[Dict[str, Any]], List[Dict[str, Any]]]:
        """
        Execute all operations in the list

        Args:
            character_name: Name of the character
            category: Memory category being updated
            operation_list: Operations as produced by ``_parse_operation_response``
            session_date: Date stored as ``mentioned_at`` on newly added items
            existing_items: Items currently stored for the category
            generate_embeddings: Whether added/updated items should be embedded

        Returns:
            ``(operation_executed, new_items)``: the operations that actually took
            effect, and the items created by ADD operations.
        """

        # Work on a copy so the caller's list is not mutated.
        all_items = list(existing_items)
        new_items = []
        updated_items = []
        operation_executed = []

        for operation in operation_list:
            kind = operation.get("operation")
            target_id = operation.get("target_id")
            memory_content = operation.get("memory_content")

            if kind == "ADD":
                if not memory_content:
                    continue

                memory_item = {
                    "memory_id": self._generate_memory_id(),
                    "mentioned_at": session_date,
                    "content": memory_content,
                    "links": "",
                }

                all_items.append(memory_item)
                new_items.append(memory_item)
                updated_items.append(memory_item)
                operation_executed.append(operation)

            elif kind == "UPDATE":
                if not target_id or not memory_content:
                    continue

                target = next((item for item in all_items if item["memory_id"] == target_id), None)
                if target is None:
                    # Do not report an operation that changed nothing.
                    logger.warning("UPDATE skipped: memory ID %s not found in %s", target_id, category)
                    continue

                target["content"] = memory_content
                updated_items.append(target)
                operation_executed.append(operation)

            elif kind == "DELETE":
                if not target_id:
                    continue

                # Build a new list rather than removing while iterating.
                remaining = [item for item in all_items if item["memory_id"] != target_id]
                if len(remaining) == len(all_items):
                    logger.warning("DELETE skipped: memory ID %s not found in %s", target_id, category)
                    continue

                all_items = remaining
                operation_executed.append(operation)

            elif kind == "TOUCH":
                if not target_id:
                    continue

                if not any(item["memory_id"] == target_id for item in all_items):
                    logger.warning("TOUCH skipped: memory ID %s not found in %s", target_id, category)
                    continue

                # TODO: items carry no "updated_at" field yet, so TOUCH only
                # acknowledges the item and leaves its stored data unchanged.
                operation_executed.append(operation)

        new_content = self._format_memory_items(all_items)
        self._save_memory_content(character_name, category, new_content)

        if generate_embeddings and self.embeddings_enabled and updated_items:
            self._add_memory_item_embedding(character_name, category, updated_items)

        return operation_executed, new_items

    def _extract_memory_items_from_content(self, content: str) -> List[Dict[str, str]]:
        """
        Extract memory items with IDs from content

        Each non-matching line is ignored. A matching line has the shape
        ``[<memory_id>][mentioned at <date>] <content> [<links>]`` where the
        trailing ``[<links>]`` part is optional.

        Args:
            content: Stored category text, one memory item per line

        Returns:
            Dicts with the keys ``memory_id``, ``mentioned_at``, ``content`` and
            ``links`` (empty string when no links part is present).
        """
        import re

        if not content:
            return []

        # Compiled once; the pattern is the same for every line.
        pattern = re.compile(r"^\[([^\]]+)\]\[mentioned at ([^\]]+)\]\s*(.*?)(?:\s*\[([^\]]*)\])?$")

        items = []
        for line in content.split("\n"):
            match = pattern.match(line.strip())
            if not match:
                continue

            memory_id, mentioned_at, body, links = match.groups()
            clean_content = body.strip()

            if memory_id and clean_content:
                items.append(
                    {
                        "memory_id": memory_id,
                        "mentioned_at": mentioned_at,
                        "content": clean_content,
                        "links": links if links else "",
                    }
                )

        return items

    def _add_memory_item_embedding(self, character_name: str, category: str, new_items: list[dict]) -> Dict[str, Any]:
        """
        Add or refresh embeddings for the given memory items

        Embeddings are kept in ``<category>_embeddings.json`` inside the directory
        returned by ``self.storage_manager.get_char_embeddings_dir()``. When an
        entry with the same ``memory_id`` already exists (the item was updated),
        it is replaced in place so no stale embedding of the old text survives;
        otherwise the new entry is appended.

        Args:
            character_name: Name of the character
            category: Memory category the items belong to
            new_items: Items with ``memory_id``, ``mentioned_at``, ``content`` and ``links``

        Returns:
            Result dict with ``success``; on success also ``embedding_count``,
            ``new_items_count`` (items actually embedded) and ``message``.
            Never raises: any failure is logged and returned as an error dict.
        """
        try:
            if not self.embeddings_enabled or not new_items:
                return {"success": False, "error": "Embeddings disabled or empty item"}

            # Get character embeddings directory from storage manager
            char_embeddings_dir = self.storage_manager.get_char_embeddings_dir()
            embeddings_file = char_embeddings_dir / f"{category}_embeddings.json"

            existing_embeddings = []
            if embeddings_file.exists():
                with open(embeddings_file, "r", encoding="utf-8") as f:
                    embeddings_data = json.load(f)
                    existing_embeddings = embeddings_data.get("embeddings", [])

            embedded_count = 0
            for item in new_items:
                if not item["content"].strip():
                    continue

                try:
                    embedding_vector = self.embedding_client.embed(item["content"])

                    new_embedding = {
                        "item_id": f"{character_name}_{category}_item_{len(existing_embeddings)}",
                        "memory_id": item["memory_id"],
                        "text": item["content"],
                        "full_line": f"[{item['memory_id']}][mentioned at {item['mentioned_at']}] {item['content']} [{item['links']}]",
                        "embedding": embedding_vector,
                        "line_number": len(existing_embeddings) + 1,
                        "metadata": {
                            "character": character_name,
                            "category": category,
                            "length": len(item["content"]),
                            "mentioned_at": item["mentioned_at"],
                            "timestamp": datetime.now().isoformat(),
                        },
                    }

                    # Entries already stored for this memory ID hold the old text.
                    stale_positions = [
                        position
                        for position, entry in enumerate(existing_embeddings)
                        if isinstance(entry, dict) and entry.get("memory_id") == item["memory_id"]
                    ]

                    if stale_positions:
                        keep = stale_positions[0]
                        previous = existing_embeddings[keep]
                        # Keep the stored identifiers so item_id values stay unique.
                        new_embedding["item_id"] = previous.get("item_id", new_embedding["item_id"])
                        new_embedding["line_number"] = previous.get("line_number", new_embedding["line_number"])
                        existing_embeddings[keep] = new_embedding
                        # Drop further duplicates, back to front so positions stay valid.
                        for position in reversed(stale_positions[1:]):
                            del existing_embeddings[position]
                    else:
                        existing_embeddings.append(new_embedding)

                    embedded_count += 1

                except Exception as e:
                    # Best effort: one failing item must not block the others.
                    logger.warning("Failed to generate embedding for memory item %s: %r", item.get("memory_id"), e)
                    continue

            # Save updated embeddings
            embeddings_data = {
                "category": category,
                "timestamp": datetime.now().isoformat(),
                "embeddings": existing_embeddings,
                "total_embeddings": len(existing_embeddings),
            }

            with open(embeddings_file, "w", encoding="utf-8") as f:
                json.dump(embeddings_data, f, indent=2, ensure_ascii=False)

            return {
                "success": True,
                "embedding_count": len(existing_embeddings),
                "new_items_count": embedded_count,
                "message": f"Added embeddings for {embedded_count} new memory items in {category}",
            }

        except Exception as e:
            logger.error("Failed to add memory item embedding: %s", e)
            return {"success": False, "error": str(e)}

    def _format_memory_items(self, items: List[Dict[str, str]]) -> str:
        """Serialize items, one per line, in the format parsed by ``_extract_memory_items_from_content``."""
        return "\n".join(
            f"[{item['memory_id']}][mentioned at {item['mentioned_at']}] {item['content']} [{item['links']}]"
            for item in items
        )

    def _load_category_extract_prompt(
        self,
        category: str,
        character_name: str,
        existing_content: str,
        memory_items_text: str,
        suggestion: str,
    ) -> str:
        """
        Load category-specific prompt template to extract NEW content only

        The template is read from ``config/<category>/prompt.txt``, resolved
        three directory levels above this file.

        Args:
            category: Memory category (profile, event, activity, etc.)
            character_name: Name of the character
            existing_content: Existing content in the category
            memory_items_text: Source activity content to extract from
            suggestion: Suggestion for what to extract

        Returns:
            Formatted prompt for extracting new content only

        Raises:
            FileNotFoundError: If the category has no ``prompt.txt``
        """
        from pathlib import Path

        # Load category-specific prompt
        config_dir = Path(__file__).parent.parent.parent / "config" / category
        prompt_file = config_dir / "prompt.txt"

        if not prompt_file.exists():
            raise FileNotFoundError(f"Prompt file not found: {prompt_file}")

        # Load and format the prompt template
        with open(prompt_file, "r", encoding="utf-8") as f:
            prompt_template = f.read()

        # Format the prompt with variables for extracting NEW content only
        extract_prompt = f"""Based on the following category-specific requirements, extract ONLY NEW information for the {category} memory:

{prompt_template}

=== EXTRACTION CONTEXT ===

EXISTING {category} content (DO NOT DUPLICATE):
{existing_content if existing_content else "No existing content"}

Source activity content to extract from:
{memory_items_text}

Suggestion for this category: {suggestion}

=== CRITICAL EXTRACTION REQUIREMENTS ===

**ONLY EXTRACT NEW INFORMATION**
- CAREFULLY review the existing {category} content above
- ONLY extract information that is NOT already present in existing content
- If information is already covered in existing content, DO NOT extract it again
- Focus on completely NEW facts, details, or updates

**NO PRONOUNS - COMPLETE SENTENCES ONLY**
- EVERY memory item must be a complete, standalone sentence
- ALWAYS include the full subject "{character_name}"
- NEVER use pronouns that depend on context (no "she", "he", "they", "it")
- Each memory item should be understandable without reading other items

**CRITICAL: NO "NOT SPECIFIED" OR "NOT MENTIONED" CONTENT**
- NEVER create memory items saying information is "not specified", "not mentioned", "not available", or "unknown"
- ONLY extract and record information that is ACTUALLY present in the source content
- If information is missing, simply DON'T create a memory item for that topic
- Empty/missing information should result in NO memory item, not a "not specified" item

**OUTPUT FORMAT:**
1. Each line should be one complete, self-contained statement
2. NO markdown headers, bullets, or structure
3. Write in plain text only
4. Each line will automatically get a memory ID [xxx] prefix
5. ONLY include lines with actual, factual NEW information
6. If no new information is found, return empty content

Extract ONLY NEW relevant information according to the category requirements above and write each piece as a complete, self-contained sentence:

NEW {category} content to append:"""

        return extract_prompt
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Embedding Client for Memory Operations
|
|
3
|
+
|
|
4
|
+
This module provides embedding generation capabilities using BaseLLMClient,
|
|
5
|
+
replacing the previous multi-provider EmbeddingClient with a simpler approach.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from typing import TYPE_CHECKING, List, Optional
|
|
10
|
+
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from noesium.core.llm import BaseLLMClient
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class EmbeddingClient:
    """
    Thin adapter that forwards embedding requests to a BaseLLMClient.

    The memory system talks to this wrapper; the wrapper delegates to the
    client's ``embed`` / ``embed_batch`` / ``get_embedding_dimensions`` methods.
    """

    def __init__(self, llm_client: "BaseLLMClient"):
        """
        Store the LLM client that performs the actual embedding calls.

        Args:
            llm_client: The LLM client with embed/embed_batch capabilities
        """
        self.llm_client = llm_client
        logger.info(f"EmbeddingClient initialized with LLM client: {type(llm_client).__name__}")

    def embed(self, text: str) -> List[float]:
        """
        Embed a single text.

        Args:
            text: Text to embed

        Returns:
            The embedding vector, or an empty list when the text is empty or
            whitespace only.

        Raises:
            Exception: Whatever the LLM client raises; it is logged and re-raised.
        """
        has_text = bool(text) and bool(text.strip())
        if has_text:
            try:
                return self.llm_client.embed(text)
            except Exception as exc:
                logger.error(f"Failed to generate embedding: {exc}")
                raise

        logger.warning("Empty text provided for embedding")
        return []

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """
        Embed several texts, one vector per input text.

        The batch call of the LLM client is tried first. If it fails, every text
        is embedded individually; a text that still fails gets a zero vector of
        ``get_embedding_dimension()`` length as placeholder.

        Args:
            texts: List of texts to embed

        Returns:
            List of embedding vectors (empty when ``texts`` is empty)
        """
        if not texts:
            return []

        try:
            return self.llm_client.embed_batch(texts)
        except Exception as batch_error:
            logger.error(f"Failed to generate batch embeddings: {batch_error}")

        # Batch call failed: degrade to one request per text.
        logger.info("Falling back to individual embed calls")
        vectors = []
        for single_text in texts:
            try:
                vector = self.embed(single_text)
            except Exception as single_error:
                logger.error(f"Failed to embed text individually: {single_error}")
                # Zero vector keeps the output aligned with the input order.
                vector = [0.0] * self.get_embedding_dimension()
            vectors.append(vector)
        return vectors

    def get_embedding_dimension(self) -> int:
        """Return the client's embedding size, or 1536 when it cannot be determined."""
        try:
            dimension = self.llm_client.get_embedding_dimensions()
        except Exception as exc:
            logger.warning(f"Failed to get embedding dimensions from LLM client: {exc}")
            dimension = 1536  # Default fallback
        return dimension
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def create_embedding_client(llm_client: "BaseLLMClient") -> EmbeddingClient:
    """
    Wrap the given LLM client in an EmbeddingClient.

    Args:
        llm_client: BaseLLMClient with embedding capabilities

    Returns:
        A new EmbeddingClient that delegates to ``llm_client``
    """
    wrapped_client = EmbeddingClient(llm_client)
    return wrapped_client
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def get_default_embedding_client() -> Optional[EmbeddingClient]:
    """
    Build an embedding client from the default LLM client.

    The LLM client is obtained from ``_get_llm_client_memu_compatible`` and
    wrapped with ``create_embedding_client``. This function never raises:
    every failure is logged as a warning and reported as None.

    Returns:
        EmbeddingClient if LLM client can be created, None otherwise
    """
    try:
        # Imported lazily to avoid circular imports
        from ..llm_adapter import _get_llm_client_memu_compatible

        default_llm_client = _get_llm_client_memu_compatible()
        if default_llm_client is not None:
            return create_embedding_client(default_llm_client)

        logger.warning("Failed to create LLM client from environment")
    except Exception as exc:
        logger.warning(f"Failed to create default embedding client: {exc}")

    return None
|