alpha-avatar-plugins-memory 0.5.2__tar.gz → 0.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (15)
  1. {alpha_avatar_plugins_memory-0.5.2 → alpha_avatar_plugins_memory-0.5.3}/PKG-INFO +2 -1
  2. {alpha_avatar_plugins_memory-0.5.2 → alpha_avatar_plugins_memory-0.5.3}/alphaavatar/plugins/memory/__init__.py +21 -7
  3. {alpha_avatar_plugins_memory-0.5.2 → alpha_avatar_plugins_memory-0.5.3}/alphaavatar/plugins/memory/memory_langchain.py +249 -112
  4. alpha_avatar_plugins_memory-0.5.3/alphaavatar/plugins/memory/memory_markdown.py +291 -0
  5. {alpha_avatar_plugins_memory-0.5.2 → alpha_avatar_plugins_memory-0.5.3}/alphaavatar/plugins/memory/memory_op.py +2 -2
  6. alpha_avatar_plugins_memory-0.5.3/alphaavatar/plugins/memory/memory_prompts.py +560 -0
  7. {alpha_avatar_plugins_memory-0.5.2 → alpha_avatar_plugins_memory-0.5.3}/alphaavatar/plugins/memory/runner/__init__.py +2 -1
  8. alpha_avatar_plugins_memory-0.5.3/alphaavatar/plugins/memory/runner/lancedb_runner.py +204 -0
  9. {alpha_avatar_plugins_memory-0.5.2 → alpha_avatar_plugins_memory-0.5.3}/alphaavatar/plugins/memory/runner/qdrant_runner.py +3 -3
  10. {alpha_avatar_plugins_memory-0.5.2 → alpha_avatar_plugins_memory-0.5.3}/alphaavatar/plugins/memory/version.py +1 -1
  11. {alpha_avatar_plugins_memory-0.5.2 → alpha_avatar_plugins_memory-0.5.3}/pyproject.toml +1 -0
  12. alpha_avatar_plugins_memory-0.5.2/alphaavatar/plugins/memory/memory_prompts.py +0 -214
  13. {alpha_avatar_plugins_memory-0.5.2 → alpha_avatar_plugins_memory-0.5.3}/.gitignore +0 -0
  14. {alpha_avatar_plugins_memory-0.5.2 → alpha_avatar_plugins_memory-0.5.3}/README.md +0 -0
  15. {alpha_avatar_plugins_memory-0.5.2 → alpha_avatar_plugins_memory-0.5.3}/alphaavatar/plugins/memory/log.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alpha-avatar-plugins-memory
3
- Version: 0.5.2
3
+ Version: 0.5.3
4
4
  Summary: AlphaAvatar Framework plugin for memory service
5
5
  Project-URL: Source, https://github.com/AlphaAvatar/AlphaAvatar
6
6
  License-Expression: Apache-2.0
@@ -17,6 +17,7 @@ Classifier: Topic :: Multimedia :: Sound/Audio
17
17
  Classifier: Topic :: Multimedia :: Video
18
18
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
19
19
  Requires-Python: >=3.10
20
+ Requires-Dist: lancedb
20
21
  Requires-Dist: langchain-community
21
22
  Requires-Dist: langchain-core
22
23
  Requires-Dist: langchain-openai
@@ -11,13 +11,14 @@
11
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
+ import os
15
+
14
16
  from livekit.agents.inference_runner import _InferenceRunner
15
17
 
16
18
  from alphaavatar.agents import AvatarModule, AvatarPlugin
17
19
 
18
20
  from .log import logger
19
21
  from .memory_langchain import MemoryLangchain
20
- from .runner import QdrantRunner
21
22
  from .version import __version__
22
23
 
23
24
  __all__ = [
@@ -33,6 +34,7 @@ class MemoryLangchainPlugin(AvatarPlugin):
33
34
 
34
35
  def get_plugin(
35
36
  self,
37
+ working_dir: str,
36
38
  memory_search_context: int,
37
39
  memory_recall_num: int,
38
40
  maximum_memory_num: int,
@@ -42,20 +44,32 @@ class MemoryLangchainPlugin(AvatarPlugin):
42
44
  ) -> MemoryLangchain:
43
45
  try:
44
46
  return MemoryLangchain(
47
+ working_dir=working_dir,
45
48
  memory_search_context=memory_search_context,
46
49
  memory_recall_num=memory_recall_num,
47
50
  maximum_memory_num=maximum_memory_num,
48
51
  memory_init_config=memory_init_config,
49
52
  )
50
- except Exception:
51
- raise ImportError(
52
- "The 'langchain[default]' Memory plugin is required but is not installed.\n"
53
- "To fix this, install the optional dependency: `pip install alphaavatar-plugins-memory`"
54
- )
53
+ except Exception as e:
54
+ raise ImportError(f"Failed to initialize MemoryLangchain plugin: {e}")
55
55
 
56
56
 
57
57
  # plugin init
58
58
  AvatarPlugin.register_avatar_plugin(AvatarModule.MEMORY, "default", MemoryLangchainPlugin())
59
59
 
60
60
  # runner init
61
- _InferenceRunner.register_runner(QdrantRunner)
61
+ memory_vdb_type = os.getenv("MEMORY_VDB_TYPE", None)
62
+ match memory_vdb_type:
63
+ case "qdrant":
64
+ from . import memory_langchain
65
+ from .runner import QdrantRunner
66
+
67
+ memory_langchain.MEMORY_INFERENCE_METHOD = QdrantRunner.INFERENCE_METHOD
68
+ _InferenceRunner.register_runner(QdrantRunner)
69
+
70
+ case "lancedb":
71
+ from . import memory_langchain
72
+ from .runner import LanceDBRunner
73
+
74
+ memory_langchain.MEMORY_INFERENCE_METHOD = LanceDBRunner.INFERENCE_METHOD
75
+ _InferenceRunner.register_runner(LanceDBRunner)
@@ -14,6 +14,7 @@
14
14
  import asyncio
15
15
  import hashlib
16
16
  import json
17
+ import pathlib
17
18
  import re
18
19
  from typing import Any
19
20
 
@@ -34,32 +35,63 @@ from alphaavatar.agents.memory import (
34
35
  from alphaavatar.agents.utils import format_current_time
35
36
 
36
37
  from .log import logger
38
+ from .memory_markdown import save_memory_items_to_markdown
37
39
  from .memory_op import MemoryDelta, PatchOp, flatten_items, norm_token, rebuild_from_items
38
- from .memory_prompts import MEMORY_EXTRACT_PROMPT
39
- from .runner import QdrantRunner
40
+ from .memory_prompts import (
41
+ CONVERSATION_MEMORY_EXTRACT_PROMPT,
42
+ TOOL_MEMORY_EXTRACT_PROMPT,
43
+ )
44
+
45
+ CONVERSATION_DELTA_PROMPT = ChatPromptTemplate.from_messages(
46
+ [
47
+ (
48
+ "system",
49
+ CONVERSATION_MEMORY_EXTRACT_PROMPT,
50
+ ),
51
+ (
52
+ "human",
53
+ "NEW TURN TYPE: {type}\n"
54
+ "NEW TURN CONTENT:\n```{message_content}```\n\n"
55
+ "Output only `MemoryDelta`.\n\n"
56
+ "### WRITING RULES\n"
57
+ "- Each user_or_tool_memory_entries PatchOp.value MUST be exactly one [MEMORY]...[/MEMORY] card for conversation memory.\n"
58
+ "- Each assistant_memory_entries PatchOp.value MUST be exactly one [MEMORY]...[/MEMORY] card for avatar memory.\n"
59
+ "- summary must preserve user intent, assistant response, and any continuing context.\n"
60
+ "- Do NOT write raw tool logs, request IDs, file paths, actions, or next_steps unless absolutely necessary.\n"
61
+ "- If tools were used, describe only the user-facing result at a high level.\n"
62
+ "- entities must include high-signal nouns.\n"
63
+ "- topic must be a stable short label.\n"
64
+ "- Avoid duplication: only record new conversational facts or new details in this turn.\n"
65
+ "- Do not invent details not supported by the content.\n",
66
+ ),
67
+ ]
68
+ )
40
69
 
41
- DELTA_PROMPT = ChatPromptTemplate.from_messages(
70
+ TOOL_DELTA_PROMPT = ChatPromptTemplate.from_messages(
42
71
  [
43
72
  (
44
73
  "system",
45
- MEMORY_EXTRACT_PROMPT,
74
+ TOOL_MEMORY_EXTRACT_PROMPT,
46
75
  ),
47
76
  (
48
77
  "human",
49
78
  "NEW TURN TYPE: {type}\n"
50
79
  "NEW TURN CONTENT:\n```{message_content}```\n\n"
51
- "Output only MemoryDelta.\n\n"
80
+ "Output only `MemoryDelta`.\n\n"
52
81
  "### WRITING RULES\n"
53
- "- Each PatchOp.value MUST be exactly one [EVENT]...[/EVENT] card described in the system prompt.\n"
54
- "- Do NOT write vague summaries. Include tool/component, operation, outcome, and evidence IDs when available.\n"
55
- "- entities must include high-signal nouns (tool names, ops, error codes, env cues).\n"
56
- "- topic must be a stable short label (e.g., 'rag indexing', 'web search', 'file storage', 'tool error').\n"
57
- "- Avoid duplication: only record new events or new details in this turn.\n"
82
+ "- Each user_or_tool_memory_entries PatchOp.value MUST be exactly one [EVENT]...[/EVENT] card for tool memory.\n"
83
+ "- Each assistant_memory_entries PatchOp.value MUST be exactly one [EVENT]...[/EVENT] card for avatar memory derived from tool events.\n"
84
+ "- Include concrete tool/component, operation, outcome, and relevant sanitized details.\n"
85
+ "- Include evidence IDs only when actually present.\n"
86
+ "- entities must include high-signal nouns such as tool names, operations, error identifiers, or artifact types.\n"
87
+ "- topic must be a stable short label.\n"
88
+ "- Avoid duplication: only record new tool events or new details in this turn.\n"
58
89
  "- Do not invent details not supported by the content.\n",
59
90
  ),
60
91
  ]
61
92
  )
62
93
 
94
+ MEMORY_INFERENCE_METHOD = None
63
95
 
64
96
  # ===============================
65
97
  # For Memory Normalization and Dedupe
@@ -104,6 +136,14 @@ EVENT_TYPE_RE = re.compile(r"(?im)^\s*type:\s*([a-zA-Z_]+)\s*$")
104
136
  OUTCOME_RE = re.compile(r"(?im)^\s*outcome:\s*([a-zA-Z_]+)\s*$")
105
137
  TOPIC_RE = re.compile(r"(?im)^\s*topic:\s*(.+?)\s*$")
106
138
  ERROR_RE = re.compile(r"(?im)^\s*error:\s*(.+?)\s*$")
139
+ KIND_RE = re.compile(r"(?im)^\s*kind:\s*([a-zA-Z_]+)\s*$")
140
+ SUMMARY_RE = re.compile(r"(?ims)^\s*summary:\s*(.+?)\s*$")
141
+ CONTEXT_RE = re.compile(r"(?ims)^\s*context:\s*(.+?)\s*$")
142
+
143
+
144
+ def _memory_field(value: str, regex: re.Pattern[str]) -> str | None:
145
+ m = regex.search(value or "")
146
+ return m.group(1).strip() if m else None
107
147
 
108
148
 
109
149
  def _event_field(value: str, regex: re.Pattern[str]) -> str | None:
@@ -114,13 +154,16 @@ def _event_field(value: str, regex: re.Pattern[str]) -> str | None:
114
154
  def _memory_priority(item: "MemoryItem") -> int:
115
155
  """
116
156
  Higher is more important.
117
- Works even if value is not an event card (but your prompt aims to always produce one).
157
+ Supports both:
158
+ - [EVENT] cards for tool memory
159
+ - [MEMORY] cards for conversation/avatar memory
118
160
  """
119
161
  v = (item.value or "").lower()
120
162
  t = (item.topic or "").lower()
121
163
 
122
164
  etype = _event_field(item.value, EVENT_TYPE_RE) or ""
123
165
  outcome = _event_field(item.value, OUTCOME_RE) or ""
166
+ kind = (_memory_field(item.value, KIND_RE) or "").strip().lower()
124
167
 
125
168
  # 1) Hard signals: failures/incidents
126
169
  if "outcome: failed" in v or outcome == "failed":
@@ -132,28 +175,67 @@ def _memory_priority(item: "MemoryItem") -> int:
132
175
  if "error:" in v or _event_field(item.value, ERROR_RE):
133
176
  return 92
134
177
 
135
- # 2) High-value operational memories
136
- if etype in ("decision", "config_change"):
137
- return 88
138
- if etype in ("indexing", "retrieval"):
139
- return 85
140
- if t in (
141
- "rag indexing",
142
- "tool error",
143
- "qdrant memory",
144
- "async debugging",
145
- "dependency install",
146
- "gpu detection",
147
- ):
148
- return 82
149
-
150
- # 3) Medium: user intent / tasks / important interactions
151
- if etype in ("interaction", "file_storage", "web_search", "tool_run"):
152
- return 60
153
-
154
- # 4) Social context: keep but lower priority
178
+ # 2) Avatar memory is usually high-value global memory
179
+ if item.memory_type == MemoryType.Avatar:
180
+ if kind == "avatar":
181
+ return 90
182
+ if etype in ("decision", "config_change"):
183
+ return 88
184
+ return 86
185
+
186
+ # 3) Tool-side operational memories
187
+ if item.memory_type == MemoryType.TOOLS:
188
+ if etype in ("decision", "config_change"):
189
+ return 88
190
+ if etype in ("indexing", "retrieval", "web_search", "tool_run", "artifact_generation"):
191
+ return 82
192
+ if t in (
193
+ "rag indexing",
194
+ "tool error",
195
+ "qdrant memory",
196
+ "async debugging",
197
+ "dependency install",
198
+ "gpu detection",
199
+ "memory prompt inspection",
200
+ "tool memory policy",
201
+ ):
202
+ return 80
203
+ return 70
204
+
205
+ # 4) Conversation memories
206
+ if item.memory_type == MemoryType.CONVERSATION:
207
+ if kind == "conversation":
208
+ if t in (
209
+ "response preference",
210
+ "user response preference",
211
+ "memory prompt design",
212
+ "alphaavatar architecture",
213
+ "social context",
214
+ ):
215
+ return 72
216
+
217
+ # If it contains explicit preference/emotion/project context, slightly higher
218
+ if any(
219
+ k in v
220
+ for k in [
221
+ "prefers",
222
+ "preference",
223
+ "short and direct",
224
+ "concise",
225
+ "building",
226
+ "redesigning",
227
+ "stressed",
228
+ "tired",
229
+ "excited",
230
+ "frustrated",
231
+ ]
232
+ ):
233
+ return 68
234
+
235
+ return 60
236
+
237
+ # 5) Fallbacks
155
238
  if t in ("social context", "small talk", "chitchat", "chat"):
156
- # if contains emotion keywords, slightly higher
157
239
  if any(
158
240
  k in v
159
241
  for k in ["tired", "exhausted", "stressed", "anxious", "happy", "excited", "frustrated"]
@@ -218,7 +300,7 @@ def _select_by_priority(
218
300
  return picked
219
301
 
220
302
 
221
- class MemmoryInitConfig(BaseModel):
303
+ class MemoryInitConfig(BaseModel):
222
304
  chat_model: str = Field(default="gpt-4o-mini")
223
305
  temperature: float = Field(default=0.0)
224
306
 
@@ -227,19 +309,21 @@ class MemoryLangchain(MemoryBase):
227
309
  def __init__(
228
310
  self,
229
311
  *,
312
+ working_dir: pathlib.Path,
230
313
  memory_search_context: int = 3,
231
314
  memory_recall_num: int = 10,
232
315
  maximum_memory_num: int = 24,
233
316
  memory_init_config: dict[str, Any] | None = None,
234
317
  ) -> None:
235
318
  super().__init__(
319
+ working_dir=working_dir,
236
320
  memory_search_context=memory_search_context,
237
321
  memory_recall_num=memory_recall_num,
238
322
  maximum_memory_num=maximum_memory_num,
239
323
  )
240
324
 
241
325
  self._memory_init_config = (
242
- MemmoryInitConfig(**memory_init_config) if memory_init_config else MemmoryInitConfig()
326
+ MemoryInitConfig(**memory_init_config) if memory_init_config else MemoryInitConfig()
243
327
  )
244
328
 
245
329
  llm = ChatOpenAI(
@@ -248,41 +332,66 @@ class MemoryLangchain(MemoryBase):
248
332
  ) # type: ignore
249
333
 
250
334
  self._delta_llm = llm.with_structured_output(MemoryDelta)
251
- self._delta_chain = DELTA_PROMPT | self._delta_llm # ✅ build once
335
+ self._conversation_delta_chain = CONVERSATION_DELTA_PROMPT | self._delta_llm
336
+ self._tool_delta_chain = TOOL_DELTA_PROMPT | self._delta_llm
252
337
  self._executor = get_job_context().inference_executor
253
338
 
254
339
  @property
255
- def memory_init_config(self) -> MemmoryInitConfig:
340
+ def memory_init_config(self) -> MemoryInitConfig:
256
341
  return self._memory_init_config
257
342
 
258
- async def _safe_ainvoke_delta(
343
+ async def _safe_ainvoke_conversation_delta(
344
+ self,
345
+ *,
346
+ message_content: str,
347
+ timeout: float = 12.0,
348
+ ) -> MemoryDelta:
349
+ payload = {
350
+ "type": MemoryType.CONVERSATION,
351
+ "message_content": message_content,
352
+ }
353
+ try:
354
+ return await asyncio.wait_for(
355
+ self._conversation_delta_chain.ainvoke(payload), timeout=timeout
356
+ ) # type: ignore
357
+ except asyncio.TimeoutError:
358
+ logger.warning("[Memory] conversation delta extraction timeout")
359
+ return MemoryDelta()
360
+ except Exception:
361
+ logger.exception("[Memory] conversation delta extraction failed")
362
+ return MemoryDelta()
363
+
364
+ async def _safe_ainvoke_tool_delta(
259
365
  self,
260
366
  *,
261
- memory_type: MemoryType,
262
367
  message_content: str,
263
368
  timeout: float = 12.0,
264
369
  ) -> MemoryDelta:
265
- """Robust delta extraction with timeout and fallback."""
266
370
  payload = {
267
- "type": memory_type,
371
+ "type": MemoryType.TOOLS,
268
372
  "message_content": message_content,
269
373
  }
270
374
  try:
271
- return await asyncio.wait_for(self._delta_chain.ainvoke(payload), timeout=timeout) # type: ignore
375
+ return await asyncio.wait_for(self._tool_delta_chain.ainvoke(payload), timeout=timeout) # type: ignore
272
376
  except asyncio.TimeoutError:
273
- logger.warning(f"[Memory] delta extraction timeout (type={memory_type})")
377
+ logger.warning("[Memory] tool delta extraction timeout")
274
378
  return MemoryDelta()
275
379
  except Exception:
276
- logger.exception(f"[Memory] delta extraction failed (type={memory_type})")
380
+ logger.exception("[Memory] tool delta extraction failed")
277
381
  return MemoryDelta()
278
382
 
279
- def _apply_delta(self, avatar_id: str, delta: MemoryDelta, memory_cache: MemoryCache):
383
+ def _apply_delta_to_bucket(
384
+ self,
385
+ *,
386
+ avatar_id: str,
387
+ delta: MemoryDelta,
388
+ memory_cache: MemoryCache,
389
+ user_or_tool_memory_type: MemoryType,
390
+ ):
280
391
  updated_time = format_current_time().time_str
281
392
  assistant_memories: list[MemoryItem] = []
282
- user_memories: list[MemoryItem] = []
283
- tool_memories: list[MemoryItem] = []
393
+ target_memories: list[MemoryItem] = []
284
394
 
285
- # local dedupe per update call
286
395
  seen_keys: set[str] = set()
287
396
 
288
397
  def _maybe_add(
@@ -292,7 +401,6 @@ class MemoryLangchain(MemoryBase):
292
401
  mem_type: MemoryType,
293
402
  item: PatchOp,
294
403
  ):
295
- # normalize
296
404
  item.topic = _norm_topic(item.topic)
297
405
  item.entities = _norm_entities(item.entities)
298
406
 
@@ -317,7 +425,6 @@ class MemoryLangchain(MemoryBase):
317
425
  )
318
426
  )
319
427
 
320
- # assistant memory
321
428
  for item in delta.assistant_memory_entries:
322
429
  _maybe_add(
323
430
  bucket=assistant_memories,
@@ -326,25 +433,15 @@ class MemoryLangchain(MemoryBase):
326
433
  item=item,
327
434
  )
328
435
 
329
- # user or tool memory
330
- if memory_cache.type == MemoryType.CONVERSATION:
331
- for item in delta.user_or_tool_memory_entries:
332
- _maybe_add(
333
- bucket=user_memories,
334
- object_id=memory_cache.user_or_tool_id,
335
- mem_type=MemoryType.CONVERSATION,
336
- item=item,
337
- )
338
- else:
339
- for item in delta.user_or_tool_memory_entries:
340
- _maybe_add(
341
- bucket=tool_memories,
342
- object_id=memory_cache.user_or_tool_id,
343
- mem_type=MemoryType.TOOLS,
344
- item=item,
345
- )
436
+ for item in delta.user_or_tool_memory_entries:
437
+ _maybe_add(
438
+ bucket=target_memories,
439
+ object_id=memory_cache.user_or_tool_id,
440
+ mem_type=user_or_tool_memory_type,
441
+ item=item,
442
+ )
346
443
 
347
- return assistant_memories, user_memories, tool_memories
444
+ return assistant_memories, target_memories
348
445
 
349
446
  async def search_by_context(
350
447
  self, *, avatar_id: str, session_id: str, chat_context: list[ChatItem], timeout: float = 3
@@ -357,23 +454,19 @@ class MemoryLangchain(MemoryBase):
357
454
  if not context_str:
358
455
  return
359
456
 
360
- if self.memory_cache[session_id].type == MemoryType.CONVERSATION:
361
- json_data = {
362
- "op": VectorRunnerOP.search_by_context,
363
- "param": {
364
- "context_str": context_str,
365
- "avatar_id": avatar_id,
366
- "user_id": self.memory_cache[session_id].user_or_tool_id,
367
- "top_k": self.memory_recall_num,
368
- },
369
- }
370
- json_data = json.dumps(json_data).encode()
371
- else:
372
- # TODO: we will implement the part in the future
373
- raise NotImplementedError
457
+ json_data = {
458
+ "op": VectorRunnerOP.search_by_context,
459
+ "param": {
460
+ "context_str": context_str,
461
+ "avatar_id": avatar_id,
462
+ "user_or_tool_id": self.memory_cache[session_id].user_or_tool_id,
463
+ "top_k": self.memory_recall_num,
464
+ },
465
+ }
466
+ json_data = json.dumps(json_data).encode()
374
467
 
375
468
  result = await asyncio.wait_for(
376
- self._executor.do_inference(QdrantRunner.INFERENCE_METHOD, json_data),
469
+ self._executor.do_inference(MEMORY_INFERENCE_METHOD, json_data),
377
470
  timeout=timeout,
378
471
  )
379
472
 
@@ -383,19 +476,19 @@ class MemoryLangchain(MemoryBase):
383
476
 
384
477
  data: dict[str, Any] = json.loads(result.decode())
385
478
 
386
- # Avatar Memory
387
- if data.get("avatar_memory_items", None):
388
- self.avatar_memory = rebuild_from_items(data["avatar_memory_items"])
389
-
390
- # User Memory
391
- if data.get("user_rmemory_items", None):
392
- self.user_memory = rebuild_from_items(data["user_rmemory_items"])
479
+ # Update Current Memory
480
+ if data.get("memory_items", None):
481
+ memory_items = rebuild_from_items(data["memory_items"])
482
+ self.avatar_memory = [it for it in memory_items if it.memory_type == MemoryType.Avatar]
483
+ self.user_memory = [
484
+ it for it in memory_items if it.memory_type == MemoryType.CONVERSATION
485
+ ]
486
+ self.tool_memory = [it for it in memory_items if it.memory_type == MemoryType.TOOLS]
393
487
 
394
488
  if data.get("error", None):
395
489
  logger.warning(f"Memory [search_by_context] err: {data['error']}")
396
490
 
397
491
  async def update(self, *, avatar_id: str, session_id: str | None = None):
398
- """Update the memory database with the cached messages."""
399
492
  if session_id is not None and session_id not in self.memory_cache:
400
493
  raise ValueError(
401
494
  f"Session ID {session_id} not found in memory cache. You need to call 'init_cache' first."
@@ -407,7 +500,6 @@ class MemoryLangchain(MemoryBase):
407
500
  else [(session_id, self.memory_cache[session_id])]
408
501
  )
409
502
 
410
- # ✅ accumulate instead of overwrite
411
503
  all_assistant: list[MemoryItem] = []
412
504
  all_user: list[MemoryItem] = []
413
505
  all_tool: list[MemoryItem] = []
@@ -415,49 +507,82 @@ class MemoryLangchain(MemoryBase):
415
507
  for _sid, cache in memory_tuple:
416
508
  chat_context = cache.messages
417
509
  if not chat_context:
418
- logger.info(f"[sid: {_sid}] Memory message is empty, UPDATE skip!")
419
- continue # ✅ important
510
+ logger.warning(f"[sid: {_sid}] Memory message is empty, UPDATE skip!")
511
+ continue
420
512
 
421
513
  message_content: str = MemoryPluginsTemplate.apply_update_template(
422
514
  chat_context, cache.type
423
515
  )
424
516
 
425
- delta: MemoryDelta = await self._safe_ainvoke_delta(
426
- memory_type=cache.type,
427
- message_content=message_content,
428
- timeout=12.0,
429
- )
517
+ if cache.type == MemoryType.CONVERSATION:
518
+ conversation_delta, tool_delta = await asyncio.gather(
519
+ self._safe_ainvoke_conversation_delta(
520
+ message_content=message_content,
521
+ timeout=30.0,
522
+ ),
523
+ self._safe_ainvoke_tool_delta(
524
+ message_content=message_content,
525
+ timeout=30.0,
526
+ ),
527
+ )
430
528
 
431
- assistant_memories, user_memories, tool_memories = self._apply_delta(
432
- avatar_id, delta, cache
433
- )
529
+ conv_avatar, conv_user = self._apply_delta_to_bucket(
530
+ avatar_id=avatar_id,
531
+ delta=conversation_delta,
532
+ memory_cache=cache,
533
+ user_or_tool_memory_type=MemoryType.CONVERSATION,
534
+ )
535
+
536
+ tool_avatar, tool_memories = self._apply_delta_to_bucket(
537
+ avatar_id=avatar_id,
538
+ delta=tool_delta,
539
+ memory_cache=cache,
540
+ user_or_tool_memory_type=MemoryType.TOOLS,
541
+ )
434
542
 
435
- all_assistant.extend(assistant_memories)
436
- all_user.extend(user_memories)
437
- all_tool.extend(tool_memories)
543
+ all_assistant.extend(conv_avatar)
544
+ all_assistant.extend(tool_avatar)
545
+ all_user.extend(conv_user)
546
+ all_tool.extend(tool_memories)
547
+
548
+ else:
549
+ tool_delta = await self._safe_ainvoke_tool_delta(
550
+ message_content=message_content,
551
+ timeout=30.0,
552
+ )
553
+
554
+ tool_avatar, tool_memories = self._apply_delta_to_bucket(
555
+ avatar_id=avatar_id,
556
+ delta=tool_delta,
557
+ memory_cache=cache,
558
+ user_or_tool_memory_type=MemoryType.TOOLS,
559
+ )
560
+
561
+ all_assistant.extend(tool_avatar)
562
+ all_tool.extend(tool_memories)
438
563
 
439
564
  self.avatar_memory = all_assistant
440
565
  self.user_memory = all_user
441
566
  self.tool_memory = all_tool
442
567
 
443
568
  async def save(self, timeout: float = 3):
444
- # 1) Collect updated MemoryItem objects (not dict yet)
569
+ # 1. Collect updated MemoryItem objects (not dict yet)
445
570
  updated_items: list[MemoryItem] = [item for item in self.memory_items if item.updated]
446
571
 
447
572
  if not updated_items:
448
573
  logger.info("Avatar Memory SAVE skip!")
449
574
  return
450
575
 
451
- # 2) Split buckets by memory_type (optional but recommended)
576
+ # 2. Split buckets by memory_type (optional but recommended)
452
577
  avatar_items = [x for x in updated_items if x.memory_type == MemoryType.Avatar]
453
578
  user_items = [x for x in updated_items if x.memory_type == MemoryType.CONVERSATION]
454
579
  tool_items = [x for x in updated_items if x.memory_type == MemoryType.TOOLS]
455
580
 
456
- # 3) Apply priority selection with quotas
457
- # You can tune these numbers; idea: keep incidents/decisions, allow small amount of social.
581
+ # 3. Apply priority selection with quotas
582
+ ## 3.1 You can tune these numbers; idea: keep incidents/decisions, allow small amount of social.
458
583
  max_total = getattr(self, "maximum_memory_num", 24)
459
584
 
460
- # Per bucket limits (sum can exceed max_total; we'll cap again later)
585
+ ## 3.2 Per bucket limits (sum can exceed max_total; we'll cap again later)
461
586
  avatar_selected = _select_by_priority(
462
587
  avatar_items, limit=min(10, max_total), social_limit=1
463
588
  )
@@ -466,17 +591,29 @@ class MemoryLangchain(MemoryBase):
466
591
 
467
592
  selected = avatar_selected + user_selected + tool_selected
468
593
 
469
- # 4) Global cap (final)
594
+ # 4. Global cap (final)
470
595
  selected.sort(key=lambda x: _memory_priority(x), reverse=True)
471
596
  selected = selected[:max_total]
472
597
 
473
- # 5) Convert to dict for storage
598
+ # 5. Convert to dict for storage
474
599
  memory_items: list[dict] = flatten_items(selected)
475
600
 
476
601
  if not memory_items:
477
602
  logger.info("Memory SAVE skip after priority filtering (no items selected).")
478
603
  return
479
604
 
605
+ ## 5.1 Save to local .md file for backup/debug
606
+ try:
607
+ md_result = save_memory_items_to_markdown(
608
+ avatar_memory_path=self._avatar_memory_path,
609
+ session_memory_path=self._session_memory_path,
610
+ memory_items=memory_items,
611
+ )
612
+ logger.info(f"Memory local markdown backup success: {md_result}")
613
+ except Exception as e:
614
+ logger.warning(f"Memory local markdown backup failed: {e}")
615
+
616
+ ## 5.2 Save to VDB via runner
480
617
  json_data = {
481
618
  "op": VectorRunnerOP.save,
482
619
  "param": {"memory_items": memory_items},
@@ -485,7 +622,7 @@ class MemoryLangchain(MemoryBase):
485
622
  try:
486
623
  result = await asyncio.wait_for(
487
624
  self._executor.do_inference(
488
- QdrantRunner.INFERENCE_METHOD, json.dumps(json_data).encode()
625
+ MEMORY_INFERENCE_METHOD, json.dumps(json_data).encode()
489
626
  ),
490
627
  timeout=timeout,
491
628
  )