keepsake-memory 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
keepsake/__init__.py ADDED
@@ -0,0 +1,558 @@
1
+ """
2
+ keepsake — Keepsake记忆系统 for Hermes Agent.
3
+
4
+ 每次对话自动检索相关记忆注入上下文,支持:
5
+ - 🔍 向量搜索 — RediSearch KNN 语义检索
6
+ - ⏳ 时间衰减 — 新记忆权重高,旧记忆逐步降权
7
+ - 📝 自动写入 — memory(action='add') 操作自动存档完整内容
8
+ - 🏷️ 标签过滤 — 可选按标签范围搜索
9
+
10
+ 安装: pip install keepsake-memory
11
+ 激活: config.yaml 中设置 memory.provider: keepsake
12
+
13
+ 配置优先级: 环境变量 > 配置文件 > 默认值
14
+ 配置文件: ~/.config/keepsake/config.json (或 KEEPSAKE_CONFIG 自定义路径)
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import json
20
+ import logging
21
+ import os
22
+ from pathlib import Path
23
+ from typing import Any, Dict, List, Optional
24
+
25
+ from agent.memory_provider import MemoryProvider
26
+ from tools.registry import tool_error
27
+
28
+ from .embedder import create_embedder
29
+ from .storage import RedisStorage
30
+ from .consolidator import Consolidator
31
+ from .forgetter import Forgetter
32
+
33
+ # ---------------------------------------------------------------------------
34
+ # 工具扇区(供 Hermes MemoryProvider 注册)
35
+ # ---------------------------------------------------------------------------
36
+
37
# Tool schema for ``keepsake_feedback``: marks one stored fragment as useful
# or not useful. Both arguments are required.
FEEDBACK_SCHEMA = {
    "name": "keepsake_feedback",
    "description": (
        "记录用户对一条记忆的反馈 — 标记有用/没用。"
        "正反馈让该记忆在未来搜索中排名更高,"
        "负反馈大幅降权(标记为没用的记忆几乎不会再出现)。"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            # Redis key of the fragment the feedback applies to.
            "fragment_key": {
                "type": "string",
                "description": "碎片的 Redis key(如 memory:frag:abc123),从相关碎片的 key 字段获得。",
            },
            # True = the memory was useful, False = it was not.
            "is_positive": {
                "type": "boolean",
                "description": "True = 这条记忆有用,False = 没用",
            },
        },
        "required": ["fragment_key", "is_positive"],
    },
}
59
+
60
# Tool schema for ``keepsake_topics``: queries hot-topic statistics.
# Both arguments are optional.
HOT_TOPICS_SCHEMA = {
    "name": "keepsake_topics",
    "description": (
        "查询全局热门话题统计。返回跨会话出现最频繁的话题词。"
        "可选日榜/周榜/全局。"
    ),
    "parameters": {
        "type": "object",
        "properties": {
            # Number of entries to return (default 10).
            "limit": {
                "type": "integer",
                "description": "返回条数(默认 10,最大 30)",
                "default": 10,
            },
            # Statistics window: all / daily / weekly.
            "period": {
                "type": "string",
                "enum": ["all", "daily", "weekly"],
                "description": "统计周期:all=全局, daily=日榜, weekly=周榜",
                "default": "all",
            },
        },
        "required": [],
    },
}
84
+
85
+
86
+ logger = logging.getLogger(__name__)
87
+
88
+ _DEFAULT_CONFIG_PATH = "~/.config/keepsake/config.json"
89
+
90
+
91
+ def _load_json_config() -> dict:
92
+ """从 JSON 配置文件加载配置。
93
+
94
+ 路径来源(优先级高到低):
95
+ 1. 环境变量 KEEPSAKE_CONFIG
96
+ 2. ~/.config/keepsake/config.json
97
+ 文件不存在时返回空 dict。
98
+ """
99
+ path_str = os.environ.get("KEEPSAKE_CONFIG") or _DEFAULT_CONFIG_PATH
100
+ path = Path(path_str).expanduser()
101
+ if not path.exists():
102
+ logger.debug("keepsake: config file not found at %s", path)
103
+ return {}
104
+ try:
105
+ with open(path) as f:
106
+ cfg: dict = json.load(f)
107
+ logger.info("keepsake: loaded config from %s", path)
108
+ return cfg
109
+ except (json.JSONDecodeError, OSError) as e:
110
+ logger.warning("keepsake: failed to load config from %s: %s", path, e)
111
+ return {}
112
+
113
+
114
+ def _deep_merge(base: dict, override: dict) -> dict:
115
+ """递归合并两个 dict,override 覆盖 base。"""
116
+ result = base.copy()
117
+ for key, val in override.items():
118
+ if key in result and isinstance(result[key], dict) and isinstance(val, dict):
119
+ result[key] = _deep_merge(result[key], val)
120
+ else:
121
+ result[key] = val
122
+ return result
123
+
124
+
125
class KeepsakeProvider(MemoryProvider):
    """Keepsake memory provider for Hermes Agent.

    Coexists with the Hermes builtin memory. ``prefetch`` retrieves fragments
    related to the user message and renders them for the context;
    ``on_memory_write`` archives the full content of ``memory(action='add')``
    operations.

    Configuration precedence, highest first (as implemented by
    ``_resolve_config``):
        1. Inline config (keyword arguments given to ``__init__``, i.e. the
           Hermes ``config.yaml`` memory section)
        2. Environment variables (``KEEPSAKE_REDIS_HOST`` etc.)
        3. JSON config file (``~/.config/keepsake/config.json``)
        4. Hard-coded defaults
    """

    _initialized: bool = False
    _storage: Optional[RedisStorage] = None
    _tag_filter: str = ""
    _consolidator: Optional[Consolidator] = None
    _forgetter: Optional[Forgetter] = None
    _last_maintenance: float = 0.0
    _maintenance_interval: float = 7200.0  # run maintenance every 2 h

    def __init__(self, **config):
        """Store the inline configuration; nothing connects until ``initialize``.

        The keyword arguments come from the ``config.yaml`` memory section,
        for example::

            memory:
              provider: keepsake
              redis_host: 127.0.0.1
              redis_port: 6379
              top_k: 5
              candidate_k: 10
              tag_filter: ""
              embedder:
                provider: openai
                api_key: sk-xxx
                base_url: https://api.openai.com/v1
                model: text-embedding-3-small
        """
        super().__init__()
        self._config = config

    # ------------------------------------------------------------------
    # Configuration merging
    # ------------------------------------------------------------------

    @staticmethod
    def _resolve_config(inline_cfg: dict) -> dict:
        """Merge all configuration sources and return the final config.

        Merge order (later overrides earlier):
        defaults <- JSON file <- environment variables <- ``inline_cfg``.

        Args:
            inline_cfg: Settings passed to ``__init__`` (the Hermes
                ``config.yaml`` memory section).

        Returns:
            The merged config. ``is_primary`` is normalized to ``bool``,
            ``skip_min_length`` is always present, and ``skip_patterns`` is
            always a ``set`` of lower-cased patterns.

        Raises:
            ValueError: If no source provides a non-empty ``agent_id``.
        """
        # 1. Hard-coded defaults. No embedder here: it is only enabled when a
        #    config source asks for one.
        cfg: dict = {
            "redis_host": "127.0.0.1",
            "redis_port": 6379,
            "redis_password": "",
            "top_k": 5,
            "candidate_k": 10,
            "tag_filter": "",
            "synonym_min_word_freq": 10,
            "synonym_jaccard_threshold": 0.5,
            "synonym_min_co_occurrence": 3,
            "entity_cooc_top_n": 3,
            "entity_cooc_min_count": 2,
        }

        # 2. JSON config file.
        cfg = _deep_merge(cfg, _load_json_config())

        # 3. Environment variables; unset variables leave the value alone.
        env_overrides = {
            "redis_host": os.environ.get("KEEPSAKE_REDIS_HOST"),
            "redis_port": os.environ.get("KEEPSAKE_REDIS_PORT"),
            "redis_password": os.environ.get("KEEPSAKE_REDIS_PASSWORD"),
            "top_k": os.environ.get("KEEPSAKE_TOP_K"),
            "candidate_k": os.environ.get("KEEPSAKE_CANDIDATE_K"),
            "tag_filter": os.environ.get("KEEPSAKE_TAG_FILTER"),
            "agent_id": os.environ.get("KEEPSAKE_AGENT_ID"),
            "is_primary": os.environ.get("KEEPSAKE_IS_PRIMARY"),
        }
        for key, val in env_overrides.items():
            if val is not None:
                cfg[key] = val

        # 4. Inline config passed in by Hermes.
        cfg = _deep_merge(cfg, inline_cfg)

        # 5. agent_id is mandatory.
        agent_id = cfg.get("agent_id")
        if agent_id is None or agent_id == "":
            raise ValueError("agent_id must be configured in config file, environment variable, or inline config")

        # 6. is_primary defaults to False; string forms (e.g. from the
        #    environment) are parsed.
        is_primary = cfg.get("is_primary", False)
        if isinstance(is_primary, str):
            is_primary = is_primary.lower() in ("true", "1", "yes", "on")
        cfg["is_primary"] = bool(is_primary)

        # 7. Skip settings: queries shorter than skip_min_length (default 2)
        #    are never searched.
        cfg["skip_min_length"] = cfg.get("skip_min_length", 2)

        # skip_patterns_file holds one pattern per line; blank lines and
        # lines starting with "#" are ignored. The result is always a set,
        # and stays empty when the file is absent or cannot be read.
        patterns: set = set()
        skip_patterns_file = cfg.get("skip_patterns_file", "")
        if skip_patterns_file:
            patterns_path = Path(skip_patterns_file).expanduser()
            if patterns_path.exists():
                try:
                    # Explicit UTF-8 so non-ASCII patterns do not depend on
                    # the platform default encoding.
                    with open(patterns_path, encoding="utf-8") as f:
                        loaded = set()
                        for line in f:
                            line = line.strip()
                            if line and not line.startswith("#"):
                                loaded.add(line.lower())
                    patterns = loaded
                except (OSError, ValueError) as e:
                    # ValueError covers UnicodeDecodeError.
                    logger.warning("keepsake: failed to load skip patterns from %s: %s", patterns_path, e)
        cfg["skip_patterns"] = patterns

        return cfg

    def _should_search(self, query: str) -> bool:
        """Decide whether a user message is worth a fragment search.

        A query is skipped when it is empty, when its stripped length is
        below ``_skip_min_length`` (default 2), or when it exactly matches
        (case-insensitively) one of ``_skip_patterns``.
        """
        if not query:
            return False
        q = query.strip()
        min_len = int(getattr(self, '_skip_min_length', 2))
        if len(q) < min_len:
            return False
        patterns = getattr(self, '_skip_patterns', [])
        if q.lower() in patterns:
            return False
        return True

    # ------------------------------------------------------------------
    # MemoryProvider interface
    # ------------------------------------------------------------------

    @property
    def name(self) -> str:
        """Provider name."""
        return "keepsake"

    def is_available(self) -> bool:
        """Return True when the ``redis`` package can be imported."""
        try:
            import redis as _  # noqa: F401
        except ImportError:
            return False
        return True

    def initialize(self, session_id: str, **kwargs) -> None:
        """Resolve config, build the storage and make sure the index exists.

        When ``ensure_index()`` reports failure a warning is logged and the
        method returns early: ``_initialized`` stays False and no maintenance
        engines are created. A storage left over from an earlier
        ``initialize`` call is closed before it is replaced.

        Raises:
            ValueError: Propagated from ``_resolve_config`` when ``agent_id``
                is not configured.
        """
        cfg = self._resolve_config(self._config)

        # Load / reload the jieba custom dictionary (takes effect on /new).
        from .splitter import init_domain_dict
        init_domain_dict()

        redis_host = cfg.get("redis_host", "127.0.0.1")
        redis_port = int(cfg.get("redis_port", 6379))
        top_k = int(cfg.get("top_k", 5))
        candidate_k = int(cfg.get("candidate_k", 10))
        self._tag_filter = cfg.get("tag_filter", "")

        # Skip settings only depend on the config, so set them before
        # anything that can bail out early.
        self._skip_min_length = cfg.get("skip_min_length", 2)
        self._skip_patterns = cfg.get("skip_patterns", set())

        # "embedder" or its "provider" may be null in the config; treat that
        # the same as "not configured".
        embed_cfg = cfg.get("embedder") or {}
        embed_provider = str(embed_cfg.get("provider") or "").strip().lower()
        # An embedder is only created when a provider is explicitly
        # configured; otherwise run in BM25-only mode.
        if embed_provider not in ("", "default", "none"):
            embedder = create_embedder(
                provider=embed_cfg.get("provider", ""),
                api_key=embed_cfg.get("api_key", ""),
                base_url=embed_cfg.get("base_url", ""),
                model=embed_cfg.get("model", ""),
            )
            embed_dim = embedder.dimension
            logger.info(
                "keepsake: embedder enabled (%s, dim=%d)",
                embed_provider, embed_dim,
            )
        else:
            embedder = None
            embed_dim = 1536  # dimension handed to storage when there is no embedder
            logger.info("keepsake: BM25-only mode (no embedder configured)")

        storage = RedisStorage(
            embedder=embedder,
            host=redis_host,
            port=redis_port,
            password=cfg.get("redis_password") or None,
            candidate_count=candidate_k,
            final_limit=top_k,
            embed_dim=embed_dim,
            bm25_limit=int(cfg.get("bm25_limit", 10)),
            decay_half_days=int(cfg.get("decay_half_days", 60)),
            embed_cache_ttl=int(cfg.get("embed_cache_ttl", 3600)),
            sentiment_boost_positive=float(cfg.get("sentiment_boost_positive", 1.5)),
            sentiment_boost_negative=float(cfg.get("sentiment_boost_negative", 1.3)),
            feedback_positive_boost=float(cfg.get("feedback_positive_boost", 1.3)),
            feedback_negative_penalty=float(cfg.get("feedback_negative_penalty", 0.5)),
            hot_topic_boost=float(cfg.get("hot_topic_boost", 1.2)),
            hot_topic_decay_half_days=int(cfg.get("hot_topic_decay_half_days", 30)),
            emotion_intensity_factor=float(cfg.get("emotion_intensity_factor", 0.4)),
            attention_boost_max=float(cfg.get("attention_boost_max", 1.5)),
            attention_base_increment=float(cfg.get("attention_base_increment", 2.0)),
            attention_emotion_factor=float(cfg.get("attention_emotion_factor", 1.5)),
            agent_id=cfg.get("agent_id", ""),
            is_primary=cfg.get("is_primary", False),
            synonym_min_word_freq=int(cfg.get("synonym_min_word_freq", 10)),
            synonym_jaccard_threshold=float(cfg.get("synonym_jaccard_threshold", 0.5)),
            synonym_min_co_occurrence=int(cfg.get("synonym_min_co_occurrence", 3)),
            entity_cooc_top_n=int(cfg.get("entity_cooc_top_n", 3)),
            entity_cooc_min_count=int(cfg.get("entity_cooc_min_count", 2)),
        )

        # Swap in the new storage and release the one from a previous
        # initialize() call so re-initialization does not leave it open.
        previous = self._storage
        self._storage = storage
        self._initialized = False
        self._consolidator = None
        self._forgetter = None
        if previous:
            try:
                previous.close()
            except Exception as e:
                logger.debug("keepsake: failed to close previous storage: %s", e)

        # Create / verify the index.
        if not self._storage.ensure_index():
            logger.warning(
                "keepsake: Redis / RediSearch not ready at %s:%s",
                redis_host, redis_port,
            )
            return

        self._initialized = True
        logger.info(
            "keepsake: connected (session=%s, top_k=%d, tag_filter=%s)",
            session_id, top_k, self._tag_filter or "(none)",
        )

        # Maintenance engines: Consolidator and Forgetter.
        self._consolidator = Consolidator(
            storage=self._storage,
            min_group_size=int(cfg.get("consolidate_min_group", 2)),
            max_age_hours=int(cfg.get("consolidate_max_age_hours", 72)),
        )
        self._forgetter = Forgetter(
            storage=self._storage,
            max_age_days=int(cfg.get("forget_max_age_days", 30)),
            dry_run=bool(cfg.get("forget_dry_run", True)),
        )
        logger.info("keepsake: maintenance engines initialized")

    def system_prompt_block(self) -> str:
        """Return the system-prompt text describing this memory system.

        The tool-call examples use the argument names declared in
        ``FEEDBACK_SCHEMA`` (``fragment_key`` / ``is_positive``).
        """
        parts = [
            "你有Keepsake记忆系统(keepsake),连接在 Redis + RediSearch 上。",
            "当执行 memory(action='add') 操作时,系统会自动存储完整内容并支持后续检索。",
            "相关的记忆条目就在下面「相关记忆」段落里,直接使用即可。",
            "记忆综合排序 = BM25相似度 × 时间衰减 × 情感权重 × 反馈权重 × 热门话题权重。",
            "正反馈用 keepsake_feedback(fragment_key, is_positive=True) 标记有用,",
            "负反馈用 keepsake_feedback(fragment_key, is_positive=False) 标记没用。",
            "热门话题用 keepsake_topics() 查询。",
        ]
        return "\n".join(parts)

    @staticmethod
    def _render_fragment(index: int, frag: Dict[str, Any]) -> List[str]:
        """Render one search hit as context lines: content, metadata, blank."""
        info_parts = []
        tags = frag.get("tags", "")
        if tags:
            info_parts.append(f"标签: {tags}")
        combined = frag.get("_combined_score", 0)
        info_parts.append(f"综合: {combined:.2f}")
        weights = frag.get("_weights", {})
        if weights:
            info_parts.append(f"w: sim={weights.get('sim',0):.2f} decay={weights.get('decay',0):.2f} "
                              f"emotion={weights.get('emotion',1):.1f} fb={weights.get('feedback',1):.1f} "
                              f"hot={weights.get('hot_topic',1):.1f}")
        # Show the sentiment label only when it is not neutral.
        sent_label = frag.get("sentiment_label", "")
        if sent_label and sent_label != "neutral":
            sent_score = frag.get("sentiment_score", "0")
            icon = "😊" if sent_label == "positive" else "😠"
            info_parts.append(f"{icon} {sent_label}({sent_score})")
        return [
            f"[{index}] {frag.get('content', '')}",
            f" ({', '.join(info_parts)})",
            "",
        ]

    def prefetch(self, query: str, *, session_id: str = "") -> str:
        """Search fragments related to the user message for context injection.

        Returns:
            A ``<keepsake>`` block with one entry per fragment, lines
            separated by real newlines. Returns ``""`` when the query is
            empty or filtered by ``_should_search``, the storage is not set,
            the ``keepsake:workflow_lock`` key exists, or nothing matched.
        """
        # Check emptiness first: _should_search strips the query.
        if not query or not self._storage:
            return ""
        if not self._should_search(query):
            return ""

        import time as _time

        # Honor the workflow lock (reuses the storage's Redis client).
        try:
            lock_client = self._storage._get_client()
            if lock_client and lock_client.exists("keepsake:workflow_lock"):
                logger.debug("keepsake: workflow lock active, skipping search")
                return ""
        except Exception as e:
            # Best effort: a failed lock check must not block the search,
            # but leave a trace instead of swallowing it silently.
            logger.debug("keepsake: workflow lock check failed: %s", e)

        # A monotonic clock is not affected by wall-clock adjustments.
        start = _time.monotonic()
        fragments = self._storage.search(
            query.strip(),
            tag_filter=self._tag_filter,
        )
        elapsed = _time.monotonic() - start

        if not fragments:
            return ""

        lines = ["<keepsake>"]
        lines.append(f"# 相关记忆 (检索耗时 {elapsed:.1f}s)")
        lines.append("")
        for i, frag in enumerate(fragments, 1):
            lines.extend(self._render_fragment(i, frag))
        lines.append("</keepsake>")
        # Join with a real newline; "\\n" would put a literal backslash-n
        # between entries.
        return "\n".join(lines)

    def _maybe_maintain(self) -> None:
        """Run ``maintenance()`` if ``_maintenance_interval`` seconds have passed."""
        import time as _time
        now = _time.time()
        if now - self._last_maintenance < self._maintenance_interval:
            return
        self._last_maintenance = now
        self.maintenance()

    def maintenance(self) -> Dict[str, Any]:
        """Run one full maintenance round: consolidation, then forgetting.

        Each step is skipped when its engine is not set. An exception in one
        step is logged and recorded without stopping the other step.

        Returns:
            A dict with ``"consolidator"`` and ``"forgetter"`` entries. Each
            is the engine's own result, ``{"status": "skipped"}``, or
            ``{"status": "error", "reason": ...}``.
        """
        stats: Dict[str, Any] = {
            "consolidator": {"status": "skipped"},
            "forgetter": {"status": "skipped"},
        }

        # Step 1: consolidation.
        if self._consolidator:
            try:
                result = self._consolidator.consolidate()
                stats["consolidator"] = result
                logger.info("keepsake: consolidation done — %s", result)
            except Exception as e:
                logger.warning("keepsake: consolidation error: %s", e)
                stats["consolidator"] = {"status": "error", "reason": str(e)}

        # Step 2: selective forgetting.
        if self._forgetter:
            try:
                result = self._forgetter.forget()
                stats["forgetter"] = result
                logger.info("keepsake: forgetting done — %s", result)
            except Exception as e:
                logger.warning("keepsake: forgetting error: %s", e)
                stats["forgetter"] = {"status": "error", "reason": str(e)}

        return stats

    def get_tool_schemas(self) -> List[Dict[str, Any]]:
        """Return the schemas of the tools this provider exposes."""
        return [FEEDBACK_SCHEMA, HOT_TOPICS_SCHEMA]

    def handle_tool_call(
        self,
        tool_name: str,
        args: Dict[str, Any],
        **kwargs,
    ) -> str:
        """Route tool calls to the appropriate handler."""
        import json as _json

        if tool_name == "keepsake_feedback":
            return self._handle_feedback(args, _json)
        elif tool_name == "keepsake_topics":
            return self._handle_hot_topics(args, _json)
        return tool_error(f"Unknown keepsake memory tool: '{tool_name}'")

    # ------------------------------------------------------------------
    # Tool handlers
    # ------------------------------------------------------------------

    def _handle_feedback(self, args: Dict[str, Any], _json) -> str:
        """Record positive / negative feedback for one fragment.

        ``is_positive`` defaults to True when missing. String values are
        parsed ("true"/"1"/"yes"/"on" and "false"/"0"/"no"/"off") because
        ``bool("false")`` would otherwise count as positive; any other
        string is rejected.
        """
        key = args.get("fragment_key", "")
        if not key:
            return tool_error("fragment_key is required")

        raw_flag = args.get("is_positive", True)
        if isinstance(raw_flag, str):
            token = raw_flag.strip().lower()
            if token in ("true", "1", "yes", "on"):
                is_pos = True
            elif token in ("false", "0", "no", "off"):
                is_pos = False
            else:
                return tool_error("is_positive must be a boolean")
        else:
            is_pos = bool(raw_flag)

        if not self._storage:
            return tool_error("Memory storage not initialized")
        ok = self._storage.record_feedback(key, is_pos)
        if ok:
            action = "有用 👍" if is_pos else "没用 👎"
            # ensure_ascii=False keeps the text readable, matching the
            # hot-topics reply.
            return _json.dumps(
                {"success": True, "action": action, "key": key},
                ensure_ascii=False,
            )
        return tool_error("Failed to record feedback")

    def _handle_hot_topics(self, args: Dict[str, Any], _json) -> str:
        """Return hot topics as JSON; ``limit`` is clamped to 1..30."""
        raw_limit = args.get("limit", 10)
        if raw_limit is None:
            raw_limit = 10
        try:
            limit = int(raw_limit)
        except (TypeError, ValueError):
            return tool_error("limit must be an integer")
        limit = max(1, min(limit, 30))
        period = args.get("period", "all")
        if not self._storage:
            return tool_error("Memory storage not initialized")
        topics = self._storage.get_hot_topics(limit=limit, period=period)
        return _json.dumps({"topics": topics, "count": len(topics)}, ensure_ascii=False)

    def shutdown(self) -> None:
        """Close the storage and detach everything that referenced it."""
        if self._storage:
            try:
                self._storage.close()
            finally:
                # Drop the references so later calls see "not initialized"
                # rather than a closed storage.
                self._storage = None
                self._consolidator = None
                self._forgetter = None
                self._initialized = False
        logger.info("keepsake memory provider shutdown")

    def on_memory_write(
        self,
        action: str,
        target: str,
        content: str,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Mirror a builtin-memory ``add`` into the fragment store.

        The stripped content is stored whole, without splitting. Nothing is
        stored for other actions, for empty or whitespace-only content, or
        when the storage is not set.
        """
        if action != "add" or not content or not self._storage:
            return

        raw_text = content.strip()
        if not raw_text:
            return
        self._storage.store(
            text=raw_text,
            tags=target,
            category="memory_tool",
            source="hermes_agent",
            fragment_type="memory",
        )