keepsake-memory 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
keepsake/emotion.py ADDED
@@ -0,0 +1,136 @@
1
+ """情绪烈度分析 — 检测用户表达中的情绪强度,不判断正负。
2
+
3
+ 输入一段文本,返回 (情绪烈度, 极性标签):烈度取值 0.0~2.0,越高表示用户越激动。
4
+
5
+ 检测维度:
6
+ - 标点密度: !! 和 ?? 的数量
7
+ - 中文程度副词: 太、非常、极其、到底、完全、真的
8
+ - 重复字符: 啊啊啊、对对对、真的真的
9
+ - 反复问: 连续两个以上问句
10
+ - 英文全大写词 (English only)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import re
16
+ from typing import Tuple
17
+
18
# Chinese intensity adverbs (single characters plus common compounds).
_INTENSITY_ADVERBS = frozenset({
    "太", "超", "极", "巨", "贼", "老", "特", "爆",
    "非常", "极其", "超级", "格外", "分外", "过于",
    "无比", "绝顶", "十分", "相当", "特别",
    "完全", "根本", "彻底", "绝对",
    "真的", "真是", "简直", "实在",
    "到底", "究竟", "明明",
})

# One character repeated three or more times in a row (e.g. "啊啊啊").
_RE_REPEATED_CHAR = re.compile(r"(.)\1{2,}")

# A 2-4 character unit immediately repeated at least once (e.g. "真的真的").
_RE_REPEATED_WORD = re.compile(r"(.{2,4})\1{1,}")

# Exclamation / question marks in both ASCII and full-width (U+FF01, U+FF1F)
# form.  Escapes are used so the full-width characters survive any
# normalisation of this source file.
_EXCLAMATION_MARKS = "!\uff01"
_QUESTION_MARKS = "?\uff1f"

# Clusters of two or more exclamation marks, question marks, or any mix.
_RE_EXCLAMATION_CLUSTER = re.compile(r"[!\uff01]{2,}")
_RE_QUESTION_CLUSTER = re.compile(r"[?\uff1f]{2,}")
_RE_MIXED_PUNCT = re.compile(r"[!?\uff01\uff1f]{2,}")

# Fully upper-case English words of at least three letters.
_RE_CAPS_WORD = re.compile(r"\b[A-Z]{3,}\b")

# Negators used for polarity detection.
_NEGATORS = frozenset({"不", "没", "别", "勿", "无", "not", "no", "never"})

# Compounds that contain a negator character but do not negate anything:
# "不错" is a positive keyword and "无比" is an intensity adverb.
_NON_NEGATING_COMPOUNDS = ("不错", "无比")

# Runs of ASCII letters; used to match English negators as whole words.
_RE_ASCII_WORD = re.compile(r"[a-z]+")

# Polarity keywords.  They only influence the label, never the intensity.
_POSITIVE_KEYWORDS = frozenset({
    "喜欢", "不错", "满意", "nice", "good", "great",
    "太棒", "爱了", "绝了", "牛逼", "推荐", "值得",
})
_NEGATIVE_KEYWORDS = frozenset({
    "垃圾", "恶心", "讨厌", "烂", "烦", "失望", "差",
    "bad", "terrible", "hate", "useless", "废物",
})


def _score_intensity(raw: str) -> float:
    """Return the emotional intensity of *raw*, clamped to 0.0-2.0."""
    intensity = 0.0

    # 1a: punctuation clusters, 0.15 each, capped at 0.6.  A pure "!!" or
    # "??" cluster is matched by the mixed pattern as well, so it counts
    # twice (kept for score compatibility).
    mixed = _RE_MIXED_PUNCT.findall(raw)
    cluster_count = (
        len(_RE_EXCLAMATION_CLUSTER.findall(raw))
        + len(_RE_QUESTION_CLUSTER.findall(raw))
        + len(mixed)
    )
    intensity += min(cluster_count * 0.15, 0.6)

    # Extra 0.2 for every cluster that combines both marks (e.g. "?!").
    for cluster in mixed:
        has_exclamation = any(ch in _EXCLAMATION_MARKS for ch in cluster)
        has_question = any(ch in _QUESTION_MARKS for ch in cluster)
        if has_exclamation and has_question:
            intensity += 0.2

    # 1b: intensity adverbs; each adverb counts once if it occurs anywhere.
    adverb_hits = sum(1 for adv in _INTENSITY_ADVERBS if adv in raw)
    intensity += min(adverb_hits * 0.2, 0.6)

    # 1c: repeated characters.
    intensity += min(len(_RE_REPEATED_CHAR.findall(raw)) * 0.15, 0.3)

    # 1d: repeated words.
    intensity += min(len(_RE_REPEATED_WORD.findall(raw)) * 0.2, 0.4)

    # 1e: all-caps English words.
    intensity += min(len(_RE_CAPS_WORD.findall(raw)) * 0.15, 0.3)

    # 1f: several question marks anywhere in the text (repeated questioning).
    question_marks = sum(1 for ch in raw if ch in _QUESTION_MARKS)
    if question_marks >= 2:
        intensity += min((question_marks - 1) * 0.1, 0.3)

    return max(0.0, min(2.0, intensity))


def _find_negators(text_lower: str) -> set:
    """Return the negators that genuinely occur in lower-cased *text_lower*.

    Chinese negators are matched by substring after the non-negating
    compounds ("不错", "无比") are blanked out, so a "不" elsewhere in the text
    is still detected.  English negators must appear as whole words, so
    "know" or "note" no longer count as "no" / "not".
    """
    stripped = text_lower
    for compound in _NON_NEGATING_COMPOUNDS:
        # Replace with a space so the neighbours are not glued together.
        stripped = stripped.replace(compound, " ")

    english_words = set(_RE_ASCII_WORD.findall(stripped))
    found = set()
    for negator in _NEGATORS:
        if negator.isascii():
            if negator in english_words:
                found.add(negator)
        elif negator in stripped:
            found.add(negator)
    return found


def analyze_emotion(text: str) -> Tuple[float, str]:
    """Analyse the emotional intensity and rough polarity of *text*.

    Args:
        text: The user text to analyse.  Empty or whitespace-only input is
            treated as neutral.

    Returns:
        ``(intensity, label)`` where ``intensity`` is a float in 0.0-2.0
        (rounded to 4 decimals) and ``label`` is one of
        ``"strong_positive"``, ``"positive"``, ``"negative"``,
        ``"strong_negative"`` or ``"neutral"``.  The ``strong_`` variants are
        used when ``intensity >= 1.2``.
    """
    if not text or not text.strip():
        return 0.0, "neutral"

    raw = text.strip()
    intensity = _score_intensity(raw)

    # Polarity: keyword presence decides the label, not the strength.
    lowered = raw.lower()
    has_positive = any(kw in lowered for kw in _POSITIVE_KEYWORDS)
    has_negative = any(kw in lowered for kw in _NEGATIVE_KEYWORDS)
    negated = bool(_find_negators(lowered))

    if has_positive and not negated:
        label = "strong_positive" if intensity >= 1.2 else "positive"
    elif has_negative or negated:
        label = "strong_negative" if intensity >= 1.2 else "negative"
    else:
        label = "neutral"

    return round(intensity, 4), label
keepsake/forgetter.py ADDED
@@ -0,0 +1,262 @@
1
+ """选择性遗忘 — 主动清理低价值碎片。
2
+
3
+ 价值判断维度:
4
+ 1. 年龄: 创建 > max_age_days 的碎片
5
+ 2. 反馈: feedback_score 为负或为零
6
+ 3. 情绪烈度: intensity 低(用户不激动的内容)
7
+ 4. 注意力: 从未被命中过高注意力话题
8
+ 5. 召回率: 从未被检索召回过(如果有关联字段追踪)
9
+
10
+ 只有多个维度同时低,才会被遗忘。防止误删有用信息。
11
+
12
+ 配置参数:
13
+ - max_age_days: 最大保留天数(默认 30)
14
+ - min_feedback_score: 最低反馈分(低于此值可遗忘,默认 0)
15
+ - batch_size: 每轮扫描数(默认 200)
16
+ - dry_run: 仅统计不删除(默认 True,安全模式)
17
+ - min_intensity: 最低情绪烈度(低于此值且其他维度也低才删,默认 0.3)
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import logging
23
+ from datetime import datetime, timezone
24
+ from typing import Any, Dict, List, Optional
25
+
26
logger = logging.getLogger(__name__)

# Default configuration values.
DEFAULT_MAX_AGE_DAYS = 30
DEFAULT_MIN_FEEDBACK_SCORE = 0
DEFAULT_BATCH_SIZE = 200
DEFAULT_DRY_RUN = True
DEFAULT_MIN_INTENSITY = 0.3


class Forgetter:
    """Selective-forgetting engine that prunes low-value stored memories.

    Two key families are scanned through the storage's client:

    * ``memory:frag:*`` - a fragment is forgettable only when it is not
      protected, is older than ``max_age_days``, has a ``sentiment_score``
      below ``min_intensity`` and does not match a high-attention topic.
    * ``memory:full:*`` - judged by age only (``last_accessed``, falling back
      to ``created``) against ``full_max_age_days``.

    Entries whose age cannot be determined are always kept.
    """

    # Hash fields fetched per fragment, in the order they are unpacked.
    _FRAGMENT_FIELDS = [
        "created", "feedback_score", "sentiment_score",
        "fragment_type", "source", "content",
    ]

    # Fragments whose attention weight exceeds this value are kept.
    _ATTENTION_KEEP_THRESHOLD = 1.1

    def __init__(
        self,
        storage: Any,
        max_age_days: int = DEFAULT_MAX_AGE_DAYS,
        min_feedback_score: int = DEFAULT_MIN_FEEDBACK_SCORE,
        batch_size: int = DEFAULT_BATCH_SIZE,
        dry_run: bool = DEFAULT_DRY_RUN,
        min_intensity: float = DEFAULT_MIN_INTENSITY,
        full_max_age_days: int = 60,
    ):
        """Store the configuration.

        Args:
            storage: Object providing ``_get_client()`` and
                ``match_attention(content)``.
            max_age_days: Fragments younger than this are never forgotten.
            min_feedback_score: Fragments with feedback above this value are
                protected.
            batch_size: ``count`` hint passed to every ``scan`` call.
            dry_run: When true, ``forget()`` only reports candidates.
            min_intensity: Fragments with at least this sentiment score are
                kept.
            full_max_age_days: Age limit for ``memory:full:*`` entries.
        """
        self._storage = storage
        self._max_age_days = max_age_days
        self._min_feedback_score = min_feedback_score
        self._batch_size = batch_size
        self._dry_run = dry_run
        self._min_intensity = min_intensity
        self._full_max_age_days = full_max_age_days

    def forget(self, force: bool = False) -> Dict[str, Any]:
        """Run one forgetting pass.

        Args:
            force: When true, ignore the ``dry_run`` setting and delete.

        Returns:
            A stats dict with ``scanned``, ``candidates``, ``deleted``,
            ``skipped_protected`` and ``dry_run``; or
            ``{"status": "error", "reason": ...}`` when the storage has no
            client.
        """
        client = self._storage._get_client()
        if not client:
            return {"status": "error", "reason": "Redis not available"}

        dry_run = self._dry_run and not force
        stats: Dict[str, Any] = {
            "scanned": 0,
            "candidates": 0,
            "deleted": 0,
            "skipped_protected": 0,
            "dry_run": dry_run,
        }

        forgettable = self._find_forgettable(client, stats)
        # Full memories (memory:full:*) are judged by age only.
        forgettable.extend(self._find_forgettable_full(client, stats))
        stats["candidates"] = len(forgettable)

        if not forgettable:
            return stats

        if dry_run:
            logger.info(
                "forgetter: [DRY RUN] would delete %d fragments (skipped %d protected)",
                len(forgettable), stats["skipped_protected"],
            )
            return stats

        deleted = 0
        for key in forgettable:
            # One failed delete must not abort the rest of the pass.
            try:
                client.delete(key)
            except Exception as e:
                logger.warning("forgetter: delete %s failed: %s", key, e)
            else:
                deleted += 1

        stats["deleted"] = deleted
        logger.info(
            "forgetter: deleted %d/%d forgettable fragments",
            deleted, len(forgettable),
        )
        return stats

    def _find_forgettable(
        self,
        client,
        stats: Dict[str, Any],
    ) -> List[str]:
        """Scan ``memory:frag:*`` and return the keys that may be forgotten.

        Updates ``stats["scanned"]`` per key and sets
        ``stats["skipped_protected"]`` to the number of protected fragments.
        """
        cutoff_ts = (
            datetime.now(timezone.utc).timestamp() - self._max_age_days * 86400
        )
        forgettable_keys: List[str] = []
        protected = 0
        cursor = 0

        while True:
            cursor, keys = client.scan(
                cursor=cursor,
                match="memory:frag:*",
                count=self._batch_size,
            )

            if keys:
                # Pipeline the HMGETs to cut network round trips.
                pipe = client.pipeline()
                for key_b in keys:
                    pipe.hmget(key_b, self._FRAGMENT_FIELDS)

                for key_b, fields in zip(keys, pipe.execute()):
                    stats["scanned"] += 1
                    if not fields or not any(fields):
                        continue  # key vanished or holds none of the fields

                    verdict = self._classify_fragment(fields, cutoff_ts)
                    if verdict == "protected":
                        protected += 1
                    elif verdict == "forget":
                        forgettable_keys.append(self._decode(key_b))

            # A zero cursor means the scan iteration is complete.
            if cursor == 0:
                break

        stats["skipped_protected"] = protected
        return forgettable_keys

    def _classify_fragment(self, fields, cutoff_ts: float) -> str:
        """Return ``"protected"``, ``"keep"`` or ``"forget"`` for a fragment.

        *fields* holds the raw values for ``_FRAGMENT_FIELDS``, in order.
        """
        created_str, fb_str, sent_str, frag_type, source, content = (
            self._decode(value) for value in fields
        )
        feedback = self._parse_float(fb_str, 0)

        # ---- protection rules ----
        # 1. Consolidated fragments are never deleted.
        if frag_type == "consolidated":
            return "protected"
        # 2. Memories saved via hermes_agent are kept unless feedback is negative.
        if source == "hermes_agent" and feedback >= 0:
            return "protected"
        # 3. Fragments with feedback above the threshold are kept.
        if feedback > self._min_feedback_score:
            return "protected"

        # ---- age ----
        # A missing or unparseable timestamp means the age is unknown; keep
        # the fragment rather than risk deleting something recent.
        created_ts = self._parse_timestamp(created_str)
        if created_ts is None or created_ts > cutoff_ts:
            return "keep"

        # ---- emotional intensity ----
        if self._parse_float(sent_str, 0) >= self._min_intensity:
            return "keep"

        # ---- attention ----
        if content:
            try:
                attn_w = self._storage.match_attention(content)
            except Exception as e:
                # Best effort: a failing lookup counts as "no attention".
                logger.debug("forgetter: attention lookup failed: %s", e)
            else:
                if attn_w and attn_w > self._ATTENTION_KEEP_THRESHOLD:
                    return "keep"  # high-attention topic

        return "forget"

    @staticmethod
    def _decode(value) -> str:
        """Return *value* as ``str``; ``None`` becomes ``""``, bytes are UTF-8."""
        if value is None:
            return ""
        return value.decode("utf-8") if isinstance(value, bytes) else str(value)

    @staticmethod
    def _parse_float(val, default: float = 0.0) -> float:
        """Convert *val* to ``float``, returning *default* on failure."""
        if val is None or val == "":
            return default
        try:
            return float(val)
        except (ValueError, TypeError):
            return default

    @staticmethod
    def _parse_timestamp(value) -> Optional[float]:
        """Parse a stored timestamp into epoch seconds.

        Accepts an ISO-8601 string or a numeric epoch value, as ``str`` or
        ``bytes``.  Returns ``None`` when the value is missing, unparseable
        or not finite.
        """
        if value is None:
            return None
        if isinstance(value, bytes):
            value = value.decode("utf-8", errors="replace")
        text = str(value).strip()
        if not text:
            return None

        try:
            # NOTE(review): a naive ISO string is interpreted as local time
            # by .timestamp(); confirm how the writer stores "created".
            return datetime.fromisoformat(text).timestamp()
        except (ValueError, OverflowError, OSError):
            pass

        try:
            epoch = float(text)
        except ValueError:
            return None
        # Reject NaN / infinities: they would defeat the cutoff comparison.
        if epoch != epoch or epoch in (float("inf"), float("-inf")):
            return None
        return epoch

    def _find_forgettable_full(
        self,
        client,
        stats: Dict[str, Any],
    ) -> List[str]:
        """Scan ``memory:full:*`` and return keys that are old enough to forget.

        Age is taken from ``last_accessed``, falling back to ``created``;
        entries with neither (or an unreadable value) are kept.
        """
        cutoff_ts = (
            datetime.now(timezone.utc).timestamp()
            - self._full_max_age_days * 86400
        )
        forgettable_keys: List[str] = []
        cursor = 0

        while True:
            cursor, keys = client.scan(
                cursor=cursor,
                match="memory:full:*",
                count=self._batch_size,
            )
            for key_b in keys:
                key = self._decode(key_b)
                stats["scanned"] += 1
                try:
                    stamp = (
                        client.hget(key, "last_accessed")
                        or client.hget(key, "created")
                    )
                except Exception as e:
                    logger.debug("forgetter: skip full memory key %s: %s", key, e)
                    continue

                last_used_ts = self._parse_timestamp(stamp)
                if last_used_ts is None:
                    continue  # age unknown, keep
                if last_used_ts > cutoff_ts:
                    continue  # recently accessed or created, keep
                forgettable_keys.append(key)

            if cursor == 0:
                break

        return forgettable_keys
keepsake/py.typed ADDED
File without changes