@scotthuang/engram 0.9.8 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/profile.d.ts +75 -10
- package/dist/profile.js +514 -118
- package/dist/profile.js.map +1 -1
- package/dist/settle.d.ts +2 -0
- package/dist/settle.js +111 -24
- package/dist/settle.js.map +1 -1
- package/openclaw.plugin.json +12 -1
- package/package.json +1 -1
package/dist/profile.js
CHANGED
|
@@ -1,66 +1,200 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Memory System Plugin - Profile (
|
|
2
|
+
* Memory System Plugin - Profile (四层语义画像)
|
|
3
3
|
*
|
|
4
|
-
*
|
|
4
|
+
* 四层架构:
|
|
5
5
|
* identity — 核心身份(姓名/城市/职业/家人),几乎不变,不衰减
|
|
6
6
|
* pattern — 行为模式(作息/饮食习惯/工作风格),统计驱动,慢衰减
|
|
7
7
|
* interest — 动态兴趣(当前项目/近期关注),高频变化,快衰减
|
|
8
|
+
* event — 一次性事件(排查/bug/配置),带 TTL,短期内自动消除
|
|
8
9
|
*
|
|
9
|
-
* 画像 JSON 结构 + 读写 + 分层衰减 + 压缩摘要
|
|
10
|
+
* 画像 JSON 结构 + 读写 + 分层衰减 + 压缩摘要 + 受控维度 + LLM 自审
|
|
10
11
|
*/
|
|
11
12
|
import { promises as fs } from "node:fs";
|
|
12
13
|
import { logger } from "./logger.js";
|
|
13
14
|
import { join } from "node:path";
|
|
14
|
-
/**
|
|
15
|
+
/**
 * Per-layer tuning table, keyed by layer name.
 *
 * - decayFactor:       multiplier applied to a tag's confidence on each decay pass
 *                      (1.0 means the layer never decays).
 * - pruneThreshold:    a tag whose confidence is at or below this value is dropped.
 * - defaultConfidence: confidence assigned to a newly created tag of this layer.
 * - ttlDays:           maximum age of `lastSeen`, in days, before the tag is dropped;
 *                      `Infinity` disables TTL pruning for the layer.
 *
 * Frozen (including each entry) so no caller can retune a layer by accident.
 */
const LAYER_CONFIG = Object.freeze({
    identity: Object.freeze({ decayFactor: 1.0, pruneThreshold: 0.1, defaultConfidence: 0.95, ttlDays: Infinity }),
    pattern: Object.freeze({ decayFactor: 0.995, pruneThreshold: 0.3, defaultConfidence: 0.7, ttlDays: 180 }),
    interest: Object.freeze({ decayFactor: 0.95, pruneThreshold: 0.25, defaultConfidence: 0.7, ttlDays: 30 }),
    event: Object.freeze({ decayFactor: 0.85, pruneThreshold: 0.3, defaultConfidence: 0.55, ttlDays: 7 }),
});
|
|
22
|
+
/**
 * Rank of each layer; a larger number wins when two tags are merged or when
 * tags are ordered for the core-tag list. Frozen because it is shared,
 * read-only lookup data.
 */
const LAYER_PRIORITY = Object.freeze({
    identity: 4,
    pattern: 3,
    interest: 2,
    event: 1,
});
|
|
28
|
+
// ============================================================================
|
|
29
|
+
// 受控维度表(Canonical Vocabulary)
|
|
30
|
+
// ============================================================================
|
|
31
|
+
/**
 * Controlled dimension vocabulary.
 *
 * Dimension names are normalised onto this list so that near-synonyms
 * (for example "技术", "技术调试", "技术运维", "技术/调试") do not fragment
 * the profile into many tiny dimensions.
 */
export const CANONICAL_DIMENSIONS = [
    // identity: name, occupation, family
    "身份",
    // routine: sleep / diet / exercise patterns
    "作息",
    // technology: programming, tech stack, tool preferences
    "技术",
    // projects: ongoing or long-running projects
    "项目",
    // interests: hobbies, areas being followed
    "兴趣",
    // preferences: interaction style, decision patterns
    "偏好",
    // relationships: family, important contacts
    "人际",
];
|
|
44
|
+
/**
 * Alias table: free-form dimension names (fragments produced when the LLM
 * invents its own dimension) -> controlled dimension name.
 *
 * Used to clean up historical data and to tolerate occasional out-of-vocabulary
 * output from the LLM.
 *
 * The table has a null prototype so that looking up an arbitrary, LLM-supplied
 * key such as "constructor" or "toString" yields `undefined` instead of an
 * inherited Object.prototype member. It is frozen because it is shared,
 * read-only lookup data.
 */
const DIMENSION_ALIAS = Object.freeze(Object.assign(Object.create(null), {
    // Technology family
    技术行为: "技术",
    技术工具: "技术",
    技术方向: "技术",
    技术开发: "技术",
    技术排查: "技术",
    技术领域: "技术",
    技术配置: "技术",
    技术问题排查: "技术",
    技术调试: "技术",
    技术运维: "技术",
    技术操作: "技术",
    技术测试: "技术",
    技术探索: "技术",
    技术实践: "技术",
    技术活动: "技术",
    技术文档: "技术",
    "技术/开发": "技术",
    "技术/运维": "技术",
    "技术/调试": "技术",
    "技术/项目": "项目",
    配置优化: "技术",
    系统维护: "技术",
    系统配置: "技术",
    自动化运维: "技术",
    功能规则: "技术",
    新闻工具: "技术",
    数据分析: "技术",
    网络工具: "技术",
    通信技术: "技术",
    配置文件管理: "技术",
    调试: "技术",
    "调试/排查": "技术",
    问题修复: "技术",
    系统清理: "技术",
    工具: "技术",
    "工具/平台": "技术",
    "工具/框架": "技术",
    AI工具: "技术",
    AI平台: "技术",
    AI模型: "技术",
    AI技术: "技术",
    "AI/产品": "技术",
    AI: "技术",
    // Project family
    应用场景: "项目",
    任务类型: "项目",
    协作规划: "项目",
    规划: "项目",
    活动: "项目",
    // Routine family
    生活作息: "作息",
    个人习惯: "作息",
    // Interest family
    兴趣爱好: "兴趣",
    新闻资讯: "兴趣",
    金融资讯: "兴趣",
    科技新闻: "兴趣",
    行业新闻: "兴趣",
    资讯: "兴趣",
    热点资讯: "兴趣",
    信息获取: "兴趣",
    投资: "兴趣",
    // Preference family
    决策风格: "偏好",
    生活: "偏好",
    // Relationship family
    家庭: "人际",
    // Non-profile topics. The original note says these should be demoted to the
    // event layer or dropped; this table itself only maps them onto "项目".
    出行: "项目",
    交通: "项目",
    "生活/计划": "项目",
    出行计划: "项目",
    旅行出行: "项目",
    地域活动: "项目",
    地点关注: "项目",
    工作动态: "项目",
    // Location-like names. The original note says locations are handled
    // separately through the `locations` field; "项目" is kept as a fallback.
    位置: "项目",
}));
|
|
129
|
+
/**
 * Keywords that mark a tag value as a one-off event (troubleshooting, fixes,
 * failures, clean-ups ...). A value containing any of them is treated as
 * belonging to the short-lived "event" layer instead of a long-term interest.
 *
 * Several case variants of "bug" are listed explicitly because the matcher
 * may compare case-sensitively. Frozen because the list is shared, read-only data.
 */
const EVENT_KEYWORDS = Object.freeze([
    "排查",
    "修复",
    "bug",
    "Bug",
    "BUG",
    "调试",
    "debug",
    "验证",
    "测试",
    "超时",
    "SIGTERM",
    "报错",
    "故障",
    "异常",
    "清理",
    "恢复",
]);
|
|
151
|
+
/**
 * Report whether a tag value looks like a one-off event, i.e. contains any
 * entry of EVENT_KEYWORDS.
 *
 * Matching is case-insensitive, so "Debug" and "sigterm" are caught without
 * listing every casing. Non-string or empty input is never an event; this
 * keeps callers that pass unvalidated LLM output from throwing.
 *
 * @param {string} value - tag text to inspect
 * @returns {boolean} true when at least one keyword occurs in `value`
 */
export function isLikelyEvent(value) {
    if (typeof value !== "string" || value.length === 0) {
        return false;
    }
    const haystack = value.toLowerCase();
    return EVENT_KEYWORDS.some((kw) => haystack.includes(kw.toLowerCase()));
}
|
|
154
|
+
/**
 * Map an arbitrary dimension name onto CANONICAL_DIMENSIONS.
 *
 * Resolution order:
 *   1. exact match with a controlled dimension -> returned as is;
 *   2. own entry in DIMENSION_ALIAS            -> the alias target;
 *   3. name starts with a controlled dimension (e.g. "技术/xxx") -> that dimension;
 *   4. anything else                           -> null (the caller decides what to do).
 *
 * The alias lookup only honours own properties, so names such as
 * "constructor" or "toString" cannot resolve to inherited Object.prototype
 * members. Non-string or blank input yields null instead of throwing.
 *
 * @param {string} dim - raw dimension name; surrounding whitespace is ignored
 * @returns {string | null} controlled dimension name, or null when unmapped
 */
export function canonicalizeDimension(dim) {
    if (typeof dim !== "string") {
        return null;
    }
    const name = dim.trim();
    if (name === "") {
        return null;
    }
    if (CANONICAL_DIMENSIONS.includes(name)) {
        return name;
    }
    if (Object.hasOwn(DIMENSION_ALIAS, name) && DIMENSION_ALIAS[name]) {
        return DIMENSION_ALIAS[name];
    }
    // Prefix match, e.g. "技术/xxx" or "项目/xxx".
    return CANONICAL_DIMENSIONS.find((canon) => name.startsWith(canon)) ?? null;
}
|
|
20
175
|
/**
 * Template for a profile with no data.
 *
 * The container fields (`coreTags`, `tags`, `locations`) are accessors that
 * hand out a brand-new empty value on every read. Object spread reads
 * properties through these accessors, so `{ ...EMPTY_PROFILE }` yields a copy
 * whose containers are not shared with the template or with any other copy;
 * mutating one "empty" profile can no longer leak tags into the next one.
 *
 * NOTE(review): `updatedAt` is still evaluated once, when the module is
 * loaded, exactly as before — confirm whether callers expect a fresh
 * timestamp per profile.
 */
export const EMPTY_PROFILE = {
    summary: "",
    get coreTags() {
        return [];
    },
    get tags() {
        return {};
    },
    get locations() {
        return { recent: [] };
    },
    updatedAt: new Date().toISOString(),
};
|
|
182
|
+
// ============================================================================
|
|
183
|
+
// 工具函数
|
|
184
|
+
// ============================================================================
|
|
26
185
|
/**
 * Return the layer a tag belongs to.
 *
 * Tags written before layers existed carry no `layer` field and count as
 * "interest". A `layer` value that is not a key of LAYER_CONFIG (for example
 * a typo in a hand-edited file or unexpected LLM output) also falls back to
 * "interest", so callers can index LAYER_CONFIG with the result safely.
 *
 * @param {{ layer?: string }} tag
 * @returns {string} a key of LAYER_CONFIG
 */
function getLayer(tag) {
    const declared = tag.layer;
    if (typeof declared === "string" && Object.hasOwn(LAYER_CONFIG, declared)) {
        return declared;
    }
    return "interest";
}
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
const
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
"技术领域": "技术",
|
|
40
|
-
"技术配置": "技术",
|
|
41
|
-
"技术问题排查": "技术",
|
|
42
|
-
"配置优化": "技术",
|
|
43
|
-
"系统维护": "技术",
|
|
44
|
-
"自动化运维": "技术",
|
|
45
|
-
"功能规则": "技术",
|
|
46
|
-
"新闻工具": "技术",
|
|
47
|
-
"数据分析": "技术",
|
|
48
|
-
"出行计划": "出行",
|
|
49
|
-
"旅行出行": "出行",
|
|
50
|
-
"生活作息": "作息",
|
|
51
|
-
"个人习惯": "生活",
|
|
52
|
-
"决策风格": "生活",
|
|
53
|
-
"应用场景": "项目",
|
|
54
|
-
};
|
|
55
|
-
/** 语义去重组:[保留项, ...要合并删除的等价项] */
|
|
56
|
-
const DEDUP_GROUPS = [
|
|
57
|
-
["talk-to-shadow", "talk-to-shadow语音交互项目", "talk-to-shadow方案"],
|
|
58
|
-
["声纹识别", "声纹识别逻辑修正", "置信度阈值调试"],
|
|
59
|
-
["语音交互/声纹识别", "TTS语音合成", "语音系统优化", "语音引擎切换"],
|
|
60
|
-
["家庭成员身份识别", "家庭成员声纹识别", "陌生人识别规则优化"],
|
|
61
|
-
["news-knowledge-base", "新闻知识库开发维护", "新闻知识库项目管理者", "AI-Agent新闻整理"],
|
|
62
|
-
["AI记忆系统研究者", "LLM应用技术关注者"],
|
|
63
|
-
];
|
|
189
|
+
/**
 * Number of days from `iso1` to `iso2` (fractional, never negative).
 *
 * A negative span (`iso1` later than `iso2`) is clamped to 0. If either
 * timestamp cannot be parsed the function returns 0 instead of NaN, so the
 * result is always a usable number; note that such an entry therefore never
 * looks "old" to a caller comparing against a TTL.
 *
 * @param {string} iso1 - start timestamp (ISO 8601)
 * @param {string} [iso2] - end timestamp (ISO 8601); defaults to the current time
 * @returns {number} elapsed days, >= 0
 */
function daysBetween(iso1, iso2 = new Date().toISOString()) {
    const MS_PER_DAY = 86400000;
    const elapsedMs = new Date(iso2).getTime() - new Date(iso1).getTime();
    if (Number.isNaN(elapsedMs)) {
        return 0;
    }
    return Math.max(0, elapsedMs / MS_PER_DAY);
}
|
|
195
|
+
// ============================================================================
|
|
196
|
+
// ProfileManager
|
|
197
|
+
// ============================================================================
|
|
64
198
|
export class ProfileManager {
|
|
65
199
|
profile = null;
|
|
66
200
|
profilePath;
|
|
@@ -75,14 +209,18 @@ export class ProfileManager {
|
|
|
75
209
|
return this.profile;
|
|
76
210
|
try {
|
|
77
211
|
const raw = await fs.readFile(this.profilePath, "utf-8");
|
|
78
|
-
|
|
212
|
+
const parsed = JSON.parse(raw);
|
|
213
|
+
// 向后兼容:旧文件没有 locations 字段
|
|
214
|
+
if (!parsed.locations)
|
|
215
|
+
parsed.locations = { recent: [] };
|
|
216
|
+
this.profile = parsed;
|
|
79
217
|
const tagCount = Object.values(this.profile.tags).reduce((sum, tags) => sum + tags.length, 0);
|
|
80
218
|
const layerCounts = this.countByLayer(this.profile);
|
|
81
|
-
logger.info(`[engram:profile] Loaded profile: ${tagCount} tags (identity=${layerCounts.identity} pattern=${layerCounts.pattern} interest=${layerCounts.interest}), ${Object.keys(this.profile.tags).length} dimensions, coreTags=[${this.profile.coreTags.join(", ")}]`);
|
|
219
|
+
logger.info(`[engram:profile] Loaded profile: ${tagCount} tags (identity=${layerCounts.identity} pattern=${layerCounts.pattern} interest=${layerCounts.interest} event=${layerCounts.event}), ${Object.keys(this.profile.tags).length} dimensions, coreTags=[${this.profile.coreTags.join(", ")}]`);
|
|
82
220
|
}
|
|
83
221
|
catch {
|
|
84
222
|
logger.info(`[engram:profile] Profile not found at ${this.profilePath}, using empty profile`);
|
|
85
|
-
this.profile = { ...EMPTY_PROFILE };
|
|
223
|
+
this.profile = { ...EMPTY_PROFILE, locations: { recent: [] } };
|
|
86
224
|
}
|
|
87
225
|
return this.profile;
|
|
88
226
|
}
|
|
@@ -101,7 +239,12 @@ export class ProfileManager {
|
|
|
101
239
|
}
|
|
102
240
|
/** 统计各层标签数量 */
|
|
103
241
|
countByLayer(profile) {
|
|
104
|
-
const counts = {
|
|
242
|
+
const counts = {
|
|
243
|
+
identity: 0,
|
|
244
|
+
pattern: 0,
|
|
245
|
+
interest: 0,
|
|
246
|
+
event: 0,
|
|
247
|
+
};
|
|
105
248
|
for (const tags of Object.values(profile.tags)) {
|
|
106
249
|
for (const t of tags) {
|
|
107
250
|
counts[getLayer(t)]++;
|
|
@@ -111,16 +254,19 @@ export class ProfileManager {
|
|
|
111
254
|
}
|
|
112
255
|
/**
|
|
113
256
|
* 获取召回用的摘要信息(控制 token 消耗)
|
|
114
|
-
* 优先展示
|
|
257
|
+
* 优先展示 summary + locations + coreTags
|
|
115
258
|
*/
|
|
116
259
|
getRecallContext(profile) {
|
|
117
|
-
if (!profile.summary && profile.coreTags.length === 0) {
|
|
260
|
+
if (!profile.summary && profile.coreTags.length === 0 && !profile.locations?.primary) {
|
|
118
261
|
return "";
|
|
119
262
|
}
|
|
120
263
|
const parts = [];
|
|
121
264
|
if (profile.summary) {
|
|
122
265
|
parts.push(`【用户画像】${profile.summary}`);
|
|
123
266
|
}
|
|
267
|
+
if (profile.locations?.primary) {
|
|
268
|
+
parts.push(`【常驻地】${profile.locations.primary}`);
|
|
269
|
+
}
|
|
124
270
|
if (profile.coreTags.length > 0) {
|
|
125
271
|
parts.push(`【核心标签】${profile.coreTags.join(", ")}`);
|
|
126
272
|
}
|
|
@@ -128,95 +274,122 @@ export class ProfileManager {
|
|
|
128
274
|
}
|
|
129
275
|
/**
|
|
130
276
|
* 添加标签(增量更新,默认 layer="interest")
|
|
277
|
+
* 新增:
|
|
278
|
+
* - 若 dimension 不在受控词表里,尝试 canonicalize,失败则丢弃
|
|
279
|
+
* - 若 value 命中事件关键词,强制降级为 event 层
|
|
131
280
|
*/
|
|
132
281
|
addTag(profile, dimension, value, layer = "interest") {
|
|
133
|
-
|
|
134
|
-
|
|
282
|
+
// 受控维度规范化
|
|
283
|
+
const canon = canonicalizeDimension(dimension);
|
|
284
|
+
if (!canon) {
|
|
285
|
+
logger.info(`[engram:profile] addTag: skip uncontrolled dimension "${dimension}" for value "${value}"`);
|
|
286
|
+
return profile;
|
|
135
287
|
}
|
|
136
|
-
|
|
288
|
+
// 事件类关键词强制降级为 event 层(除非 LLM 明确声明 identity/pattern)
|
|
289
|
+
let finalLayer = layer;
|
|
290
|
+
if (layer === "interest" && isLikelyEvent(value)) {
|
|
291
|
+
finalLayer = "event";
|
|
292
|
+
logger.info(`[engram:profile] addTag: "${value}" demoted to event layer (keyword match)`);
|
|
293
|
+
}
|
|
294
|
+
if (!profile.tags[canon]) {
|
|
295
|
+
profile.tags[canon] = [];
|
|
296
|
+
}
|
|
297
|
+
const existing = profile.tags[canon].find((t) => t.value === value);
|
|
137
298
|
if (existing) {
|
|
138
299
|
existing.confidence = Math.min(1.0, existing.confidence + 0.1);
|
|
139
300
|
existing.lastSeen = new Date().toISOString();
|
|
140
|
-
// 如果已有标签被提升层级(如 interest → identity),更新 layer
|
|
141
301
|
const existingLayer = getLayer(existing);
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
logger.info(`[engram:profile] Tag "${value}" promoted: ${existingLayer} → ${layer}`);
|
|
302
|
+
if (LAYER_PRIORITY[finalLayer] > LAYER_PRIORITY[existingLayer]) {
|
|
303
|
+
existing.layer = finalLayer;
|
|
304
|
+
logger.info(`[engram:profile] Tag "${value}" promoted: ${existingLayer} → ${finalLayer}`);
|
|
146
305
|
}
|
|
147
306
|
}
|
|
148
307
|
else {
|
|
149
|
-
const cfg = LAYER_CONFIG[
|
|
150
|
-
profile.tags[
|
|
308
|
+
const cfg = LAYER_CONFIG[finalLayer];
|
|
309
|
+
profile.tags[canon].push({
|
|
151
310
|
value,
|
|
152
311
|
confidence: cfg.defaultConfidence,
|
|
153
312
|
lastSeen: new Date().toISOString(),
|
|
154
|
-
layer,
|
|
313
|
+
layer: finalLayer,
|
|
155
314
|
});
|
|
156
315
|
}
|
|
157
316
|
return profile;
|
|
158
317
|
}
|
|
159
318
|
/**
|
|
160
319
|
* 分层衰减标签置信度
|
|
161
|
-
* identity 不衰减,pattern 慢衰减(0.995),interest 快衰减(0.95)
|
|
320
|
+
* identity 不衰减,pattern 慢衰减(0.995),interest 快衰减(0.95),event 最快(0.85)
|
|
321
|
+
* 同时基于 TTL 清理过期条目(event: 7d, interest: 30d, pattern: 180d)
|
|
162
322
|
*/
|
|
163
323
|
decayTags(profile, factor) {
|
|
164
324
|
let decayed = 0;
|
|
165
|
-
let
|
|
325
|
+
let prunedByConfidence = 0;
|
|
326
|
+
let prunedByTtl = 0;
|
|
166
327
|
const layerStats = {
|
|
167
|
-
identity:
|
|
168
|
-
pattern:
|
|
169
|
-
interest:
|
|
328
|
+
identity: 0,
|
|
329
|
+
pattern: 0,
|
|
330
|
+
interest: 0,
|
|
331
|
+
event: 0,
|
|
170
332
|
};
|
|
171
333
|
for (const dimension of Object.keys(profile.tags)) {
|
|
172
334
|
const before = profile.tags[dimension].length;
|
|
173
335
|
profile.tags[dimension] = profile.tags[dimension]
|
|
174
|
-
.map(t => {
|
|
336
|
+
.map((t) => {
|
|
175
337
|
const layer = getLayer(t);
|
|
176
338
|
const cfg = LAYER_CONFIG[layer];
|
|
177
|
-
//
|
|
178
|
-
// 如果没传 factor,全部用各层配置
|
|
339
|
+
// 调用方传了 factor 时仅对 interest 生效
|
|
179
340
|
const actualFactor = factor !== undefined && layer === "interest" ? factor : cfg.decayFactor;
|
|
180
341
|
return { ...t, confidence: t.confidence * actualFactor };
|
|
181
342
|
})
|
|
182
|
-
.filter(t => {
|
|
343
|
+
.filter((t) => {
|
|
183
344
|
const layer = getLayer(t);
|
|
184
345
|
const cfg = LAYER_CONFIG[layer];
|
|
185
|
-
|
|
346
|
+
// TTL 剪枝
|
|
347
|
+
if (Number.isFinite(cfg.ttlDays) && daysBetween(t.lastSeen) > cfg.ttlDays) {
|
|
348
|
+
prunedByTtl++;
|
|
349
|
+
return false;
|
|
350
|
+
}
|
|
351
|
+
// 置信度剪枝
|
|
352
|
+
if (t.confidence <= cfg.pruneThreshold) {
|
|
353
|
+
prunedByConfidence++;
|
|
354
|
+
return false;
|
|
355
|
+
}
|
|
356
|
+
return true;
|
|
186
357
|
});
|
|
187
|
-
// 统计
|
|
188
358
|
for (const t of profile.tags[dimension]) {
|
|
189
|
-
layerStats[getLayer(t)]
|
|
359
|
+
layerStats[getLayer(t)]++;
|
|
190
360
|
}
|
|
191
|
-
const prunedCount = before - profile.tags[dimension].length;
|
|
192
|
-
pruned += prunedCount;
|
|
193
361
|
decayed += profile.tags[dimension].length;
|
|
194
362
|
if (profile.tags[dimension].length === 0) {
|
|
195
363
|
delete profile.tags[dimension];
|
|
196
364
|
}
|
|
365
|
+
void before;
|
|
197
366
|
}
|
|
198
|
-
logger.info(`[engram:profile] decayTags: identity=${layerStats.identity
|
|
367
|
+
logger.info(`[engram:profile] decayTags: kept identity=${layerStats.identity} pattern=${layerStats.pattern} interest=${layerStats.interest} event=${layerStats.event}, pruned=${prunedByConfidence}(low-conf) + ${prunedByTtl}(TTL)`);
|
|
368
|
+
void decayed;
|
|
199
369
|
return profile;
|
|
200
370
|
}
|
|
201
371
|
/**
|
|
202
|
-
* 生成 coreTags:优先 identity → pattern → interest
|
|
372
|
+
* 生成 coreTags:优先 identity → pattern → interest,event 不计入
|
|
373
|
+
* 同时跳过纯经纬度和地址(交由 locations 字段独立展示)
|
|
203
374
|
*/
|
|
204
375
|
generateCoreTags(profile) {
|
|
205
376
|
const allTags = [];
|
|
206
377
|
for (const tags of Object.values(profile.tags)) {
|
|
207
378
|
for (const t of tags) {
|
|
208
|
-
|
|
379
|
+
const layer = getLayer(t);
|
|
380
|
+
if (layer === "event")
|
|
381
|
+
continue; // event 不参与 coreTags
|
|
382
|
+
if (isLikelyCoordinate(t.value))
|
|
383
|
+
continue; // 过滤纯经纬度
|
|
384
|
+
allTags.push({ value: t.value, confidence: t.confidence, layer });
|
|
209
385
|
}
|
|
210
386
|
}
|
|
211
|
-
// 排序:identity 优先 > pattern > interest,同层内按 confidence 降序
|
|
212
|
-
const layerPriority = { identity: 3, pattern: 2, interest: 1 };
|
|
213
387
|
allTags.sort((a, b) => {
|
|
214
|
-
const layerDiff =
|
|
388
|
+
const layerDiff = LAYER_PRIORITY[b.layer] - LAYER_PRIORITY[a.layer];
|
|
215
389
|
if (layerDiff !== 0)
|
|
216
390
|
return layerDiff;
|
|
217
391
|
return b.confidence - a.confidence;
|
|
218
392
|
});
|
|
219
|
-
// 去重,取前 10 个
|
|
220
393
|
const seen = new Set();
|
|
221
394
|
const result = [];
|
|
222
395
|
for (const t of allTags) {
|
|
@@ -224,37 +397,40 @@ export class ProfileManager {
|
|
|
224
397
|
continue;
|
|
225
398
|
seen.add(t.value);
|
|
226
399
|
result.push(t.value);
|
|
227
|
-
if (result.length >=
|
|
228
|
-
break;
|
|
400
|
+
if (result.length >= 8)
|
|
401
|
+
break; // 从 10 收紧到 8
|
|
229
402
|
}
|
|
230
403
|
return result;
|
|
231
404
|
}
|
|
232
405
|
/**
|
|
233
|
-
* 维度归一化:合并碎片维度 +
|
|
234
|
-
* 在月度 settle 中调用,防止 interest 更新时 LLM 自由发挥维度名导致碎片化
|
|
406
|
+
* 维度归一化:合并碎片维度 + 按受控表收敛 + 清理空维度
|
|
235
407
|
*
|
|
236
|
-
* @returns 统计信息 { merged, deduped, emptied }
|
|
408
|
+
* @returns 统计信息 { merged, deduped, emptied, demotedToEvent }
|
|
237
409
|
*/
|
|
238
410
|
normalizeDimensions(profile) {
|
|
239
411
|
let merged = 0;
|
|
240
412
|
let deduped = 0;
|
|
241
413
|
let emptied = 0;
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
414
|
+
let demotedToEvent = 0;
|
|
415
|
+
// ---- 1. 维度合并到受控表 ----
|
|
416
|
+
const dimsToProcess = Object.keys(profile.tags);
|
|
417
|
+
for (const srcDim of dimsToProcess) {
|
|
418
|
+
const targetDim = canonicalizeDimension(srcDim);
|
|
419
|
+
if (!targetDim || targetDim === srcDim)
|
|
245
420
|
continue;
|
|
246
421
|
if (!profile.tags[targetDim])
|
|
247
422
|
profile.tags[targetDim] = [];
|
|
248
423
|
for (const tag of profile.tags[srcDim]) {
|
|
249
|
-
const existing = profile.tags[targetDim].find(t => t.value === tag.value);
|
|
424
|
+
const existing = profile.tags[targetDim].find((t) => t.value === tag.value);
|
|
250
425
|
if (existing) {
|
|
251
426
|
if (tag.confidence > existing.confidence) {
|
|
252
427
|
existing.confidence = tag.confidence;
|
|
253
428
|
existing.lastSeen = tag.lastSeen;
|
|
254
429
|
}
|
|
255
|
-
const
|
|
256
|
-
|
|
257
|
-
|
|
430
|
+
const tagLayer = tag.layer || "interest";
|
|
431
|
+
const existingLayer = existing.layer || "interest";
|
|
432
|
+
if (LAYER_PRIORITY[tagLayer] > LAYER_PRIORITY[existingLayer]) {
|
|
433
|
+
existing.layer = tagLayer;
|
|
258
434
|
}
|
|
259
435
|
}
|
|
260
436
|
else {
|
|
@@ -264,46 +440,266 @@ export class ProfileManager {
|
|
|
264
440
|
}
|
|
265
441
|
delete profile.tags[srcDim];
|
|
266
442
|
}
|
|
267
|
-
// ----
|
|
268
|
-
for (const
|
|
269
|
-
const [
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
if (removeTags.length > 0) {
|
|
275
|
-
if (keepTag) {
|
|
276
|
-
for (const rt of removeTags) {
|
|
277
|
-
if (rt.confidence > keepTag.confidence)
|
|
278
|
-
keepTag.confidence = rt.confidence;
|
|
279
|
-
if (rt.lastSeen > keepTag.lastSeen)
|
|
280
|
-
keepTag.lastSeen = rt.lastSeen;
|
|
281
|
-
const lp = { identity: 3, pattern: 2, interest: 1 };
|
|
282
|
-
if ((lp[rt.layer || "interest"] || 1) > (lp[keepTag.layer || "interest"] || 1)) {
|
|
283
|
-
keepTag.layer = rt.layer;
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
}
|
|
287
|
-
profile.tags[dim] = tags.filter(t => !removeSet.has(t.value));
|
|
288
|
-
deduped += removeTags.length;
|
|
443
|
+
// ---- 2. 事件类关键词降级:interest → event ----
|
|
444
|
+
for (const dim of Object.keys(profile.tags)) {
|
|
445
|
+
for (const tag of profile.tags[dim]) {
|
|
446
|
+
if ((tag.layer || "interest") === "interest" && isLikelyEvent(tag.value)) {
|
|
447
|
+
tag.layer = "event";
|
|
448
|
+
tag.confidence = Math.min(tag.confidence, LAYER_CONFIG.event.defaultConfidence);
|
|
449
|
+
demotedToEvent++;
|
|
289
450
|
}
|
|
290
451
|
}
|
|
291
452
|
}
|
|
292
|
-
// ----
|
|
453
|
+
// ---- 3. 清理非受控维度 + 空维度 ----
|
|
293
454
|
for (const dim of Object.keys(profile.tags)) {
|
|
455
|
+
if (!CANONICAL_DIMENSIONS.includes(dim)) {
|
|
456
|
+
// 不在受控表也没别名映射 → 丢弃
|
|
457
|
+
logger.info(`[engram:profile] normalize: dropping uncontrolled dimension [${dim}] with ${profile.tags[dim].length} tags`);
|
|
458
|
+
deduped += profile.tags[dim].length;
|
|
459
|
+
delete profile.tags[dim];
|
|
460
|
+
continue;
|
|
461
|
+
}
|
|
294
462
|
if (profile.tags[dim].length === 0) {
|
|
295
463
|
delete profile.tags[dim];
|
|
296
464
|
emptied++;
|
|
297
465
|
}
|
|
298
466
|
}
|
|
299
|
-
// ---- 各维度内按 confidence 降序排列 ----
|
|
467
|
+
// ---- 4. 各维度内按 confidence 降序排列 ----
|
|
300
468
|
for (const dim of Object.keys(profile.tags)) {
|
|
301
469
|
profile.tags[dim].sort((a, b) => b.confidence - a.confidence);
|
|
302
470
|
}
|
|
303
|
-
if (merged > 0 || deduped > 0 || emptied > 0) {
|
|
304
|
-
logger.info(`[engram:profile] normalizeDimensions: merged=${merged} deduped=${deduped} emptied=${emptied}`);
|
|
471
|
+
if (merged > 0 || deduped > 0 || emptied > 0 || demotedToEvent > 0) {
|
|
472
|
+
logger.info(`[engram:profile] normalizeDimensions: merged=${merged} deduped=${deduped} emptied=${emptied} demotedToEvent=${demotedToEvent}`);
|
|
305
473
|
}
|
|
306
|
-
return { merged, deduped, emptied };
|
|
474
|
+
return { merged, deduped, emptied, demotedToEvent };
|
|
475
|
+
}
|
|
476
|
+
/**
|
|
477
|
+
* 更新位置信息
|
|
478
|
+
* - primary 由长期记忆或 LLM 自审确定(本函数不主动设)
|
|
479
|
+
* - recent 只保留最近的 3 个,按 updatedAt 排序
|
|
480
|
+
*/
|
|
481
|
+
updateRecentLocation(profile, place) {
|
|
482
|
+
if (!profile.locations)
|
|
483
|
+
profile.locations = { recent: [] };
|
|
484
|
+
const now = new Date().toISOString();
|
|
485
|
+
const existing = profile.locations.recent.find((r) => r.place === place);
|
|
486
|
+
if (existing) {
|
|
487
|
+
existing.updatedAt = now;
|
|
488
|
+
}
|
|
489
|
+
else {
|
|
490
|
+
profile.locations.recent.push({ place, updatedAt: now });
|
|
491
|
+
}
|
|
492
|
+
profile.locations.recent.sort((a, b) => (a.updatedAt < b.updatedAt ? 1 : -1));
|
|
493
|
+
profile.locations.recent = profile.locations.recent.slice(0, 3);
|
|
494
|
+
return profile;
|
|
495
|
+
}
|
|
496
|
+
/**
|
|
497
|
+
* LLM 自审:月度兜底清理,交由 LLM 发现 hard-coded 规则遗漏的问题
|
|
498
|
+
*
|
|
499
|
+
* @param profile 当前画像
|
|
500
|
+
* @param llmCall LLM 调用函数
|
|
501
|
+
* @returns 应用的变更统计
|
|
502
|
+
*/
|
|
503
|
+
async llmSelfAudit(profile, llmCall) {
|
|
504
|
+
const compactView = Object.entries(profile.tags)
|
|
505
|
+
.map(([dim, tags]) => `${dim}: ${tags.map((t) => `${t.value}(${t.layer ?? "interest"})`).join(" | ")}`)
|
|
506
|
+
.join("\n");
|
|
507
|
+
if (!compactView) {
|
|
508
|
+
return { mergedGroups: 0, demoted: 0, coreTagsRewritten: false };
|
|
509
|
+
}
|
|
510
|
+
const systemPrompt = `你是一个用户画像审核专家。分析当前画像,找出以下问题:
|
|
511
|
+
|
|
512
|
+
1. **语义重复的 tag**:同一意思的不同写法(如 "ACP subagent开发测试" 和 "ACP子代理spawn测试")。
|
|
513
|
+
对每组重复,挑选最规范的一条作为 keep,其他作为 remove。
|
|
514
|
+
|
|
515
|
+
2. **错误分层的 tag**:一次性事件(排查/bug/超时/配置修改)被错误地标为 interest 层,应该降级为 event。
|
|
516
|
+
罗列这些 tag 的 value。
|
|
517
|
+
|
|
518
|
+
3. **重写 coreTags**:从当前画像中挑选最代表用户身份和长期特征的 5 个 tag。
|
|
519
|
+
跳过经纬度、纯地址字符串、一次性事件。
|
|
520
|
+
|
|
521
|
+
只输出 JSON,不要任何解释:
|
|
522
|
+
{
|
|
523
|
+
"merges": [{"keep": "标准写法", "remove": ["变体1", "变体2"]}],
|
|
524
|
+
"demotions": ["事件类tag值", ...],
|
|
525
|
+
"newCoreTags": ["tag1", "tag2", ...]
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
如果某项无需变动,对应数组留空即可。`;
|
|
529
|
+
logger.info(`[engram:profile] llmSelfAudit: calling LLM (${compactView.length} chars of profile view)`);
|
|
530
|
+
let raw;
|
|
531
|
+
try {
|
|
532
|
+
raw = await llmCall(compactView, systemPrompt);
|
|
533
|
+
}
|
|
534
|
+
catch (err) {
|
|
535
|
+
logger.error(`[engram:profile] llmSelfAudit: LLM call failed: ${err}`);
|
|
536
|
+
return { mergedGroups: 0, demoted: 0, coreTagsRewritten: false };
|
|
537
|
+
}
|
|
538
|
+
// 容错 JSON 解析(移除 markdown code fence)
|
|
539
|
+
const cleaned = raw.replace(/^```(?:json)?\s*/i, "").replace(/\s*```\s*$/i, "").trim();
|
|
540
|
+
let audit;
|
|
541
|
+
try {
|
|
542
|
+
audit = JSON.parse(cleaned);
|
|
543
|
+
}
|
|
544
|
+
catch (err) {
|
|
545
|
+
logger.error(`[engram:profile] llmSelfAudit: JSON parse failed: ${err}, raw="${raw.slice(0, 200)}"`);
|
|
546
|
+
return { mergedGroups: 0, demoted: 0, coreTagsRewritten: false };
|
|
547
|
+
}
|
|
548
|
+
let mergedGroups = 0;
|
|
549
|
+
let demoted = 0;
|
|
550
|
+
// 应用合并
|
|
551
|
+
for (const group of audit.merges ?? []) {
|
|
552
|
+
if (!group.keep || !Array.isArray(group.remove) || group.remove.length === 0)
|
|
553
|
+
continue;
|
|
554
|
+
const removeSet = new Set(group.remove);
|
|
555
|
+
let applied = false;
|
|
556
|
+
for (const dim of Object.keys(profile.tags)) {
|
|
557
|
+
const keepTag = profile.tags[dim].find((t) => t.value === group.keep);
|
|
558
|
+
const removeTags = profile.tags[dim].filter((t) => removeSet.has(t.value));
|
|
559
|
+
if (removeTags.length === 0)
|
|
560
|
+
continue;
|
|
561
|
+
if (keepTag) {
|
|
562
|
+
for (const rt of removeTags) {
|
|
563
|
+
if (rt.confidence > keepTag.confidence)
|
|
564
|
+
keepTag.confidence = rt.confidence;
|
|
565
|
+
if (rt.lastSeen > keepTag.lastSeen)
|
|
566
|
+
keepTag.lastSeen = rt.lastSeen;
|
|
567
|
+
}
|
|
568
|
+
}
|
|
569
|
+
profile.tags[dim] = profile.tags[dim].filter((t) => !removeSet.has(t.value));
|
|
570
|
+
applied = true;
|
|
571
|
+
}
|
|
572
|
+
if (applied)
|
|
573
|
+
mergedGroups++;
|
|
574
|
+
}
|
|
575
|
+
// 应用降级
|
|
576
|
+
const demoteSet = new Set(audit.demotions ?? []);
|
|
577
|
+
if (demoteSet.size > 0) {
|
|
578
|
+
for (const dim of Object.keys(profile.tags)) {
|
|
579
|
+
for (const tag of profile.tags[dim]) {
|
|
580
|
+
if (demoteSet.has(tag.value) && (tag.layer ?? "interest") !== "event") {
|
|
581
|
+
tag.layer = "event";
|
|
582
|
+
tag.confidence = Math.min(tag.confidence, LAYER_CONFIG.event.defaultConfidence);
|
|
583
|
+
demoted++;
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
}
|
|
587
|
+
}
|
|
588
|
+
// 应用 coreTags 重写
|
|
589
|
+
let coreTagsRewritten = false;
|
|
590
|
+
if (Array.isArray(audit.newCoreTags) && audit.newCoreTags.length > 0) {
|
|
591
|
+
profile.coreTags = audit.newCoreTags.slice(0, 8);
|
|
592
|
+
coreTagsRewritten = true;
|
|
593
|
+
}
|
|
594
|
+
logger.info(`[engram:profile] llmSelfAudit: mergedGroups=${mergedGroups} demoted=${demoted} coreTagsRewritten=${coreTagsRewritten}`);
|
|
595
|
+
return { mergedGroups, demoted, coreTagsRewritten };
|
|
596
|
+
}
|
|
597
|
+
/**
|
|
598
|
+
* 基于 embedding 的同维度内语义去重
|
|
599
|
+
*
|
|
600
|
+
* 对每个维度内的 tag 两两计算 cosine 相似度,>= threshold 的合并为一组,
|
|
601
|
+
* 组内保留 confidence 最高的作为 canonical,其他并入(取 max confidence + 最新 lastSeen)。
|
|
602
|
+
*
|
|
603
|
+
* @param profile 当前画像
|
|
604
|
+
* @param embedder 将文本 -> 向量的函数
|
|
605
|
+
* @param threshold cosine 相似度阈值(默认 0.88)
|
|
606
|
+
*/
|
|
607
|
+
async dedupByEmbedding(profile, embedder, threshold = 0.88) {
|
|
608
|
+
let merged = 0;
|
|
609
|
+
for (const dim of Object.keys(profile.tags)) {
|
|
610
|
+
const tags = profile.tags[dim];
|
|
611
|
+
if (tags.length < 2)
|
|
612
|
+
continue;
|
|
613
|
+
// 批量 embed(串行,避免 API QPS 限制)
|
|
614
|
+
const vectors = [];
|
|
615
|
+
for (const t of tags) {
|
|
616
|
+
try {
|
|
617
|
+
vectors.push(await embedder(t.value));
|
|
618
|
+
}
|
|
619
|
+
catch (err) {
|
|
620
|
+
logger.error(`[engram:profile] dedupByEmbedding: embed failed for "${t.value}": ${err}`);
|
|
621
|
+
vectors.push([]); // 占位,跳过相似度计算
|
|
622
|
+
}
|
|
623
|
+
}
|
|
624
|
+
// 并查集式分组
|
|
625
|
+
const parent = tags.map((_, i) => i);
|
|
626
|
+
const find = (x) => (parent[x] === x ? x : (parent[x] = find(parent[x])));
|
|
627
|
+
const union = (a, b) => {
|
|
628
|
+
const ra = find(a);
|
|
629
|
+
const rb = find(b);
|
|
630
|
+
if (ra !== rb)
|
|
631
|
+
parent[ra] = rb;
|
|
632
|
+
};
|
|
633
|
+
for (let i = 0; i < tags.length; i++) {
|
|
634
|
+
if (vectors[i].length === 0)
|
|
635
|
+
continue;
|
|
636
|
+
for (let j = i + 1; j < tags.length; j++) {
|
|
637
|
+
if (vectors[j].length === 0)
|
|
638
|
+
continue;
|
|
639
|
+
const sim = cosineSimilarity(vectors[i], vectors[j]);
|
|
640
|
+
if (sim >= threshold)
|
|
641
|
+
union(i, j);
|
|
642
|
+
}
|
|
643
|
+
}
|
|
644
|
+
// 按组合并
|
|
645
|
+
const groups = new Map();
|
|
646
|
+
for (let i = 0; i < tags.length; i++) {
|
|
647
|
+
const root = find(i);
|
|
648
|
+
if (!groups.has(root))
|
|
649
|
+
groups.set(root, []);
|
|
650
|
+
groups.get(root).push(i);
|
|
651
|
+
}
|
|
652
|
+
const keptTags = [];
|
|
653
|
+
for (const idxs of groups.values()) {
|
|
654
|
+
if (idxs.length === 1) {
|
|
655
|
+
keptTags.push(tags[idxs[0]]);
|
|
656
|
+
continue;
|
|
657
|
+
}
|
|
658
|
+
// 组内合并:选 confidence 最高的为 canonical
|
|
659
|
+
idxs.sort((a, b) => tags[b].confidence - tags[a].confidence);
|
|
660
|
+
const canonical = { ...tags[idxs[0]] };
|
|
661
|
+
for (let k = 1; k < idxs.length; k++) {
|
|
662
|
+
const t = tags[idxs[k]];
|
|
663
|
+
if (t.confidence > canonical.confidence)
|
|
664
|
+
canonical.confidence = t.confidence;
|
|
665
|
+
if (t.lastSeen > canonical.lastSeen)
|
|
666
|
+
canonical.lastSeen = t.lastSeen;
|
|
667
|
+
const tLayer = t.layer || "interest";
|
|
668
|
+
const cLayer = canonical.layer || "interest";
|
|
669
|
+
if (LAYER_PRIORITY[tLayer] > LAYER_PRIORITY[cLayer])
|
|
670
|
+
canonical.layer = tLayer;
|
|
671
|
+
}
|
|
672
|
+
logger.info(`[engram:profile] dedupByEmbedding [${dim}] merge: keep="${canonical.value}", drop=[${idxs.slice(1).map((i) => `"${tags[i].value}"`).join(", ")}]`);
|
|
673
|
+
keptTags.push(canonical);
|
|
674
|
+
merged += idxs.length - 1;
|
|
675
|
+
}
|
|
676
|
+
profile.tags[dim] = keptTags;
|
|
677
|
+
}
|
|
678
|
+
logger.info(`[engram:profile] dedupByEmbedding: merged ${merged} tags`);
|
|
679
|
+
return { merged };
|
|
680
|
+
}
|
|
681
|
+
}
|
|
682
|
+
// ============================================================================
|
|
683
|
+
// 辅助函数
|
|
684
|
+
// ============================================================================
|
|
685
|
+
/**
 * Report whether a string is nothing but a "latitude, longitude" pair, i.e.
 * two decimal numbers (each with a fractional part, optionally negative)
 * separated by a comma.
 *
 * Both the ASCII comma and the full-width comma "," are accepted as
 * separator, since the latter is common in Chinese text. Non-string input is
 * never a coordinate.
 *
 * @param {string} s - text to test; surrounding whitespace is ignored
 * @returns {boolean}
 */
function isLikelyCoordinate(s) {
    if (typeof s !== "string") {
        return false;
    }
    return /^-?\d+\.\d+\s*[,,]\s*-?\d+\.\d+$/.test(s.trim());
}
|
|
689
|
+
/**
 * Cosine similarity of two numeric vectors (plain arrays or typed arrays).
 *
 * Returns 0 whenever the similarity is undefined: a missing vector, vectors
 * of different length, an empty vector, a zero-length (all-zero) vector, or
 * components that make the result non-finite (NaN / Infinity). The result is
 * clamped to [-1, 1] so floating-point rounding cannot push it out of range.
 *
 * @param {ArrayLike<number>} a
 * @param {ArrayLike<number>} b
 * @returns {number} similarity in [-1, 1], or 0 when undefined
 */
function cosineSimilarity(a, b) {
    if (a == null || b == null || a.length !== b.length || a.length === 0) {
        return 0;
    }
    let dot = 0;
    let normSqA = 0;
    let normSqB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        normSqA += a[i] * a[i];
        normSqB += b[i] * b[i];
    }
    if (normSqA === 0 || normSqB === 0) {
        return 0;
    }
    const similarity = dot / (Math.sqrt(normSqA) * Math.sqrt(normSqB));
    if (!Number.isFinite(similarity)) {
        return 0;
    }
    return Math.min(1, Math.max(-1, similarity));
}
|
|
309
705
|
//# sourceMappingURL=profile.js.map
|