@scotthuang/engram 0.6.8 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +79 -2
- package/dist/src/__tests__/profile.test.js +66 -22
- package/dist/src/__tests__/profile.test.js.map +1 -1
- package/dist/src/index.js +76 -8
- package/dist/src/index.js.map +1 -1
- package/dist/src/profile.d.ts +22 -5
- package/dist/src/profile.js +106 -14
- package/dist/src/profile.js.map +1 -1
- package/dist/src/settle.js +16 -14
- package/dist/src/settle.js.map +1 -1
- package/package.json +1 -1
- package/dist/bm25.d.ts +0 -60
- package/dist/bm25.js +0 -271
- package/dist/bm25.js.map +0 -1
- package/dist/config.d.ts +0 -47
- package/dist/config.js +0 -83
- package/dist/config.js.map +0 -1
- package/dist/image-store.d.ts +0 -146
- package/dist/image-store.js +0 -418
- package/dist/image-store.js.map +0 -1
- package/dist/index.d.ts +0 -7
- package/dist/index.js +0 -1138
- package/dist/index.js.map +0 -1
- package/dist/logger.d.ts +0 -32
- package/dist/logger.js +0 -106
- package/dist/logger.js.map +0 -1
- package/dist/profile.d.ts +0 -37
- package/dist/profile.js +0 -107
- package/dist/profile.js.map +0 -1
- package/dist/recall.d.ts +0 -98
- package/dist/recall.js +0 -729
- package/dist/recall.js.map +0 -1
- package/dist/settle.d.ts +0 -83
- package/dist/settle.js +0 -675
- package/dist/settle.js.map +0 -1
- package/dist/vector.d.ts +0 -66
- package/dist/vector.js +0 -275
- package/dist/vector.js.map +0 -1
package/dist/settle.js
DELETED
|
@@ -1,675 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Memory System Plugin - Settle (沉淀机制)
|
|
3
|
-
*
|
|
4
|
-
* 双层架构(v0.5):
|
|
5
|
-
*
|
|
6
|
-
* ■ 每日 settle (runSettlement):
|
|
7
|
-
* 1. 结构化短期记忆
|
|
8
|
-
* 2. 更新画像
|
|
9
|
-
* 3. 生成画像摘要
|
|
10
|
-
* 短期文件永久保留,不再归档删除
|
|
11
|
-
*
|
|
12
|
-
* ■ 月度 settle (runMonthlySettle):
|
|
13
|
-
* 1. 精选沉淀:只沉淀 recall-hits 中 hitCount ≥ 2 的短期记忆到向量库
|
|
14
|
-
* 2. 月度遗忘:清理向量库中低 effective_importance 的条目
|
|
15
|
-
*/
|
|
16
|
-
import { promises as fs } from "node:fs";
|
|
17
|
-
import { logger } from "./logger.js";
|
|
18
|
-
import { join } from "node:path";
|
|
19
|
-
import { ProfileManager } from "./profile.js";
|
|
20
|
-
/**
 * Format the current date as `YYYY-MM-DD` using the machine's local time zone.
 *
 * Local calendar fields are used on purpose (rather than an ISO/UTC string) so
 * the result does not roll over to a different day near midnight.
 *
 * @returns {string} Today's local date, zero-padded, e.g. "2024-03-07".
 */
function getLocalDateString() {
    const today = new Date();
    const pad2 = (value) => String(value).padStart(2, "0");
    return [today.getFullYear(), pad2(today.getMonth() + 1), pad2(today.getDate())].join("-");
}
|
|
31
|
-
// HTML-comment tag appended to a short-term section once structurizeShortTerm has handled it.
const STRUCTURED_MARKER = "<!-- STRUCTURED -->";
|
|
32
|
-
// HTML-comment tag added after STRUCTURED_MARKER once extractAndVectorize has processed the section.
const SETTLED_MARKER = "<!-- SETTLED -->";
|
|
33
|
-
/**
 * Strip the wrapping an LLM commonly puts around a JSON answer.
 *
 * Handles:
 *  - a reply that is entirely a fenced block (```json ... ``` or ``` ... ```);
 *  - a reply with prose before/after a fenced block (the first fenced block is used);
 *  - a bare reply (returned trimmed).
 *
 * The result is NOT validated as JSON; callers still need to JSON.parse it.
 *
 * @param {string} raw Raw LLM output.
 * @returns {string} The text most likely to be the JSON payload.
 */
function cleanLLMJson(raw) {
    const text = raw.trim();
    if (text.startsWith("```")) {
        // Whole reply is fenced: drop the opening fence and, if present, the closing one.
        return text
            .replace(/^```(?:json)?\s*\n?/i, "")
            .replace(/\n?\s*```\s*$/, "")
            .trim();
    }
    // Reply has prose around the payload: pull out the first fenced block, if any.
    const fenced = text.match(/```(?:json)?\s*\n?([\s\S]*?)\n?\s*```/i);
    return fenced ? fenced[1].trim() : text;
}
|
|
43
|
-
/**
 * Step 1: structurize one day's short-term memory file.
 *
 * Reads `memory-engram/short-term/<date>.md`, splits it on `### ` headings and
 * rewrites the file in place:
 *  - sections already containing STRUCTURED_MARKER are kept untouched;
 *  - compaction system records are only tagged with the marker;
 *  - every other section is sent to `opts.llmCall` and replaced by its answer.
 *
 * Every `### ` record in the LLM answer gets its own marker. The reader splits
 * on `### `, so a single trailing marker would leave all but the last record
 * looking unstructured on the next run.
 *
 * @param opts Settle options; reads `workspaceDir` and the optional `llmCall(section, systemPrompt)`.
 * @param date Day to process (YYYY-MM-DD). Defaults to the current local date.
 * @returns {Promise<string>} Status text. Read/LLM/write failures are reported in the text.
 */
export async function structurizeShortTerm(opts, date = getLocalDateString()) {
    const shortTermDir = join(opts.workspaceDir, "memory-engram", "short-term");
    await fs.mkdir(shortTermDir, { recursive: true });
    const filePath = join(shortTermDir, `${date}.md`);
    const sectionBoundary = /(?=^### )/m;
    const withMarker = (text) => text.trimEnd() + "\n" + STRUCTURED_MARKER + "\n";
    try {
        const raw = await fs.readFile(filePath, "utf-8");
        logger.info(`[engram:settle] Step1 structurize: reading ${filePath} (${raw.length} chars)`);
        if (!raw.trim()) {
            logger.info(`[engram:settle] Step1 structurize: file empty, skipping`);
            return "No content to structurize.";
        }
        const sections = raw.split(sectionBoundary);
        logger.info(`[engram:settle] Step1 structurize: ${sections.length} sections found`);
        let processedCount = 0;
        let skippedStructured = 0;
        let skippedSystem = 0;
        const outputSections = [];
        for (const section of sections) {
            if (!section.trim()) {
                continue;
            }
            // Already structured: keep verbatim.
            if (section.includes(STRUCTURED_MARKER)) {
                outputSections.push(section);
                skippedStructured++;
                continue;
            }
            // Pure system/compaction record: tag it so it is never sent to the LLM.
            if (section.includes("[系统]") && section.includes("Compaction triggered")) {
                outputSections.push(withMarker(section));
                skippedSystem++;
                continue;
            }
            if (!opts.llmCall) {
                logger.info(`[engram:settle] Step1 structurize: no llmCall, keeping raw section`);
                outputSections.push(section);
                continue;
            }
            const systemPrompt = `你是一个记忆整理助手。请将以下原始对话记录整理成结构化格式。

规则:
1. 按时间倒序排列
2. 每条记录格式:### HH:MM [分类标签]\n摘要内容(一句话)
3. 过滤掉无意义的闲聊(打招呼、HEARTBEAT_OK、确认回复、"好的"、"嗯"等)
4. 保留所有有价值的信息(决策、偏好、事件、数字、人名、地点等)
5. 分类标签可选值:饮食、工作、家庭、技术、决策、健康、购物、出行、随聊

只输出整理后的内容,不要解释。`;
            try {
                logger.info(`[engram:settle] Step1: calling LLM to structurize section ${processedCount + skippedStructured + skippedSystem + 1} (${section.length} chars)`);
                const result = await opts.llmCall(section, systemPrompt);
                logger.info(`[engram:settle] Step1: LLM returned ${result.length} chars for section`);
                // The answer may contain several `### ` records; tag each one individually.
                const records = result.split(sectionBoundary).filter((record) => record.trim());
                if (records.length > 1) {
                    for (const record of records) {
                        outputSections.push(withMarker(record));
                    }
                }
                else {
                    outputSections.push(withMarker(result));
                }
                processedCount++;
            }
            catch (llmErr) {
                logger.error(`[engram:settle] Step1: LLM structurize failed for section: ${llmErr}`);
                // Keep the raw text so nothing is lost; it will be retried on the next run.
                outputSections.push(section);
            }
        }
        // Normalise trailing whitespace before joining so repeated runs do not
        // keep adding blank lines between sections.
        const rewritten = outputSections.map((part) => part.trimEnd() + "\n").join("\n");
        await fs.writeFile(filePath, rewritten, "utf-8");
        logger.info(`[engram:settle] Step1: done. processed=${processedCount} skippedStructured=${skippedStructured} skippedSystem=${skippedSystem}`);
        return `Structurized short-term/${date}.md: ${processedCount} sections processed.`;
    }
    catch (err) {
        logger.error(`[engram:settle] Step1 structurizeShortTerm failed: ${err}`);
        return `Skipped: short-term/${date}.md not found or error: ${err}`;
    }
}
|
|
120
|
-
/**
 * Legacy daily step: pick long-term-worthy items out of one day's structured
 * short-term memory and push them to the vector store.
 *
 * Only sections that contain STRUCTURED_MARKER but not SETTLED_MARKER are sent
 * to the LLM. After the items are stored, those sections are tagged with
 * SETTLED_MARKER and the file is written back.
 *
 * @param opts Settle options; reads `workspaceDir`, `llmCall(content, systemPrompt)` and
 *             `vectorStore(text, category, importance)` (a result of "created" counts as stored,
 *             anything else as a duplicate).
 * @param date Day to process (YYYY-MM-DD). Defaults to the current LOCAL date, matching
 *             structurizeShortTerm, so both steps address the same file around midnight.
 * @returns {Promise<string>} Status text; failures are reported in the text, not thrown.
 */
export async function extractAndVectorize(opts, date = getLocalDateString()) {
    const filePath = join(opts.workspaceDir, "memory-engram", "short-term", `${date}.md`);
    logger.info(`[engram:settle] Step2 extractAndVectorize: date=${date}, file=${filePath}`);
    try {
        const content = await fs.readFile(filePath, "utf-8");
        if (!content.trim()) {
            logger.info(`[engram:settle] Step2: file empty`);
            return "No content.";
        }
        if (!opts.llmCall || !opts.vectorStore) {
            logger.info(`[engram:settle] Step2: missing llmCall=${!!opts.llmCall} vectorStore=${!!opts.vectorStore}`);
            return "No LLM/vector configured.";
        }
        // Split into sections and remember which ones still need settling.
        const allSections = content.split(/(?=^### )/m);
        const unsettledSections = [];
        const unsettledIndices = [];
        let alreadySettled = 0;
        allSections.forEach((section, index) => {
            if (section.includes(SETTLED_MARKER)) {
                alreadySettled++;
            }
            else if (section.includes(STRUCTURED_MARKER)) {
                unsettledSections.push(section);
                unsettledIndices.push(index);
            }
            // Sections with neither marker wait for Step 1 to structure them first.
        });
        logger.info(`[engram:settle] Step2: ${allSections.length} total, ${unsettledSections.length} unsettled, ${alreadySettled} already settled`);
        if (unsettledSections.length === 0) {
            logger.info(`[engram:settle] Step2: no new content to extract (all settled)`);
            return "No new content to extract (all settled).";
        }
        const unsettledContent = unsettledSections.join("\n");
        const systemPrompt = `你是一个信息筛选助手。从以下结构化记忆中筛选出值得长期保留的条目。

保留标准:
- 包含用户偏好、决策、重要事件、具体信息(地点、金额、人名)
- 包含用户行为模式和生活习惯(作息时间、运动习惯、饮食规律等)
- 去掉纯闲聊、临时信息

对每条保留的条目,输出 JSON 数组,每条包含:
- text: 条目摘要
- category: 分类
- importance: 重要度评分 0-1,标准如下:
- 0.9-1.0: 关键决策、重要事件(搬家、换工作、关系变化)
- 0.7-0.8: 明确偏好、项目里程碑、具体计划
- 0.4-0.6: 一般性讨论、技术问答
- 0.1-0.3: 日常闲聊、低价值内容

示例输出:
[{"text": "用户决定搬到深圳", "category": "生活", "importance": 0.9}, {"text": "用户在调试BM25分词", "category": "技术", "importance": 0.5}]

只输出 JSON,不要其他内容。`;
        logger.info(`[engram:settle] Step2: calling LLM for extraction (${unsettledContent.length} chars)`);
        const result = await opts.llmCall(unsettledContent, systemPrompt);
        logger.info(`[engram:settle] Step2: LLM returned ${result.length} chars`);
        let items;
        try {
            items = JSON.parse(cleanLLMJson(result));
        }
        catch (parseErr) {
            logger.error(`[engram:settle] Step2: JSON parse failed: ${parseErr}, raw="${result.slice(0, 200)}"`);
            return `Skipped: JSON parse failed`;
        }
        // Valid JSON of the wrong shape must not be iterated as if it were a list.
        if (!Array.isArray(items)) {
            logger.error(`[engram:settle] Step2: LLM output is not a JSON array, raw="${result.slice(0, 200)}"`);
            return `Skipped: LLM output is not a JSON array`;
        }
        logger.info(`[engram:settle] Step2: parsed ${items.length} candidate items`);
        let stored = 0;
        let duplicates = 0;
        let skippedInvalid = 0;
        for (const item of items) {
            const usable = item && typeof item.text === "string" && item.text && item.category;
            if (!usable) {
                skippedInvalid++;
                logger.info(`[engram:settle] Step2: skipping invalid item: ${JSON.stringify(item).slice(0, 100)}`);
                continue;
            }
            // Missing or non-numeric importance falls back to a neutral 0.5.
            const imp = typeof item.importance === "number" ? item.importance : 0.5;
            logger.info(`[engram:settle] Step2: vectorizing [${item.category}] importance=${imp.toFixed(2)} "${item.text.slice(0, 60)}"`);
            const storeResult = await opts.vectorStore(item.text, item.category, imp);
            if (storeResult === "created") {
                stored++;
            }
            else {
                duplicates++;
            }
        }
        // Tag the processed sections and persist; join("") mirrors the lookahead split above.
        for (const idx of unsettledIndices) {
            allSections[idx] = allSections[idx].replace(STRUCTURED_MARKER, STRUCTURED_MARKER + "\n" + SETTLED_MARKER);
        }
        await fs.writeFile(filePath, allSections.join(""), "utf-8");
        logger.info(`[engram:settle] Step2: marked ${unsettledIndices.length} sections as SETTLED`);
        logger.info(`[engram:settle] Step2: done. stored=${stored} duplicates=${duplicates} invalid=${skippedInvalid}`);
        return `Extracted ${stored} items (${duplicates} dup, ${unsettledIndices.length} sections settled).`;
    }
    catch (err) {
        logger.error(`[engram:settle] Step2 extractAndVectorize failed: ${err}`);
        return `Skipped: ${err}`;
    }
}
|
|
226
|
-
/**
 * Update the user profile from one day's structured short-term memory.
 *
 * Sections that contain STRUCTURED_MARKER and not SETTLED_MARKER are sent to the
 * LLM together with the current tags; the LLM answers with tags to add/remove.
 * The changes are applied, all tags are decayed by a factor of 0.98 and the
 * profile is saved.
 *
 * NOTE(review): in this file SETTLED_MARKER is only written by extractAndVectorize,
 * which runSettlement does not call. Re-running this function for the same date
 * therefore appears to re-analyse the same content and decay again; confirm intended.
 *
 * @param opts Settle options; reads `workspaceDir` and `llmCall(content, systemPrompt)`.
 * @param date Day to process (YYYY-MM-DD). Defaults to the current LOCAL date so it
 *             targets the same file as structurizeShortTerm.
 * @returns {Promise<string>} Status text; failures are reported in the text, not thrown.
 */
export async function updateProfile(opts, date = getLocalDateString()) {
    const filePath = join(opts.workspaceDir, "memory-engram", "short-term", `${date}.md`);
    logger.info(`[engram:settle] Step3 updateProfile: date=${date}`);
    try {
        const content = await fs.readFile(filePath, "utf-8");
        if (!content.trim()) {
            logger.info(`[engram:settle] Step3: file empty`);
            return "No content.";
        }
        if (!opts.llmCall) {
            logger.info(`[engram:settle] Step3: no llmCall`);
            return "No LLM configured.";
        }
        // Only structured-but-unsettled sections are analysed.
        const unsettledContent = content
            .split(/(?=^### )/m)
            .filter((section) => section.includes(STRUCTURED_MARKER) && !section.includes(SETTLED_MARKER))
            .join("\n");
        if (!unsettledContent.trim()) {
            logger.info(`[engram:settle] Step3: no new content for profile (all settled)`);
            return "No new content for profile.";
        }
        const profileManager = new ProfileManager(opts.workspaceDir);
        const profile = await profileManager.load();
        const existingTagCount = Object.values(profile.tags).reduce((sum, tags) => sum + tags.length, 0);
        logger.info(`[engram:settle] Step3: loaded profile with ${existingTagCount} existing tags across ${Object.keys(profile.tags).length} dimensions`);
        const systemPrompt = `你是一个用户画像分析助手。从以下记忆中抽取用户标签,更新画像。

当前画像:
${JSON.stringify(profile.tags, null, 2)}

请输出 JSON:
{
"added": [{"dimension": "分类", "value": "标签"}],
"removed": [{"dimension": "分类", "value": "标签"}],
"reason": "简要说明"
}

规则:
- 只添加有充分依据的标签(至少 2 条记忆佐证)
- 如果旧标签与新信息矛盾,放入 removed
- 不要重复添加已有标签

只输出 JSON。`;
        logger.info(`[engram:settle] Step3: calling LLM for profile update (${unsettledContent.length} chars)`);
        const result = await opts.llmCall(unsettledContent, systemPrompt);
        logger.info(`[engram:settle] Step3: LLM returned ${result.length} chars`);
        let changes;
        try {
            changes = JSON.parse(cleanLLMJson(result));
        }
        catch (parseErr) {
            logger.error(`[engram:settle] Step3: JSON parse failed: ${parseErr}, raw="${result.slice(0, 200)}"`);
            return `Skipped: JSON parse failed`;
        }
        // Tolerate an answer whose fields are missing or of the wrong shape.
        const isTag = (tag) => Boolean(tag && tag.dimension && tag.value);
        const toAdd = Array.isArray(changes?.added) ? changes.added.filter(isTag) : [];
        const toRemove = Array.isArray(changes?.removed) ? changes.removed.filter(isTag) : [];
        let added = 0;
        let removed = 0;
        for (const tag of toAdd) {
            logger.info(`[engram:settle] Step3: adding tag [${tag.dimension}] "${tag.value}"`);
            profileManager.addTag(profile, tag.dimension, tag.value);
            added++;
        }
        for (const tag of toRemove) {
            const current = profile.tags[tag.dimension];
            if (!Array.isArray(current)) {
                continue;
            }
            const kept = current.filter((t) => t.value !== tag.value);
            // Count and log only when a tag was really dropped.
            if (kept.length !== current.length) {
                logger.info(`[engram:settle] Step3: removing tag [${tag.dimension}] "${tag.value}"`);
                profile.tags[tag.dimension] = kept;
                removed++;
            }
        }
        if (changes?.reason) {
            logger.info(`[engram:settle] Step3: reason="${changes.reason}"`);
        }
        // Decay is applied on every successful run, after this run's changes.
        profileManager.decayTags(profile, 0.98);
        await profileManager.save(profile);
        logger.info(`[engram:settle] Step3: done. added=${added} removed=${removed}`);
        return `Profile updated: +${added} -${removed}`;
    }
    catch (err) {
        logger.error(`[engram:settle] Step3 updateProfile failed: ${err}`);
        return `Skipped: ${err}`;
    }
}
|
|
318
|
-
/**
 * Regenerate the profile's free-text summary and its `coreTags` list.
 *
 * All non-empty tag dimensions are rendered as `dimension: v1, v2` lines and sent
 * to the LLM, whose trimmed answer becomes `profile.summary`. `coreTags` is the
 * highest-confidence tag of each dimension, capped at 10 entries in dimension order.
 *
 * @param opts Settle options; reads `workspaceDir` and `llmCall(content, systemPrompt)`.
 * @returns {Promise<string>} Status text. Profile load, LLM and save failures are all
 *          reported in the text ("Failed: ...") rather than thrown.
 */
export async function generateProfileSummary(opts) {
    logger.info(`[engram:settle] Step4 generateProfileSummary: start`);
    if (!opts.llmCall) {
        logger.info(`[engram:settle] Step4: no llmCall configured`);
        return "No LLM configured.";
    }
    try {
        // Loading is inside the try so a broken profile yields a status string like
        // every other failure here, instead of rejecting the whole settlement.
        const profileManager = new ProfileManager(opts.workspaceDir);
        const profile = await profileManager.load();
        // Skip dimensions with no tags; otherwise "dim: " lines would make the list
        // look non-empty and trigger an LLM call with nothing to summarise.
        const allTags = Object.entries(profile.tags)
            .filter(([, tags]) => tags.length > 0)
            .map(([dim, tags]) => `${dim}: ${tags.map((t) => t.value).join(", ")}`)
            .join("\n");
        if (!allTags) {
            logger.info(`[engram:settle] Step4: no tags to summarize`);
            return "No tags to summarize.";
        }
        const systemPrompt = `将以下用户画像标签压缩为一段 100 字以内的中文摘要,用于 AI 检索时快速理解用户特征。

标签:
${allTags}

只输出摘要文本,不要其他内容。`;
        logger.info(`[engram:settle] Step4: calling LLM for summary (${allTags.length} chars of tags)`);
        const summary = await opts.llmCall(allTags, systemPrompt);
        logger.info(`[engram:settle] Step4: LLM returned summary (${summary.length} chars)`);
        profile.summary = summary.trim();
        // One core tag per dimension: the one with the highest confidence.
        const coreTags = [];
        for (const tags of Object.values(profile.tags)) {
            const [top] = [...tags].sort((a, b) => b.confidence - a.confidence);
            if (top) {
                coreTags.push(top.value);
            }
        }
        profile.coreTags = coreTags.slice(0, 10);
        await profileManager.save(profile);
        logger.info(`[engram:settle] Step4: done. coreTags=[${profile.coreTags.join(", ")}]`);
        return `Summary generated: "${profile.summary}"`;
    }
    catch (err) {
        logger.error(`[engram:settle] Step4 generateProfileSummary failed: ${err}`);
        return `Failed: ${err}`;
    }
}
|
|
365
|
-
/**
 * Legacy step: move old short-term files into `cold-storage/<YYYY-MM>/`.
 *
 * Retention is based on each file's mtime:
 *  - files without a recall-hit entry are archived after `shortTermDays`;
 *  - files with a recall-hit entry are archived after `shortTermDays * 3`.
 * Archived files are removed from the recall-hits map, which is written back
 * only when it actually changed.
 *
 * @param opts Settle options; reads `workspaceDir` and `config.shortTermDays`.
 * @param recallHits Optional recall-hits map keyed by file name. When omitted it is
 *                   loaded from recall-hits.json. The object passed in is mutated.
 * @returns {Promise<string>} Status text; failures are reported in the text.
 */
export async function archiveShortTerm(opts, recallHits) {
    const shortTermDir = join(opts.workspaceDir, "memory-engram", "short-term");
    const coldStorageDir = join(opts.workspaceDir, "memory-engram", "cold-storage");
    const baseDays = Number(opts.config.shortTermDays);
    // An unset or garbage retention would yield Invalid Date and make toISOString throw.
    if (!Number.isFinite(baseDays)) {
        logger.error(`[engram:settle] Step5 archiveShortTerm failed: invalid shortTermDays=${opts.config.shortTermDays}`);
        return `Archive skipped: invalid shortTermDays=${opts.config.shortTermDays}`;
    }
    const extendedDays = baseDays * 3;
    const daysAgo = (days) => {
        const cutoff = new Date();
        cutoff.setDate(cutoff.getDate() - days);
        return cutoff;
    };
    const baseCutoff = daysAgo(baseDays);
    const extendedCutoff = daysAgo(extendedDays);
    logger.info(`[engram:settle] Step5 archiveShortTerm: baseDays=${baseDays} extendedDays=${extendedDays} baseCutoff=${baseCutoff.toISOString().split("T")[0]} extendedCutoff=${extendedCutoff.toISOString().split("T")[0]}`);
    // Fall back to the on-disk recall-hits when the caller did not supply them.
    const hits = recallHits ?? await loadRecallHitsFile(opts.workspaceDir);
    const hitFileCount = Object.keys(hits).length;
    if (hitFileCount > 0) {
        logger.info(`[engram:settle] Step5: ${hitFileCount} files have recall hits, will use extended retention`);
    }
    try {
        await fs.mkdir(coldStorageDir, { recursive: true });
        const files = await fs.readdir(shortTermDir);
        const mdFiles = files.filter((f) => f.endsWith(".md"));
        logger.info(`[engram:settle] Step5: found ${mdFiles.length} md files in short-term`);
        let archived = 0;
        let retained = 0;
        let hitsChanged = false;
        for (const file of mdFiles) {
            const filePath = join(shortTermDir, file);
            const stat = await fs.stat(filePath);
            const isHit = !!hits[file];
            const cutoff = isHit ? extendedCutoff : baseCutoff;
            if (stat.mtime < cutoff) {
                // Archive by month, taken from the first YYYY-MM in the file name.
                const monthMatch = file.match(/(\d{4}-\d{2})/);
                const month = monthMatch ? monthMatch[1] : "unknown";
                const destDir = join(coldStorageDir, month);
                await fs.mkdir(destDir, { recursive: true });
                await fs.rename(filePath, join(destDir, file));
                logger.info(`[engram:settle] Step5: archived ${file} → cold-storage/${month}/ (hit=${isHit})`);
                archived++;
                if (isHit) {
                    delete hits[file];
                    hitsChanged = true;
                }
            }
            else if (isHit && stat.mtime < baseCutoff) {
                // Past the base window but kept because it was recalled.
                retained++;
                logger.info(`[engram:settle] Step5: retained ${file} (hit=true, extended retention)`);
            }
        }
        // Persist only when an entry was removed; avoids a pointless rewrite.
        if (hitsChanged) {
            await saveRecallHitsFile(opts.workspaceDir, hits);
        }
        logger.info(`[engram:settle] Step5: done. archived=${archived} retained=${retained} files`);
        return `Archived ${archived} files to cold-storage${retained > 0 ? `, retained ${retained} hit files` : ""}.`;
    }
    catch (err) {
        logger.error(`[engram:settle] Step5 archiveShortTerm failed: ${err}`);
        return `Archive skipped: ${err}`;
    }
}
|
|
435
|
-
/**
 * Load `memory-engram/recall-hits.json` from the workspace.
 *
 * Best-effort: a missing, unreadable or malformed file yields `{}`. JSON that is
 * valid but not a plain object (`null`, an array, a scalar) also yields `{}`,
 * because callers pass the result to `Object.keys` and index it by file name.
 * Anything other than "file does not exist" is logged so corruption is visible.
 *
 * @param {string} workspaceDir Workspace root directory.
 * @returns {Promise<object>} The recall-hits map, or `{}` when unavailable.
 */
async function loadRecallHitsFile(workspaceDir) {
    try {
        const filePath = join(workspaceDir, "memory-engram", "recall-hits.json");
        const parsed = JSON.parse(await fs.readFile(filePath, "utf-8"));
        const isPlainObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
        return isPlainObject ? parsed : {};
    }
    catch (err) {
        // A missing file is the normal "no hits yet" case and stays silent.
        if (err?.code !== "ENOENT") {
            logger.error(`[engram:settle] loadRecallHitsFile: cannot load recall-hits.json: ${err}`);
        }
        return {};
    }
}
|
|
448
|
-
/**
 * Persist the recall-hits map to `memory-engram/recall-hits.json` as
 * 2-space-indented JSON, overwriting any existing file.
 *
 * @param {string} workspaceDir Workspace root directory.
 * @param {object} hits Recall-hits map to serialise.
 * @returns {Promise<void>}
 */
async function saveRecallHitsFile(workspaceDir, hits) {
    await fs.writeFile(
        join(workspaceDir, "memory-engram", "recall-hits.json"),
        JSON.stringify(hits, null, 2),
        "utf-8",
    );
}
|
|
455
|
-
/**
 * Forgetting step: ask the vector store to drop stale long-term memories.
 *
 * This function only forwards a threshold and a cap to `opts.vectorPrune`; how
 * staleness is scored is decided by that callback and is not visible here.
 * (The original note describes the score as
 * importance × decay(age) × log2(1 + accessCount); confirm against the callback.)
 *
 * @param opts Settle options; reads the optional `vectorPrune(threshold, maxPrune)` callback.
 * @param {number} [threshold=0.05] Threshold passed to the callback.
 * @param {number} [maxPrune=50] Cap passed to the callback.
 * @returns {Promise<string>} Status text; callback failures are reported in the text.
 */
export async function pruneStaleMemories(opts, threshold = 0.05, maxPrune = 50) {
    logger.info(`[engram:settle] Step6 pruneStaleMemories: start`);
    if (!opts.vectorPrune) {
        logger.info(`[engram:settle] Step6: no vectorPrune callback, skipping`);
        return "No vector prune configured.";
    }
    try {
        logger.info(`[engram:settle] Step6: calling vectorPrune (threshold=${threshold}, maxPrune=${maxPrune})`);
        const pruned = await opts.vectorPrune(threshold, maxPrune);
        logger.info(`[engram:settle] Step6: done. pruned=${pruned} stale memories`);
        return `Pruned ${pruned} stale memories from vector store.`;
    }
    catch (err) {
        logger.error(`[engram:settle] Step6 pruneStaleMemories failed: ${err}`);
        return `Prune skipped: ${err}`;
    }
}
|
|
477
|
-
/**
 * Run the daily settlement pipeline (v0.5, trimmed).
 *
 * Steps, executed strictly in order because each builds on the previous one:
 *   1. structurizeShortTerm   - structure the day's short-term file
 *   2. updateProfile          - update profile tags from the structured content
 *   3. generateProfileSummary - refresh the profile summary / core tags
 *
 * extractAndVectorize, archiveShortTerm and pruneStaleMemories are intentionally
 * not part of the daily run (extraction and pruning happen in the monthly settle;
 * short-term files are kept permanently).
 *
 * @param opts Settle options. `config` is only read for a diagnostic log line, so it
 *             is accessed defensively and may be absent.
 * @param targetDate Optional day to process (YYYY-MM-DD); defaults to the local current date.
 * @returns {Promise<string[]>} One status string per step, in execution order.
 */
export async function runSettlement(opts, targetDate) {
    const date = targetDate || getLocalDateString();
    const results = [];
    const startTime = Date.now();
    logger.info(`[engram:settle] ====== Daily Settlement START for ${date} ======`);
    logger.info(`[engram:settle] Config: shortTermDays=${opts.config?.shortTermDays}, halfLifeDays=${opts.config?.halfLifeDays}`);
    logger.info(`[engram:settle] Has llmCall=${!!opts.llmCall}, has vectorStore=${!!opts.vectorStore}`);
    const steps = [
        ["Step1 (structurize)", () => structurizeShortTerm(opts, date)],
        ["Step2 (profile update)", () => updateProfile(opts, date)],
        ["Step3 (profile summary)", () => generateProfileSummary(opts)],
    ];
    for (const [label, run] of steps) {
        const stepStart = Date.now();
        const outcome = await run();
        results.push(outcome);
        logger.info(`[engram:settle] ${label} took ${Date.now() - stepStart}ms: ${outcome}`);
    }
    logger.info(`[engram:settle] ====== Daily Settlement DONE (total ${Date.now() - startTime}ms) ======`);
    return results;
}
|
|
514
|
-
/**
 * Run the monthly settlement: selective extraction followed by forgetting.
 *
 * Phase 1 (monthlyExtractAndVectorize) settles short-term files whose recall-hit
 * count is at least `minHitCount`; phase 2 (pruneStaleMemories) prunes the vector
 * store. The phases run sequentially and each contributes one status string.
 *
 * @param opts Settle options (the phases use `llmCall`, `vectorStore` and `vectorPrune`).
 * @param minHitCount Minimum recall-hit count for a file to be settled; defaults to 2.
 * @returns {Promise<string[]>} `[phase1Status, phase2Status]`.
 */
export async function runMonthlySettle(opts, minHitCount = 2) {
    const startedAt = Date.now();
    const outcomes = [];
    logger.info(`[engram:settle] ====== Monthly Settlement START ======`);
    logger.info(`[engram:settle] minHitCount=${minHitCount}, has llmCall=${!!opts.llmCall}, has vectorStore=${!!opts.vectorStore}, has vectorPrune=${!!opts.vectorPrune}`);
    const phases = [
        ["Phase1 (extract)", () => monthlyExtractAndVectorize(opts, minHitCount)],
        ["Phase2 (prune)", () => pruneStaleMemories(opts)],
    ];
    for (const [label, runPhase] of phases) {
        const phaseStartedAt = Date.now();
        const status = await runPhase();
        outcomes.push(status);
        logger.info(`[engram:settle] ${label} took ${Date.now() - phaseStartedAt}ms: ${status}`);
    }
    logger.info(`[engram:settle] ====== Monthly Settlement DONE (total ${Date.now() - startedAt}ms) ======`);
    return outcomes;
}
|
|
543
|
-
/**
 * Monthly selective settle: promote frequently recalled short-term files to the
 * vector store.
 *
 * Flow:
 *   1. Load recall-hits.json (bare-number v1 entries are upgraded in memory to
 *      `{ hitCount: 1, firstHit, lastHit, settled: false }`).
 *   2. Select entries with `hitCount >= minHitCount` that are not yet settled.
 *   3. Read the matching short-term files and concatenate their contents.
 *   4. Ask the LLM for a JSON array of `{ text, category, importance }` items.
 *   5. Store each valid item through `opts.vectorStore`.
 *   6. Mark the processed files `settled: true` and write recall-hits.json back.
 *
 * Every failure is turned into a status string so the caller's next phase still runs.
 *
 * @param opts Settle options; reads `workspaceDir`, `llmCall` and `vectorStore`.
 * @param minHitCount Minimum recall-hit count for a file to qualify.
 * @returns {Promise<string>} Status text describing what was stored or why it was skipped.
 */
async function monthlyExtractAndVectorize(opts, minHitCount) {
    logger.info(`[engram:settle] monthlyExtractAndVectorize: minHitCount=${minHitCount}`);
    if (!opts.llmCall || !opts.vectorStore) {
        logger.info(`[engram:settle] monthlyExtractAndVectorize: missing llmCall=${!!opts.llmCall} vectorStore=${!!opts.vectorStore}`);
        return "No LLM/vector configured.";
    }
    // 1. Load recall-hits.json, normalising v1 entries.
    const recallHitsPath = join(opts.workspaceDir, "memory-engram", "recall-hits.json");
    let hits;
    try {
        const parsed = JSON.parse(await fs.readFile(recallHitsPath, "utf-8"));
        hits = {};
        for (const [key, value] of Object.entries(parsed)) {
            if (typeof value === "number") {
                hits[key] = { hitCount: 1, firstHit: value, lastHit: value, settled: false };
            }
            else if (typeof value === "object" && value !== null) {
                hits[key] = value;
            }
        }
    }
    catch (err) {
        if (err?.code === "ENOENT") {
            logger.info(`[engram:settle] monthlyExtractAndVectorize: no recall-hits.json, nothing to settle`);
            return "No recall-hits.json found.";
        }
        // Unreadable or corrupt file: report it as such rather than as "not found".
        logger.error(`[engram:settle] monthlyExtractAndVectorize: cannot read recall-hits.json: ${err}`);
        return `Monthly settle skipped: cannot read recall-hits.json: ${err}`;
    }
    // 2. Select unsettled entries that were recalled often enough.
    const candidates = Object.entries(hits).filter(([, entry]) => entry.hitCount >= minHitCount && !entry.settled);
    logger.info(`[engram:settle] monthlyExtractAndVectorize: ${Object.keys(hits).length} total entries, ${candidates.length} meet criteria (hitCount≥${minHitCount}, not settled)`);
    if (candidates.length === 0) {
        return "No short-term memories meet monthly settle criteria.";
    }
    // 3. Read the candidate files; missing or empty ones are skipped and stay unsettled.
    const shortTermDir = join(opts.workspaceDir, "memory-engram", "short-term");
    const contentParts = [];
    const processedFiles = [];
    for (const [fileName, hitEntry] of candidates) {
        try {
            const content = await fs.readFile(join(shortTermDir, fileName), "utf-8");
            if (content.trim()) {
                contentParts.push(`--- ${fileName} ---\n${content}`);
                processedFiles.push(fileName);
                logger.info(`[engram:settle] monthlyExtractAndVectorize: loaded ${fileName} (${content.length} chars, hitCount=${hitEntry.hitCount})`);
            }
        }
        catch {
            logger.info(`[engram:settle] monthlyExtractAndVectorize: file ${fileName} not found, skipping`);
        }
    }
    if (contentParts.length === 0) {
        return "All candidate files missing or empty.";
    }
    const allContent = contentParts.join("\n\n");
    logger.info(`[engram:settle] monthlyExtractAndVectorize: total content ${allContent.length} chars from ${processedFiles.length} files`);
    // 4. Ask the LLM which items deserve long-term storage.
    const systemPrompt = `你是一个信息筛选助手。从以下短期记忆中筛选出值得长期保留的条目。

这些短期记忆已经被多次召回命中,说明它们有持续价值。请提炼出核心信息。

保留标准:
- 包含用户偏好、决策、重要事件、具体信息(地点、金额、人名)
- 包含用户行为模式和生活习惯(作息时间、运动习惯、饮食规律等)
- 包含技术决策、项目配置、工作流程等持续有用的信息
- 合并同一主题的多次记录为一条精炼表述
- 去掉纯闲聊、临时信息、已过时的内容

对每条保留的条目,输出 JSON 数组,每条包含:
- text: 条目摘要(精炼、信息密度高)
- category: 分类
- importance: 重要度评分 0-1,标准如下:
- 0.9-1.0: 关键决策、重要事件(搬家、换工作、关系变化)
- 0.7-0.8: 明确偏好、项目里程碑、具体计划
- 0.4-0.6: 一般性讨论、技术问答
- 0.1-0.3: 日常闲聊、低价值内容

只输出 JSON,不要其他内容。`;
    try {
        logger.info(`[engram:settle] monthlyExtractAndVectorize: calling LLM for extraction (${allContent.length} chars)`);
        const result = await opts.llmCall(allContent, systemPrompt);
        logger.info(`[engram:settle] monthlyExtractAndVectorize: LLM returned ${result.length} chars`);
        let items;
        try {
            items = JSON.parse(cleanLLMJson(result));
        }
        catch (parseErr) {
            logger.error(`[engram:settle] monthlyExtractAndVectorize: JSON parse failed: ${parseErr}, raw="${result.slice(0, 200)}"`);
            return `Monthly settle skipped: JSON parse failed`;
        }
        if (!Array.isArray(items)) {
            logger.error(`[engram:settle] monthlyExtractAndVectorize: LLM output is not a JSON array, raw="${result.slice(0, 200)}"`);
            return `Monthly settle skipped: LLM output is not a JSON array`;
        }
        logger.info(`[engram:settle] monthlyExtractAndVectorize: parsed ${items.length} candidate items`);
        // 5. Store every usable item.
        let stored = 0;
        let duplicates = 0;
        let skippedInvalid = 0;
        for (const item of items) {
            const usable = item && typeof item.text === "string" && item.text && item.category;
            if (!usable) {
                skippedInvalid++;
                logger.info(`[engram:settle] monthlyExtractAndVectorize: skipping invalid item: ${JSON.stringify(item).slice(0, 100)}`);
                continue;
            }
            // Missing or non-numeric importance falls back to a neutral 0.5.
            const imp = typeof item.importance === "number" ? item.importance : 0.5;
            logger.info(`[engram:settle] monthlyExtractAndVectorize: vectorizing [${item.category}] importance=${imp.toFixed(2)} "${item.text.slice(0, 60)}"`);
            const storeResult = await opts.vectorStore(item.text, item.category, imp);
            if (storeResult === "created") {
                stored++;
            }
            else {
                duplicates++;
            }
        }
        // 6. Mark only the files whose content was actually sent to the LLM.
        for (const fileName of processedFiles) {
            if (hits[fileName]) {
                hits[fileName].settled = true;
            }
        }
        await fs.writeFile(recallHitsPath, JSON.stringify(hits, null, 2), "utf-8");
        logger.info(`[engram:settle] monthlyExtractAndVectorize: marked ${processedFiles.length} files as settled`);
        const summary = `Monthly settle: ${stored} items stored (${duplicates} dup, ${skippedInvalid} invalid) from ${processedFiles.length} files.`;
        logger.info(`[engram:settle] monthlyExtractAndVectorize: done. ${summary}`);
        return summary;
    }
    catch (err) {
        // LLM / vector store / write-back failure: the files stay unsettled so the
        // next monthly run retries them, and the caller's prune phase can still run.
        logger.error(`[engram:settle] monthlyExtractAndVectorize failed: ${err}`);
        return `Monthly settle skipped: ${err}`;
    }
}
|
|
675
|
-
//# sourceMappingURL=settle.js.map
|