@scotthuang/engram 0.5.8 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -0
- package/dist/face/src/face-api.d.ts +36 -0
- package/dist/face/src/face-api.js +90 -0
- package/dist/face/src/face-api.js.map +1 -0
- package/dist/face/src/face-store.d.ts +101 -0
- package/dist/face/src/face-store.js +361 -0
- package/dist/face/src/face-store.js.map +1 -0
- package/dist/index.js +11 -8
- package/dist/index.js.map +1 -1
- package/dist/src/__tests__/bm25.test.d.ts +1 -0
- package/dist/src/__tests__/bm25.test.js +198 -0
- package/dist/src/__tests__/bm25.test.js.map +1 -0
- package/dist/src/__tests__/config.test.d.ts +1 -0
- package/dist/src/__tests__/config.test.js +31 -0
- package/dist/src/__tests__/config.test.js.map +1 -0
- package/dist/src/__tests__/profile.test.d.ts +1 -0
- package/dist/src/__tests__/profile.test.js +130 -0
- package/dist/src/__tests__/profile.test.js.map +1 -0
- package/dist/src/__tests__/recall.test.d.ts +1 -0
- package/dist/src/__tests__/recall.test.js +162 -0
- package/dist/src/__tests__/recall.test.js.map +1 -0
- package/dist/src/bm25.d.ts +60 -0
- package/dist/src/bm25.js +271 -0
- package/dist/src/bm25.js.map +1 -0
- package/dist/src/config.d.ts +47 -0
- package/dist/src/config.js +83 -0
- package/dist/src/config.js.map +1 -0
- package/dist/src/image-store.d.ts +146 -0
- package/dist/src/image-store.js +418 -0
- package/dist/src/image-store.js.map +1 -0
- package/dist/src/index.d.ts +7 -0
- package/dist/src/index.js +1236 -0
- package/dist/src/index.js.map +1 -0
- package/dist/src/logger.d.ts +32 -0
- package/dist/src/logger.js +106 -0
- package/dist/src/logger.js.map +1 -0
- package/dist/src/profile.d.ts +37 -0
- package/dist/src/profile.js +107 -0
- package/dist/src/profile.js.map +1 -0
- package/dist/src/recall.d.ts +98 -0
- package/dist/src/recall.js +729 -0
- package/dist/src/recall.js.map +1 -0
- package/dist/src/settle.d.ts +83 -0
- package/dist/src/settle.js +687 -0
- package/dist/src/settle.js.map +1 -0
- package/dist/src/vector.d.ts +67 -0
- package/dist/src/vector.js +284 -0
- package/dist/src/vector.js.map +1 -0
- package/package.json +3 -3
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import { describe, it, expect, beforeEach } from "vitest";
|
|
2
|
+
import { ProfileManager, EMPTY_PROFILE } from "../profile.js";
|
|
3
|
+
import { promises as fs } from "node:fs";
|
|
4
|
+
import { join } from "node:path";
|
|
5
|
+
import { tmpdir } from "node:os";
|
|
6
|
+
describe("ProfileManager", () => {
|
|
7
|
+
let manager;
|
|
8
|
+
let tempDir;
|
|
9
|
+
beforeEach(async () => {
    // mkdtemp creates the directory itself and appends a random suffix, so every
    // test gets its own directory. The previous Date.now()-based name combined
    // with mkdir({ recursive: true }) silently reused the same directory when
    // two setups ran within the same millisecond.
    tempDir = await fs.mkdtemp(join(tmpdir(), "engram-test-"));
    manager = new ProfileManager(tempDir);
});
|
|
14
|
+
describe("load", () => {
|
|
15
|
+
it("returns empty profile when no file exists", async () => {
|
|
16
|
+
const profile = await manager.load();
|
|
17
|
+
expect(profile.summary).toBe("");
|
|
18
|
+
expect(profile.coreTags).toEqual([]);
|
|
19
|
+
expect(profile.tags).toEqual({});
|
|
20
|
+
});
|
|
21
|
+
it("loads existing profile from file", async () => {
|
|
22
|
+
const profileDir = join(tempDir, "memory", "profile");
|
|
23
|
+
await fs.mkdir(profileDir, { recursive: true });
|
|
24
|
+
const saved = { ...EMPTY_PROFILE, summary: "test summary", coreTags: ["tag1"] };
|
|
25
|
+
await fs.writeFile(join(profileDir, "semantic_profile.json"), JSON.stringify(saved));
|
|
26
|
+
// 创建新的 manager 实例测试从文件加载
|
|
27
|
+
const manager2 = new ProfileManager(tempDir);
|
|
28
|
+
const loaded = await manager2.load();
|
|
29
|
+
expect(loaded.summary).toBe("test summary");
|
|
30
|
+
expect(loaded.coreTags).toEqual(["tag1"]);
|
|
31
|
+
});
|
|
32
|
+
});
|
|
33
|
+
describe("addTag", () => {
|
|
34
|
+
it("adds a new tag to a new dimension", async () => {
|
|
35
|
+
const profile = { ...EMPTY_PROFILE };
|
|
36
|
+
const result = manager.addTag(profile, "口味偏好", "喜欢辣");
|
|
37
|
+
expect(result.tags["口味偏好"]).toHaveLength(1);
|
|
38
|
+
expect(result.tags["口味偏好"][0].value).toBe("喜欢辣");
|
|
39
|
+
expect(result.tags["口味偏好"][0].confidence).toBe(0.7);
|
|
40
|
+
});
|
|
41
|
+
it("increases confidence for existing tag", async () => {
|
|
42
|
+
const profile = {
|
|
43
|
+
...EMPTY_PROFILE,
|
|
44
|
+
tags: { 口味偏好: [{ value: "喜欢辣", confidence: 0.5, lastSeen: "2026-01-01" }] },
|
|
45
|
+
};
|
|
46
|
+
const result = manager.addTag(profile, "口味偏好", "喜欢辣");
|
|
47
|
+
expect(result.tags["口味偏好"]).toHaveLength(1);
|
|
48
|
+
expect(result.tags["口味偏好"][0].confidence).toBeCloseTo(0.6);
|
|
49
|
+
});
|
|
50
|
+
it("adds multiple tags to same dimension", async () => {
|
|
51
|
+
const profile = { ...EMPTY_PROFILE };
|
|
52
|
+
manager.addTag(profile, "口味偏好", "喜欢辣");
|
|
53
|
+
manager.addTag(profile, "口味偏好", "不吃香菜");
|
|
54
|
+
expect(profile.tags["口味偏好"]).toHaveLength(2);
|
|
55
|
+
});
|
|
56
|
+
});
|
|
57
|
+
describe("decayTags", () => {
|
|
58
|
+
it("reduces confidence of all tags", () => {
|
|
59
|
+
const profile = {
|
|
60
|
+
...EMPTY_PROFILE,
|
|
61
|
+
tags: {
|
|
62
|
+
口味偏好: [
|
|
63
|
+
{ value: "喜欢辣", confidence: 0.9, lastSeen: "2026-03-17" },
|
|
64
|
+
],
|
|
65
|
+
},
|
|
66
|
+
};
|
|
67
|
+
const result = manager.decayTags(profile, 0.5);
|
|
68
|
+
expect(result.tags["口味偏好"][0].confidence).toBeCloseTo(0.45);
|
|
69
|
+
});
|
|
70
|
+
it("removes tags below threshold", () => {
|
|
71
|
+
const profile = {
|
|
72
|
+
...EMPTY_PROFILE,
|
|
73
|
+
tags: {
|
|
74
|
+
过时: [{ value: "旧标签", confidence: 0.15, lastSeen: "2026-01-01" }],
|
|
75
|
+
},
|
|
76
|
+
};
|
|
77
|
+
const result = manager.decayTags(profile, 1.0);
|
|
78
|
+
expect(result.tags["过时"]).toBeUndefined();
|
|
79
|
+
});
|
|
80
|
+
it("removes empty dimensions after filtering", () => {
|
|
81
|
+
const profile = {
|
|
82
|
+
...EMPTY_PROFILE,
|
|
83
|
+
tags: {
|
|
84
|
+
空维度: [{ value: "很低", confidence: 0.1, lastSeen: "2026-01-01" }],
|
|
85
|
+
},
|
|
86
|
+
};
|
|
87
|
+
const result = manager.decayTags(profile, 1.0);
|
|
88
|
+
expect("空维度" in result.tags).toBe(false);
|
|
89
|
+
});
|
|
90
|
+
});
|
|
91
|
+
describe("getRecallContext", () => {
|
|
92
|
+
it("returns empty string for empty profile", () => {
|
|
93
|
+
const ctx = manager.getRecallContext(EMPTY_PROFILE);
|
|
94
|
+
expect(ctx).toBe("");
|
|
95
|
+
});
|
|
96
|
+
it("returns summary and core tags", () => {
|
|
97
|
+
const profile = { ...EMPTY_PROFILE, summary: "辣味中餐爱好者", coreTags: ["辣味中餐", "天河"] };
|
|
98
|
+
const ctx = manager.getRecallContext(profile);
|
|
99
|
+
expect(ctx).toContain("辣味中餐爱好者");
|
|
100
|
+
expect(ctx).toContain("辣味中餐");
|
|
101
|
+
expect(ctx).toContain("天河");
|
|
102
|
+
});
|
|
103
|
+
it("works with only core tags", () => {
|
|
104
|
+
const profile = { ...EMPTY_PROFILE, summary: "", coreTags: ["标签1"] };
|
|
105
|
+
const ctx = manager.getRecallContext(profile);
|
|
106
|
+
expect(ctx).toContain("标签1");
|
|
107
|
+
});
|
|
108
|
+
});
|
|
109
|
+
describe("save", () => {
|
|
110
|
+
it("saves profile to file", async () => {
|
|
111
|
+
const profile = { ...EMPTY_PROFILE, summary: "test" };
|
|
112
|
+
await manager.save(profile);
|
|
113
|
+
const raw = await fs.readFile(join(tempDir, "memory", "profile", "semantic_profile.json"), "utf-8");
|
|
114
|
+
const loaded = JSON.parse(raw);
|
|
115
|
+
expect(loaded.summary).toBe("test");
|
|
116
|
+
});
|
|
117
|
+
it("updates updatedAt on save", async () => {
|
|
118
|
+
const profile = { ...EMPTY_PROFILE };
|
|
119
|
+
const before = new Date();
|
|
120
|
+
await manager.save(profile);
|
|
121
|
+
const after = new Date();
|
|
122
|
+
const raw = await fs.readFile(join(tempDir, "memory", "profile", "semantic_profile.json"), "utf-8");
|
|
123
|
+
const loaded = JSON.parse(raw);
|
|
124
|
+
const updated = new Date(loaded.updatedAt);
|
|
125
|
+
expect(updated.getTime()).toBeGreaterThanOrEqual(before.getTime());
|
|
126
|
+
expect(updated.getTime()).toBeLessThanOrEqual(after.getTime());
|
|
127
|
+
});
|
|
128
|
+
});
|
|
129
|
+
});
|
|
130
|
+
//# sourceMappingURL=profile.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"profile.test.js","sourceRoot":"","sources":["../../../src/__tests__/profile.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,QAAQ,CAAC;AAC1D,OAAO,EAAE,cAAc,EAAE,aAAa,EAAE,MAAM,eAAe,CAAC;AAC9D,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,MAAM,EAAE,MAAM,SAAS,CAAC;AAEjC,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;IAC9B,IAAI,OAAuB,CAAC;IAC5B,IAAI,OAAe,CAAC;IAEpB,UAAU,CAAC,KAAK,IAAI,EAAE;QACpB,OAAO,GAAG,IAAI,CAAC,MAAM,EAAE,EAAE,eAAe,IAAI,CAAC,GAAG,EAAE,EAAE,CAAC,CAAC;QACtD,MAAM,EAAE,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAC7C,OAAO,GAAG,IAAI,cAAc,CAAC,OAAO,CAAC,CAAC;IACxC,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,MAAM,EAAE,GAAG,EAAE;QACpB,EAAE,CAAC,2CAA2C,EAAE,KAAK,IAAI,EAAE;YACzD,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,IAAI,EAAE,CAAC;YACrC,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;YACjC,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YACrC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;QACnC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,kCAAkC,EAAE,KAAK,IAAI,EAAE;YAChD,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,EAAE,QAAQ,EAAE,SAAS,CAAC,CAAC;YACtD,MAAM,EAAE,CAAC,KAAK,CAAC,UAAU,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAChD,MAAM,KAAK,GAAG,EAAE,GAAG,aAAa,EAAE,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,MAAM,CAAC,EAAE,CAAC;YAChF,MAAM,EAAE,CAAC,SAAS,CAAC,IAAI,CAAC,UAAU,EAAE,uBAAuB,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;YAErF,yBAAyB;YACzB,MAAM,QAAQ,GAAG,IAAI,cAAc,CAAC,OAAO,CAAC,CAAC;YAC7C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACrC,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;YAC5C,MAAM,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,QAAQ,EAAE,GAAG,EAAE;QACtB,EAAE,CAAC,mCAAmC,EAAE,KAAK,IAAI,EAAE;YACjD,MAAM,OAAO,GAAG,EAAE,GAAG,aAAa,EAAE,CAAC;YACrC,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;YACtD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,
CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC5C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACjD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACtD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uCAAuC,EAAE,KAAK,IAAI,EAAE;YACrD,MAAM,OAAO,GAAQ;gBACnB,GAAG,aAAa;gBAChB,IAAI,EAAE,EAAE,IAAI,EAAE,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC,EAAE;aAC5E,CAAC;YACF,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;YACtD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAC5C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QAC7D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,sCAAsC,EAAE,KAAK,IAAI,EAAE;YACpD,MAAM,OAAO,GAAG,EAAE,GAAG,aAAa,EAAE,CAAC;YACrC,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,KAAK,CAAC,CAAC;YACvC,OAAO,CAAC,MAAM,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC;YACxC,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC/C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,WAAW,EAAE,GAAG,EAAE;QACzB,EAAE,CAAC,gCAAgC,EAAE,GAAG,EAAE;YACxC,MAAM,OAAO,GAAQ;gBACnB,GAAG,aAAa;gBAChB,IAAI,EAAE;oBACJ,IAAI,EAAE;wBACJ,EAAE,KAAK,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE,QAAQ,EAAE,YAAY,EAAE;qBAC1D;iBACF;aACF,CAAC;YACF,MAAM,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAC/C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC;QAC9D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8BAA8B,EAAE,GAAG,EAAE;YACtC,MAAM,OAAO,GAAQ;gBACnB,GAAG,aAAa;gBAChB,IAAI,EAAE;oBACJ,EAAE,EAAE,CAAC,EAAE,KAAK,EAAE,KAAK,EAAE,UAAU,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC;iBACjE;aACF,CAAC;YACF,MAAM,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAC/C,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,aAAa,EAAE,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;YAClD,MAAM,OAAO,GAAQ;gBACnB,GAAG,aAAa;gBAChB,IAAI,EAAE;oBACJ,GAAG,EAAE,CAAC,EAAE,KAAK,EAAE
,IAAI,EAAE,UAAU,EAAE,GAAG,EAAE,QAAQ,EAAE,YAAY,EAAE,CAAC;iBAChE;aACF,CAAC;YACF,MAAM,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC;YAC/C,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3C,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,kBAAkB,EAAE,GAAG,EAAE;QAChC,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;YAChD,MAAM,GAAG,GAAG,OAAO,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC;YACpD,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACvB,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACvC,MAAM,OAAO,GAAG,EAAE,GAAG,aAAa,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC,MAAM,EAAE,IAAI,CAAC,EAAE,CAAC;YACnF,MAAM,GAAG,GAAG,OAAO,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;YAC9C,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,SAAS,CAAC,CAAC;YACjC,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;YAC9B,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;YACnC,MAAM,OAAO,GAAG,EAAE,GAAG,aAAa,EAAE,OAAO,EAAE,EAAE,EAAE,QAAQ,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC;YACrE,MAAM,GAAG,GAAG,OAAO,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;YAC9C,MAAM,CAAC,GAAG,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC/B,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,MAAM,EAAE,GAAG,EAAE;QACpB,EAAE,CAAC,uBAAuB,EAAE,KAAK,IAAI,EAAE;YACrC,MAAM,OAAO,GAAG,EAAE,GAAG,aAAa,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC;YACtD,MAAM,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAE5B,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,uBAAuB,CAAC,EAAE,OAAO,CAAC,CAAC;YACpG,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAC/B,MAAM,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACtC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2BAA2B,EAAE,KAAK,IAAI,EAAE;YACzC,MAAM,OAAO,GAAG,EAAE,GAAG,aAAa,EAAE,CAAC;YACrC,MAAM,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YAC1B,MAAM,OAAO,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YAC5B,MAAM,KAAK,GAAG,IAAI,IAAI,EAAE,CAAC;YAEzB,MAAM,GAAG,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,uBAAuB,CAAC,EAAE,OAAO,CAAC,CAAC;YACpG,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YAC/B,MAAM,OAAO,
GAAG,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;YAC3C,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,sBAAsB,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;YACnE,MAAM,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC,CAAC,mBAAmB,CAAC,KAAK,CAAC,OAAO,EAAE,CAAC,CAAC;QACjE,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
// 纯函数测试,不依赖外部模块
|
|
3
|
+
describe("Recall helpers", () => {
|
|
4
|
+
describe("temporalDecay", () => {
|
|
5
|
+
/**
 * Apply exponential half-life decay to a score.
 *
 * @param {number} score - Score before decay.
 * @param {number} ageInDays - Age of the item in days; values <= 0 are not decayed.
 * @param {number} halfLifeDays - Number of days after which the score is halved.
 * @returns {number} The score unchanged when ageInDays <= 0, otherwise
 *   score * exp(-(ln 2 / halfLifeDays) * ageInDays).
 */
function temporalDecay(score, ageInDays, halfLifeDays) {
    const isFresh = ageInDays <= 0;
    if (isFresh) {
        return score;
    }
    const decayRate = Math.log(2) / halfLifeDays;
    const retainedFraction = Math.exp(-decayRate * ageInDays);
    return score * retainedFraction;
}
|
|
11
|
+
it("returns original score for 0 days", () => {
|
|
12
|
+
expect(temporalDecay(1.0, 0, 30)).toBeCloseTo(1.0);
|
|
13
|
+
});
|
|
14
|
+
it("returns ~50% at half-life", () => {
|
|
15
|
+
expect(temporalDecay(1.0, 30, 30)).toBeCloseTo(0.5, 1);
|
|
16
|
+
});
|
|
17
|
+
it("returns ~84% at 7 days with 30-day half-life", () => {
|
|
18
|
+
expect(temporalDecay(1.0, 7, 30)).toBeCloseTo(0.846, 2);
|
|
19
|
+
});
|
|
20
|
+
it("returns ~12.5% at 90 days with 30-day half-life", () => {
|
|
21
|
+
expect(temporalDecay(1.0, 90, 30)).toBeCloseTo(0.125, 2);
|
|
22
|
+
});
|
|
23
|
+
it("shorter half-life decays faster", () => {
|
|
24
|
+
const short = temporalDecay(1.0, 7, 7);
|
|
25
|
+
const long = temporalDecay(1.0, 7, 30);
|
|
26
|
+
expect(short).toBeLessThan(long);
|
|
27
|
+
});
|
|
28
|
+
});
|
|
29
|
+
describe("jaccard", () => {
|
|
30
|
+
/**
 * Character-level Jaccard similarity between two strings.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} |A ∩ B| / |A ∪ B| over the sets of distinct characters,
 *   or 0 when both strings are empty.
 */
function jaccard(a, b) {
    // Building the sets from the string iterator yields whole code points.
    // split("") works on UTF-16 code units, which breaks astral characters
    // (emoji, rare CJK) into surrogate halves and makes unrelated characters
    // look partially similar.
    const charsA = new Set(a);
    const charsB = new Set(b);
    let shared = 0;
    for (const ch of charsA) {
        if (charsB.has(ch)) {
            shared += 1;
        }
    }
    const unionSize = charsA.size + charsB.size - shared;
    return unionSize === 0 ? 0 : shared / unionSize;
}
|
|
37
|
+
it("returns 1.0 for identical strings", () => {
|
|
38
|
+
expect(jaccard("abc", "abc")).toBeCloseTo(1.0);
|
|
39
|
+
});
|
|
40
|
+
it("returns 0.0 for completely different strings", () => {
|
|
41
|
+
expect(jaccard("abc", "xyz")).toBeCloseTo(0.0);
|
|
42
|
+
});
|
|
43
|
+
it("returns partial similarity for overlapping strings", () => {
|
|
44
|
+
const sim = jaccard("abc", "abd");
|
|
45
|
+
expect(sim).toBeGreaterThan(0);
|
|
46
|
+
expect(sim).toBeLessThan(1);
|
|
47
|
+
});
|
|
48
|
+
it("handles empty strings", () => {
|
|
49
|
+
expect(jaccard("", "")).toBe(0);
|
|
50
|
+
expect(jaccard("abc", "")).toBe(0);
|
|
51
|
+
});
|
|
52
|
+
});
|
|
53
|
+
describe("MMR rerank", () => {
|
|
54
|
+
/**
 * Rerank candidates with Maximal Marginal Relevance (MMR).
 *
 * The highest-scoring candidate is always picked first. Each following pick
 * maximises lambda * finalScore - (1 - lambda) * maxSimilarity, where
 * maxSimilarity is the largest character-level Jaccard similarity between the
 * candidate's text and any already selected text.
 *
 * @param {Array<{text: string, finalScore: number}>} candidates - Items to rerank.
 *   The array itself is not reordered.
 * @param {number} [lambda=0.7] - Weight of relevance versus diversity; 1.0
 *   ranks purely by finalScore.
 * @returns {Array<{text: string, finalScore: number}>} The reranked items. For
 *   zero or one candidate the input array is returned as-is.
 */
function mmrRerank(candidates, lambda = 0.7) {
    if (candidates.length <= 1) {
        return candidates;
    }
    // Character-level Jaccard similarity. Sets are built from the string
    // iterator so astral characters (emoji, rare CJK) stay whole instead of
    // being split into surrogate halves as split("") would do.
    const jaccard = (a, b) => {
        const charsA = new Set(a);
        const charsB = new Set(b);
        let shared = 0;
        for (const ch of charsA) {
            if (charsB.has(ch)) {
                shared += 1;
            }
        }
        const unionSize = charsA.size + charsB.size - shared;
        return unionSize === 0 ? 0 : shared / unionSize;
    };
    const remaining = [...candidates];
    remaining.sort((a, b) => b.finalScore - a.finalScore);
    const selected = [remaining.shift()];
    while (remaining.length > 0) {
        let bestIdx = -1;
        let bestMmr = -Infinity;
        for (let i = 0; i < remaining.length; i++) {
            const candidate = remaining[i];
            // Running maximum instead of Math.max(...array): avoids spreading an
            // unbounded number of arguments into a single call.
            let maxSim = -Infinity;
            for (const picked of selected) {
                maxSim = Math.max(maxSim, jaccard(candidate.text, picked.text));
            }
            const mmrScore = lambda * candidate.finalScore - (1 - lambda) * maxSim;
            if (mmrScore > bestMmr) {
                bestMmr = mmrScore;
                bestIdx = i;
            }
        }
        if (bestIdx < 0) {
            // No candidate produced a comparable score (e.g. NaN scores); stop.
            break;
        }
        selected.push(remaining.splice(bestIdx, 1)[0]);
    }
    return selected;
}
|
|
88
|
+
it("returns single item as-is", () => {
|
|
89
|
+
const candidates = [{ text: "hello", finalScore: 0.9 }];
|
|
90
|
+
expect(mmrRerank(candidates)).toHaveLength(1);
|
|
91
|
+
});
|
|
92
|
+
it("returns empty for empty input", () => {
|
|
93
|
+
expect(mmrRerank([])).toHaveLength(0);
|
|
94
|
+
});
|
|
95
|
+
it("promotes diverse results over duplicates", () => {
|
|
96
|
+
const candidates = [
|
|
97
|
+
{ text: "在体育西吃了潮汕牛肉火锅", finalScore: 0.92 },
|
|
98
|
+
{ text: "在天河吃了潮汕牛肉火锅", finalScore: 0.89 },
|
|
99
|
+
{ text: "在越秀吃了日本料理", finalScore: 0.70 },
|
|
100
|
+
];
|
|
101
|
+
const reranked = mmrRerank(candidates, 0.7);
|
|
102
|
+
expect(reranked).toHaveLength(3);
|
|
103
|
+
// 第一条应该还是分数最高的
|
|
104
|
+
expect(reranked[0].finalScore).toBe(0.92);
|
|
105
|
+
});
|
|
106
|
+
it("lambda=1.0 is pure relevance (no diversity)", () => {
|
|
107
|
+
const candidates = [
|
|
108
|
+
{ text: "aaa", finalScore: 0.9 },
|
|
109
|
+
{ text: "aaa", finalScore: 0.8 },
|
|
110
|
+
{ text: "bbb", finalScore: 0.5 },
|
|
111
|
+
];
|
|
112
|
+
const reranked = mmrRerank(candidates, 1.0);
|
|
113
|
+
expect(reranked.map(r => r.finalScore)).toEqual([0.9, 0.8, 0.5]);
|
|
114
|
+
});
|
|
115
|
+
});
|
|
116
|
+
describe("dedup", () => {
|
|
117
|
+
/**
 * Character-level Jaccard similarity between two strings.
 *
 * @param {string} a - First string.
 * @param {string} b - Second string.
 * @returns {number} |A ∩ B| / |A ∪ B| over the sets of distinct characters,
 *   or 0 when both strings are empty.
 */
function jaccard(a, b) {
    // Building the sets from the string iterator yields whole code points.
    // split("") works on UTF-16 code units, which breaks astral characters
    // (emoji, rare CJK) into surrogate halves and makes unrelated characters
    // look partially similar.
    const charsA = new Set(a);
    const charsB = new Set(b);
    let shared = 0;
    for (const ch of charsA) {
        if (charsB.has(ch)) {
            shared += 1;
        }
    }
    const unionSize = charsA.size + charsB.size - shared;
    return unionSize === 0 ? 0 : shared / unionSize;
}
|
|
124
|
+
/**
 * Drop near-duplicate results, keeping the highest-scored item of each group.
 *
 * Items are visited in descending score order; an item is discarded when its
 * text has a Jaccard similarity above 0.7 with any item already kept.
 *
 * @param {Array<{text: string, score: number}>} results - Items to filter; not mutated.
 * @returns {Array<{text: string, score: number}>} Kept items in descending score order.
 */
function dedup(results) {
    const byScoreDesc = Array.from(results);
    byScoreDesc.sort((left, right) => right.score - left.score);
    return byScoreDesc.reduce((kept, candidate) => {
        const alreadyCovered = kept.some((k) => jaccard(candidate.text, k.text) > 0.7);
        if (!alreadyCovered) {
            kept.push(candidate);
        }
        return kept;
    }, []);
}
|
|
134
|
+
it("removes near-duplicates", () => {
|
|
135
|
+
const results = [
|
|
136
|
+
{ text: "在体育西吃了潮汕牛肉火锅", score: 0.92 },
|
|
137
|
+
{ text: "在体育西吃潮汕牛肉火锅", score: 0.85 },
|
|
138
|
+
{ text: "在天河城看到特斯拉展厅", score: 0.70 },
|
|
139
|
+
];
|
|
140
|
+
const deduped = dedup(results);
|
|
141
|
+
expect(deduped.length).toBeLessThan(3);
|
|
142
|
+
});
|
|
143
|
+
it("keeps distinct results", () => {
|
|
144
|
+
const results = [
|
|
145
|
+
{ text: "今天天气很好", score: 0.9 },
|
|
146
|
+
{ text: "股票涨了", score: 0.8 },
|
|
147
|
+
{ text: "新开了家餐厅", score: 0.7 },
|
|
148
|
+
];
|
|
149
|
+
expect(dedup(results)).toHaveLength(3);
|
|
150
|
+
});
|
|
151
|
+
it("keeps higher-scored duplicate", () => {
|
|
152
|
+
const results = [
|
|
153
|
+
{ text: "abcde fghij", score: 0.9 },
|
|
154
|
+
{ text: "abcde fghij", score: 0.7 },
|
|
155
|
+
];
|
|
156
|
+
const deduped = dedup(results);
|
|
157
|
+
expect(deduped).toHaveLength(1);
|
|
158
|
+
expect(deduped[0].score).toBe(0.9);
|
|
159
|
+
});
|
|
160
|
+
});
|
|
161
|
+
});
|
|
162
|
+
//# sourceMappingURL=recall.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"recall.test.js","sourceRoot":"","sources":["../../../src/__tests__/recall.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAE9C,gBAAgB;AAChB,QAAQ,CAAC,gBAAgB,EAAE,GAAG,EAAE;IAC9B,QAAQ,CAAC,eAAe,EAAE,GAAG,EAAE;QAC7B,SAAS,aAAa,CAAC,KAAa,EAAE,SAAiB,EAAE,YAAoB;YAC3E,IAAI,SAAS,IAAI,CAAC;gBAAE,OAAO,KAAK,CAAC;YACjC,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,YAAY,CAAC;YAC1C,OAAO,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,MAAM,GAAG,SAAS,CAAC,CAAC;QAC/C,CAAC;QAED,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;YAC3C,MAAM,CAAC,aAAa,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QACrD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;YACnC,MAAM,CAAC,aAAa,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC;QACzD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;YACtD,MAAM,CAAC,aAAa,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAC1D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,iDAAiD,EAAE,GAAG,EAAE;YACzD,MAAM,CAAC,aAAa,CAAC,GAAG,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC;QAC3D,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,iCAAiC,EAAE,GAAG,EAAE;YACzC,MAAM,KAAK,GAAG,aAAa,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;YACvC,MAAM,IAAI,GAAG,aAAa,CAAC,GAAG,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC;YACvC,MAAM,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QACnC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,SAAS,EAAE,GAAG,EAAE;QACvB,SAAS,OAAO,CAAC,CAAS,EAAE,CAAS;YACnC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;YAClC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;YAClC,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjE,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC;YAC1C,OAAO,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;QAC/D,CAAC;QAED,EAAE,CAAC,mCAAmC,EAAE,G
AAG,EAAE;YAC3C,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,8CAA8C,EAAE,GAAG,EAAE;YACtD,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;QACjD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,oDAAoD,EAAE,GAAG,EAAE;YAC5D,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;YAClC,MAAM,CAAC,GAAG,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,CAAC;YAC/B,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC9B,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,uBAAuB,EAAE,GAAG,EAAE;YAC/B,MAAM,CAAC,OAAO,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAChC,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,YAAY,EAAE,GAAG,EAAE;QAC1B,SAAS,SAAS,CAChB,UAAuD,EACvD,SAAiB,GAAG;YAEpB,IAAI,UAAU,CAAC,MAAM,IAAI,CAAC;gBAAE,OAAO,UAAU,CAAC;YAE9C,MAAM,OAAO,GAAG,CAAC,CAAS,EAAE,CAAS,EAAU,EAAE;gBAC/C,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;gBAClC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;gBAClC,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACjE,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC;gBAC1C,OAAO,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;YAC/D,CAAC,CAAC;YAEF,MAAM,QAAQ,GAAsB,EAAE,CAAC;YACvC,MAAM,SAAS,GAAG,CAAC,GAAG,UAAU,CAAC,CAAC;YAElC,SAAS,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,GAAG,CAAC,CAAC,UAAU,CAAC,CAAC;YACtD,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,EAAG,CAAC,CAAC;YAElC,OAAO,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5B,IAAI,OAAO,GAAG,CAAC,CAAC,CAAC;gBACjB,IAAI,OAAO,GAAG,CAAC,QAAQ,CAAC;gBACxB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC1C,MAAM,SAAS,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC;oBAC1C,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,
CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;oBAClF,MAAM,QAAQ,GAAG,MAAM,GAAG,SAAS,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,GAAG,MAAM,CAAC;oBAC5D,IAAI,QAAQ,GAAG,OAAO,EAAE,CAAC;wBACvB,OAAO,GAAG,QAAQ,CAAC;wBACnB,OAAO,GAAG,CAAC,CAAC;oBACd,CAAC;gBACH,CAAC;gBACD,IAAI,OAAO,IAAI,CAAC,EAAE,CAAC;oBACjB,QAAQ,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACjD,CAAC;;oBAAM,MAAM;YACf,CAAC;YACD,OAAO,QAAQ,CAAC;QAClB,CAAC;QAED,EAAE,CAAC,2BAA2B,EAAE,GAAG,EAAE;YACnC,MAAM,UAAU,GAAG,CAAC,EAAE,IAAI,EAAE,OAAO,EAAE,UAAU,EAAE,GAAG,EAAE,CAAC,CAAC;YACxD,MAAM,CAAC,SAAS,CAAC,UAAU,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACvC,MAAM,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACxC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;YAClD,MAAM,UAAU,GAAG;gBACjB,EAAE,IAAI,EAAE,cAAc,EAAE,UAAU,EAAE,IAAI,EAAE;gBAC1C,EAAE,IAAI,EAAE,aAAa,EAAE,UAAU,EAAE,IAAI,EAAE;gBACzC,EAAE,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,EAAE;aACxC,CAAC;YAEF,MAAM,QAAQ,GAAG,SAAS,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;YAC5C,MAAM,CAAC,QAAQ,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YACjC,eAAe;YACf,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC5C,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,6CAA6C,EAAE,GAAG,EAAE;YACrD,MAAM,UAAU,GAAG;gBACjB,EAAE,IAAI,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE;gBAChC,EAAE,IAAI,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE;gBAChC,EAAE,IAAI,EAAE,KAAK,EAAE,UAAU,EAAE,GAAG,EAAE;aACjC,CAAC;YAEF,MAAM,QAAQ,GAAG,SAAS,CAAC,UAAU,EAAE,GAAG,CAAC,CAAC;YAC5C,MAAM,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC;QACnE,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;IAEH,QAAQ,CAAC,OAAO,EAAE,GAAG,EAAE;QACrB,SAAS,OAAO,CAAC,CAAS,EAAE,CAAS;YACnC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;YAClC,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC;YAClC,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,CAAC,CAAC,MAAM
,CAAC,CAAC,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjE,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,IAAI,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC;YAC1C,OAAO,KAAK,CAAC,IAAI,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;QAC/D,CAAC;QAED,SAAS,KAAK,CAAC,OAA+C;YAC5D,MAAM,MAAM,GAAG,CAAC,GAAG,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;YAC9D,MAAM,MAAM,GAAmB,EAAE,CAAC;YAClC,KAAK,MAAM,IAAI,IAAI,MAAM,EAAE,CAAC;gBAC1B,MAAM,KAAK,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,CAAC;gBACjE,IAAI,CAAC,KAAK;oBAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAChC,CAAC;YACD,OAAO,MAAM,CAAC;QAChB,CAAC;QAED,EAAE,CAAC,yBAAyB,EAAE,GAAG,EAAE;YACjC,MAAM,OAAO,GAAG;gBACd,EAAE,IAAI,EAAE,cAAc,EAAE,KAAK,EAAE,IAAI,EAAE;gBACrC,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,EAAE;gBACpC,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,EAAE;aACrC,CAAC;YACF,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/B,MAAM,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACzC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,wBAAwB,EAAE,GAAG,EAAE;YAChC,MAAM,OAAO,GAAG;gBACd,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,EAAE;gBAC9B,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,GAAG,EAAE;gBAC5B,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,EAAE;aAC/B,CAAC;YACF,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QACzC,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,+BAA+B,EAAE,GAAG,EAAE;YACvC,MAAM,OAAO,GAAG;gBACd,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,GAAG,EAAE;gBACnC,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,GAAG,EAAE;aACpC,CAAC;YACF,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/B,MAAM,CAAC,OAAO,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;YAChC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;IACL,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory System Plugin - BM25 Short-term Memory Index
|
|
3
|
+
*
|
|
4
|
+
* 短期记忆的 BM25 索引构建与搜索
|
|
5
|
+
* 纯 TypeScript 实现,不依赖外部 BM25 库
|
|
6
|
+
*/
|
|
7
|
+
export type ShortTermEntry = {
|
|
8
|
+
text: string;
|
|
9
|
+
date: string;
|
|
10
|
+
category: string;
|
|
11
|
+
filePath: string;
|
|
12
|
+
};
|
|
13
|
+
export declare function initJieba(): Promise<void>;
|
|
14
|
+
export declare function tokenize(text: string): string[];
|
|
15
|
+
export declare class BM25Index {
|
|
16
|
+
private docs;
|
|
17
|
+
private idf;
|
|
18
|
+
private avgDl;
|
|
19
|
+
/**
|
|
20
|
+
* 从短期记忆文件构建索引
|
|
21
|
+
* v0.5: 全量索引所有 .md 文件(不再按 maxAgeDays 截断)
|
|
22
|
+
* maxAgeDays 参数保留但仅用于向下兼容(不再生效)
|
|
23
|
+
*/
|
|
24
|
+
buildFromDirectory(dir: string, _maxAgeDays?: number): Promise<void>;
|
|
25
|
+
/**
|
|
26
|
+
* 解析结构化的短期记忆文件
|
|
27
|
+
* 格式:### HH:MM [分类标签]\n摘要内容
|
|
28
|
+
*
|
|
29
|
+
* 特殊处理 [对话记录]: 按 Scott / Shadow 切分每一轮对话
|
|
30
|
+
* 支持两种格式:
|
|
31
|
+
* - 旧格式: Scott: / Shadow:
|
|
32
|
+
* - 新格式: Scott [MM-DD HH:MM]: / Shadow [MM-DD HH:MM]:
|
|
33
|
+
* 每一轮单独做一个 BM25 文档,精准召回不浪费
|
|
34
|
+
*/
|
|
35
|
+
private parseEntries;
|
|
36
|
+
/**
|
|
37
|
+
* 处理文本块,支持三种格式:
|
|
38
|
+
* 1. 精简格式(每行 | 分隔)
|
|
39
|
+
* 2. 对话记录(Scott/Shadow 切分)
|
|
40
|
+
* 3. 普通文本(整块索引)
|
|
41
|
+
*/
|
|
42
|
+
private processTextBlock;
|
|
43
|
+
/**
|
|
44
|
+
* 构建 IDF(逆文档频率)
|
|
45
|
+
*/
|
|
46
|
+
private buildIDF;
|
|
47
|
+
/**
|
|
48
|
+
* BM25 搜索
|
|
49
|
+
* @returns 带分数的结果列表
|
|
50
|
+
*/
|
|
51
|
+
search(query: string, topK?: number): Promise<Array<{
|
|
52
|
+
entry: ShortTermEntry;
|
|
53
|
+
score: number;
|
|
54
|
+
}>>;
|
|
55
|
+
/**
|
|
56
|
+
* 添加单条记录(用于实时写入后的即时索引)
|
|
57
|
+
*/
|
|
58
|
+
addEntry(entry: ShortTermEntry): Promise<void>;
|
|
59
|
+
get size(): number;
|
|
60
|
+
}
|
package/dist/src/bm25.js
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory System Plugin - BM25 Short-term Memory Index
|
|
3
|
+
*
|
|
4
|
+
* 短期记忆的 BM25 索引构建与搜索
|
|
5
|
+
* 纯 TypeScript 实现,不依赖外部 BM25 库
|
|
6
|
+
*/
|
|
7
|
+
import { promises as fs } from "node:fs";
|
|
8
|
+
import { logger } from "./logger.js";
|
|
9
|
+
import { join } from "node:path";
|
|
10
|
+
/**
 * Return today's date in the local time zone, formatted as YYYY-MM-DD.
 *
 * @returns {string} Zero-padded local date string.
 */
function getLocalDateString() {
    const today = new Date();
    const pad2 = (value) => String(value).padStart(2, "0");
    const parts = [
        today.getFullYear(),
        pad2(today.getMonth() + 1), // getMonth() is zero-based
        pad2(today.getDate()),
    ];
    return parts.join("-");
}
|
|
20
|
+
// Segmenter handle; null until initJieba() has loaded it.
let jiebaInstance = null;
// Lightweight stop-word list: colloquial Chinese filler, particles and
// punctuation that carry no retrieval signal. Grouped by role for readability;
// the groups are flattened into a single Set in declaration order.
const STOPWORDS = new Set([
    // structural / aspect particles
    ["的", "地", "得", "了", "着", "过"],
    // personal pronouns
    ["我", "你", "他", "她", "它", "我们", "你们", "他们"],
    // demonstratives
    ["这", "那", "这个", "那个", "这里", "那里"],
    // copulas, prepositions and conjunctions
    ["是", "有", "在", "和", "与", "及", "并", "或", "而", "但"],
    // negations and common adverbs
    ["不", "没", "没有", "很", "也", "都", "就", "还", "又"],
    // sentence-final particles and interjections
    ["吗", "呢", "啊", "呀", "吧", "哦", "嗯"],
    // interrogatives
    ["什么", "怎么", "为何", "为什么", "怎样", "是否"],
    // empty string and punctuation marks
    ["", ",", "。", "!", "?", "、", ":", ";", "(", ")", "《", "》"],
    [",", ".", "!", "?", ":", ";", "(", ")"],
].flat());
|
|
33
|
+
/**
 * Decide whether a segmented token is worth indexing.
 *
 * A token is rejected when, after trimming and lower-casing, it is empty,
 * is listed in STOPWORDS, or consists solely of Unicode punctuation/symbol
 * characters.
 *
 * @param {string} token - Raw token produced by the tokenizer.
 * @returns {boolean} `true` if the token should be kept.
 */
function isMeaningfulToken(token) {
    const normalized = token.trim().toLowerCase();
    const isBlank = normalized.length === 0;
    // \p{P} = punctuation, \p{S} = symbols; the whole token must be made of them.
    const isNoise = isBlank
        || STOPWORDS.has(normalized)
        || /^[\p{P}\p{S}]+$/u.test(normalized);
    return !isNoise;
}
|
|
44
|
+
/**
 * Load the `jieba-wasm` segmenter once and cache it in `jiebaInstance`.
 *
 * Calls made after the segmenter is available return without importing again.
 * Any failure of the dynamic import propagates to the caller.
 *
 * @returns {Promise<void>}
 */
export async function initJieba() {
    if (jiebaInstance) {
        return;
    }
    const jiebaModule = await import("jieba-wasm");
    // The default export is awaited as-is, so it may be a value or a promise.
    jiebaInstance = await jiebaModule.default;
}
|
|
50
|
+
/**
 * Split text into index/query terms.
 *
 * When the jieba segmenter has been loaded it is used for segmentation;
 * otherwise the text is split on anything that is not a word character or a
 * CJK ideograph (U+4E00–U+9FFF), which only separates space-delimited words.
 *
 * Tokens are trimmed and lower-cased on BOTH paths so that matching is
 * case-insensitive regardless of whether jieba is available (previously only
 * the fallback path folded case, so "Docker" and "docker" never matched once
 * jieba was loaded). Stop words and pure punctuation are removed.
 *
 * @param {string} text - Text to tokenize.
 * @returns {string[]} Normalized, filtered tokens in source order.
 */
export function tokenize(text) {
    if (jiebaInstance) {
        return jiebaInstance
            .cut(text)
            .map((t) => t.trim().toLowerCase())
            .filter(isMeaningfulToken);
    }
    // Fallback: whitespace-delimited splitting (adequate for English only).
    return text
        .toLowerCase()
        .replace(/[^\w\u4e00-\u9fff]/g, " ")
        .split(/\s+/)
        .map((t) => t.trim())
        .filter(isMeaningfulToken);
}
|
|
63
|
+
/**
 * In-memory BM25 index over short-term memory entries.
 *
 * Each indexed document is `{ tokens, entry }`, where `entry` carries
 * `text`, `date`, `category` and `filePath`, and `tokens` is the output of
 * `tokenize()` for that entry.
 */
export class BM25Index {
    /** Indexed documents (`{ tokens, entry }`). */
    docs = [];
    /** term -> inverse document frequency; rebuilt from scratch by buildIDF(). */
    idf = new Map();
    /** Average number of tokens per document; rebuilt by buildIDF(). */
    avgDl = 0;
    /**
     * Rebuild the index from every `.md` file in `<dir>/short-term`.
     *
     * Files are processed in reverse lexicographic name order (newest first for
     * date-named files). A missing or unreadable directory yields an empty
     * index; an unreadable individual file is logged and skipped so it cannot
     * abort indexing of the remaining files.
     *
     * @param {string} dir - Memory root directory containing `short-term/`.
     * @param {number} [_maxAgeDays=7] - Kept for backward compatibility only; ignored.
     * @returns {Promise<void>}
     */
    async buildFromDirectory(dir, _maxAgeDays = 7) {
        await initJieba();
        this.docs = [];
        // By design only memory/short-term/ is read.
        const shortTermDir = join(dir, "short-term");
        let files = null;
        try {
            files = await fs.readdir(shortTermDir);
        }
        catch (err) {
            if (err?.code === "ENOENT") {
                logger.info(`[engram:bm25] short-term directory not found at ${shortTermDir}, 0 entries`);
            }
            else {
                // Report the real cause instead of mislabelling it as "not found".
                logger.info(`[engram:bm25] Failed to list ${shortTermDir} (${err?.message ?? err}), 0 entries`);
            }
        }
        if (files) {
            let mdFiles = 0;
            for (const file of files.sort().reverse()) { // newest first
                if (!file.endsWith(".md")) {
                    continue;
                }
                const filePath = join(shortTermDir, file);
                try {
                    const content = await fs.readFile(filePath, "utf-8");
                    for (const parsed of this.parseEntries(content, filePath, file)) {
                        this.docs.push(parsed);
                    }
                    mdFiles++;
                }
                catch (err) {
                    // One bad file (e.g. a directory named *.md) must not drop the rest.
                    logger.info(`[engram:bm25] Skipping unreadable file ${filePath}: ${err?.message ?? err}`);
                }
            }
            logger.info(`[engram:bm25] Scanned ${mdFiles} MD files (full scan, no expiry cutoff)`);
        }
        logger.info(`[engram:bm25] Built index: ${this.docs.length} entries from ${shortTermDir}`);
        this.buildIDF();
    }
    /**
     * Parse one structured short-term memory file into index documents.
     *
     * Sections start with a header of the form `### HH:MM [category]`; the
     * lines that follow (up to the next header) form that section's text.
     * Other lines starting with `#`, `---` or "```" are dropped. Text before
     * the first header is filed under the default category "随聊".
     *
     * Conversation sections are split per speaker turn; both
     * `Scott:` / `Shadow:` and `Scott [MM-DD HH:MM]:` / `Shadow [MM-DD HH:MM]:`
     * prefixes are recognised. See processTextBlock() for details.
     *
     * @param {string} content - File content (LF or CRLF line endings).
     * @param {string} filePath - Path stored on every produced entry.
     * @param {string} fileName - File name; a `YYYY-MM-DD` inside it becomes the
     *   entry date, otherwise today's local date is used.
     * @returns {Array<{tokens: string[], entry: object}>} Parsed documents.
     */
    parseEntries(content, filePath, fileName) {
        const entries = [];
        // Accept CRLF too: a stray "\r" would defeat the condensed-line regex.
        const lines = content.split(/\r?\n/);
        const dateMatch = fileName.match(/(\d{4}-\d{2}-\d{2})/);
        const date = dateMatch ? dateMatch[1] : getLocalDateString();
        let currentCategory = "随聊";
        let currentText = "";
        // Zero-width split point in front of each speaker prefix, e.g.
        // "Scott:" / "Shadow:" / "Scott [03-13 12:34]:" / "Shadow [03-13 12:34]:"
        const speakerSplitRegex = /(?=^(?:Scott|Shadow)(?:\s*\[[^\]]+\])?:)/m;
        // Condensed format: topic | key entities | summary
        const condensedLineRegex = /^.+\s*\|\s*.+\s*\|\s*.+$/;
        const flush = () => {
            if (currentText.trim()) {
                this.processTextBlock(entries, currentText, currentCategory, date, filePath, speakerSplitRegex, condensedLineRegex);
            }
        };
        for (const line of lines) {
            const headerMatch = line.match(/^###\s+\d{2}:\d{2}\s+\[([^\]]+)\]/);
            if (headerMatch) {
                // A new header closes the previous section.
                flush();
                currentCategory = headerMatch[1];
                currentText = "";
            }
            else if (line.startsWith("#") || line.startsWith("---") || line.startsWith("```")) {
                continue;
            }
            else {
                currentText += line + "\n";
            }
        }
        // Trailing section.
        flush();
        return entries;
    }
    /**
     * Turn one section of text into index documents, appended to `entries`.
     *
     * Three layouts are handled, checked in this order:
     *  1. Condensed: more than half of the non-blank lines match
     *     `condensedLineRegex`. Every matching line of at least 10 characters
     *     becomes its own document with category "精简记录".
     *  2. Conversation (category "对话记录" or "对话"): split on
     *     `speakerSplitRegex`; every turn of at least 10 characters becomes its
     *     own document.
     *  3. Anything else: the whole section is a single document.
     *
     * @param {Array} entries - Output array, mutated in place.
     * @param {string} text - Section text.
     * @param {string} category - Category taken from the section header.
     * @param {string} date - Entry date (`YYYY-MM-DD`).
     * @param {string} filePath - Source file path stored on each entry.
     * @param {RegExp} speakerSplitRegex - Splitter for conversation turns.
     * @param {RegExp} condensedLineRegex - Detector for condensed lines.
     * @returns {void}
     */
    processTextBlock(entries, text, category, date, filePath, speakerSplitRegex, condensedLineRegex) {
        // Normalise CRLF so direct callers get the same detection as parseEntries().
        const trimmed = text.replace(/\r\n/g, "\n").trim();
        if (!trimmed) {
            return;
        }
        const lines = trimmed.split("\n").filter((l) => l.trim());
        const condensedLines = lines.filter((l) => condensedLineRegex.test(l));
        const isCondensedFormat = condensedLines.length > lines.length * 0.5;
        if (isCondensedFormat) {
            // NOTE(review): lines of this section that do NOT match the condensed
            // pattern are not indexed at all — confirm this is intended.
            for (const line of condensedLines) {
                if (line.length < 10) {
                    continue;
                }
                // Tokenize the fields without the "|" separators; keep the raw
                // line as display text.
                const fullText = line.split("|").map((p) => p.trim()).join(" ");
                const entry = {
                    text: line,
                    date,
                    category: "精简记录",
                    filePath,
                };
                entries.push({ tokens: tokenize(fullText), entry });
            }
            return;
        }
        if (category === "对话记录" || category === "对话") {
            // One document per speaker turn for precise recall.
            for (const turn of trimmed.split(speakerSplitRegex)) {
                const t = turn.trim();
                if (!t || t.length < 10) {
                    continue;
                }
                const entry = {
                    text: t,
                    date,
                    category,
                    filePath,
                };
                entries.push({ tokens: tokenize(entry.text), entry });
            }
            return;
        }
        // Ordinary category: the whole section is one document.
        const entry = {
            text: trimmed,
            date,
            category,
            filePath,
        };
        entries.push({ tokens: tokenize(entry.text), entry });
    }
    /**
     * Recompute `idf` and `avgDl` from the current `docs`.
     *
     * IDF(term) = ln((N - df + 0.5) / (df + 0.5) + 1), where N is the number
     * of documents and df the number of documents containing the term. The map
     * is rebuilt from scratch so terms from a previous build do not linger.
     * With no documents `avgDl` is set to 1.
     *
     * @returns {void}
     */
    buildIDF() {
        const df = new Map(); // document frequency per term
        let totalLen = 0;
        for (const doc of this.docs) {
            for (const t of new Set(doc.tokens)) {
                df.set(t, (df.get(t) ?? 0) + 1);
            }
            totalLen += doc.tokens.length;
        }
        const N = this.docs.length;
        this.avgDl = N > 0 ? totalLen / N : 1;
        const freshIdf = new Map();
        for (const [term, freq] of df) {
            freshIdf.set(term, Math.log((N - freq + 0.5) / (freq + 0.5) + 1));
        }
        this.idf = freshIdf;
        logger.info(`[engram:bm25] buildIDF: ${N} docs, ${df.size} unique terms, avgDl=${this.avgDl.toFixed(1)}`);
    }
    /**
     * Rank indexed documents against a query with BM25 (k1 = 1.5, b = 0.75).
     *
     * Repeated query tokens contribute once per occurrence. Only documents
     * with a positive score are returned, best first; ties keep index order.
     *
     * @param {string} query - Free-text query.
     * @param {number} [topK=3] - Maximum number of results; values <= 0 (or NaN)
     *   yield an empty list.
     * @returns {Promise<Array<{entry: object, score: number}>>} Scored results.
     */
    async search(query, topK = 3) {
        await initJieba();
        const queryTokens = tokenize(query);
        const k1 = 1.5; // term-frequency saturation
        const b = 0.75; // document-length normalisation
        logger.info(`[engram:bm25] search: query="${query.slice(0, 80)}" tokens=[${queryTokens.join(", ")}] topK=${topK} docs=${this.docs.length} avgDl=${this.avgDl.toFixed(1)}`);
        // Keep only query tokens known to the index, preserving order and repeats.
        const weightedTerms = [];
        for (const token of queryTokens) {
            const idf = this.idf.get(token) ?? 0;
            if (idf !== 0) {
                weightedTerms.push([token, idf]);
            }
        }
        const wanted = new Set(weightedTerms.map(([token]) => token));
        const scores = [];
        if (wanted.size > 0) {
            for (const doc of this.docs) {
                // Single pass over the document instead of one scan per query token.
                const counts = new Map();
                for (const t of doc.tokens) {
                    if (wanted.has(t)) {
                        counts.set(t, (counts.get(t) ?? 0) + 1);
                    }
                }
                if (counts.size === 0) {
                    continue;
                }
                const dl = doc.tokens.length;
                const lengthNorm = k1 * (1 - b + b * (dl / this.avgDl));
                let score = 0;
                for (const [token, idf] of weightedTerms) {
                    const tf = counts.get(token);
                    if (!tf) {
                        continue; // absent term contributes nothing
                    }
                    score += idf * ((tf * (k1 + 1)) / (tf + lengthNorm));
                }
                if (score > 0) {
                    scores.push({ entry: doc.entry, score });
                }
            }
        }
        scores.sort((left, right) => right.score - left.score);
        // Guard against slice()'s negative-index semantics.
        const limit = topK > 0 ? topK : 0;
        const result = scores.slice(0, limit);
        logger.info(`[engram:bm25] search: ${scores.length} docs matched, returning top ${result.length}`);
        return result;
    }
    /**
     * Index a single entry immediately (used right after a live write) and
     * refresh the IDF statistics.
     *
     * @param {{text: string}} entry - Entry to add; `text` is tokenized.
     * @returns {Promise<void>}
     */
    async addEntry(entry) {
        await initJieba();
        const tokens = tokenize(entry.text);
        this.docs.push({
            tokens,
            entry,
        });
        logger.info(`[engram:bm25] addEntry: "${entry.text.slice(0, 60)}" → ${tokens.length} tokens, total docs=${this.docs.length}`);
        this.buildIDF();
    }
    /** Number of indexed documents. */
    get size() {
        return this.docs.length;
    }
}
|
|
271
|
+
//# sourceMappingURL=bm25.js.map
|