@chainlesschain/personal-data-hub 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +211 -0
  2. package/__tests__/adapters/ai-chat-health-checker.test.js +262 -0
  3. package/__tests__/adapters/ai-chat-history.test.js +396 -0
  4. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  5. package/__tests__/adapters/ai-chat-vendors.test.js +874 -0
  6. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  7. package/__tests__/adapters/email-adapter.test.js +138 -1
  8. package/__tests__/adapters/email-classifier.test.js +347 -0
  9. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  10. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  11. package/__tests__/adapters/email-templates.test.js +699 -0
  12. package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +269 -0
  13. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  14. package/__tests__/adapters/system-data-android-ingest.test.js +144 -0
  15. package/__tests__/adapters/system-data-android.test.js +387 -0
  16. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  17. package/__tests__/adapters/wechat-bootstrap.test.js +240 -0
  18. package/__tests__/adapters/wechat-env-probe.test.js +162 -0
  19. package/__tests__/adapters/wechat-frida-agent.test.js +191 -0
  20. package/__tests__/adapters/wechat-frida-integration.test.js +149 -0
  21. package/__tests__/adapters/wechat-frida-key-provider.test.js +188 -0
  22. package/__tests__/adapters/wechat-md5-key-provider.test.js +101 -0
  23. package/__tests__/analysis-skills.test.js +556 -0
  24. package/__tests__/analysis.test.js +329 -1
  25. package/__tests__/e2e/ai-chat-cross-source-journey.test.js +213 -0
  26. package/__tests__/e2e/full-user-journey.test.js +188 -0
  27. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  28. package/__tests__/entity-resolver-stages.test.js +411 -0
  29. package/__tests__/entity-resolver-vault.test.js +246 -0
  30. package/__tests__/entity-resolver.test.js +526 -0
  31. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  32. package/__tests__/integration/ai-chat-history-registry.test.js +228 -0
  33. package/__tests__/integration/aichat-wizard-end-to-end.test.js +282 -0
  34. package/__tests__/integration/cross-adapter-pipelines.test.js +396 -0
  35. package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +390 -0
  36. package/__tests__/longtail-adapters.test.js +217 -0
  37. package/__tests__/mobile-extractor.test.js +288 -0
  38. package/__tests__/registry.test.js +4 -2
  39. package/__tests__/shopping-adapters.test.js +296 -0
  40. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  41. package/__tests__/sidecar-supervisor.test.js +120 -0
  42. package/__tests__/social-adapters.test.js +206 -0
  43. package/__tests__/travel-adapters.test.js +325 -0
  44. package/__tests__/vault.test.js +3 -3
  45. package/__tests__/wechat-adapter.test.js +476 -0
  46. package/__tests__/whatsapp-adapter.test.js +135 -0
  47. package/lib/adapter-spec.js +12 -0
  48. package/lib/adapters/_python-sidecar-base.js +207 -0
  49. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +374 -0
  50. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  51. package/lib/adapters/ai-chat-history/cookie-capture-spec.js +331 -0
  52. package/lib/adapters/ai-chat-history/health-checker.js +210 -0
  53. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  54. package/lib/adapters/ai-chat-history/index.js +28 -0
  55. package/lib/adapters/ai-chat-history/schema-map.js +258 -0
  56. package/lib/adapters/ai-chat-history/vendor-spec.js +86 -0
  57. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  58. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  59. package/lib/adapters/ai-chat-history/vendors/doubao.js +255 -0
  60. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  61. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  62. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  63. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  64. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  65. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  66. package/lib/adapters/ai-chat-history/wizard-controller.js +473 -0
  67. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +311 -0
  68. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  69. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  70. package/lib/adapters/alipay-bill/index.js +41 -0
  71. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  72. package/lib/adapters/email-imap/classifier.js +495 -0
  73. package/lib/adapters/email-imap/email-adapter.js +419 -8
  74. package/lib/adapters/email-imap/index.js +42 -0
  75. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  76. package/lib/adapters/email-imap/templates/bill.js +232 -0
  77. package/lib/adapters/email-imap/templates/government.js +120 -0
  78. package/lib/adapters/email-imap/templates/index.js +78 -0
  79. package/lib/adapters/email-imap/templates/order.js +186 -0
  80. package/lib/adapters/email-imap/templates/other.js +114 -0
  81. package/lib/adapters/email-imap/templates/register.js +113 -0
  82. package/lib/adapters/email-imap/templates/travel.js +157 -0
  83. package/lib/adapters/email-imap/templates/utils.js +275 -0
  84. package/lib/adapters/email-imap/transactions.js +234 -0
  85. package/lib/adapters/messaging-qq/index.js +158 -0
  86. package/lib/adapters/messaging-telegram/index.js +142 -0
  87. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  88. package/lib/adapters/shopping-base/index.js +208 -0
  89. package/lib/adapters/shopping-jd/index.js +150 -0
  90. package/lib/adapters/shopping-meituan/index.js +154 -0
  91. package/lib/adapters/shopping-taobao/index.js +176 -0
  92. package/lib/adapters/social-bilibili/index.js +171 -0
  93. package/lib/adapters/social-douyin/index.js +116 -0
  94. package/lib/adapters/social-kuaishou/index.js +237 -0
  95. package/lib/adapters/social-toutiao/index.js +236 -0
  96. package/lib/adapters/social-weibo/index.js +164 -0
  97. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  98. package/lib/adapters/system-data/disclosure.js +166 -0
  99. package/lib/adapters/system-data/index.js +34 -0
  100. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  101. package/lib/adapters/system-data-android/adapter.js +348 -0
  102. package/lib/adapters/system-data-android/index.js +76 -0
  103. package/lib/adapters/travel-12306/index.js +151 -0
  104. package/lib/adapters/travel-amap/index.js +164 -0
  105. package/lib/adapters/travel-baidu-map/index.js +162 -0
  106. package/lib/adapters/travel-base/index.js +240 -0
  107. package/lib/adapters/travel-ctrip/index.js +151 -0
  108. package/lib/adapters/wechat/bootstrap.js +146 -0
  109. package/lib/adapters/wechat/content-parser.js +326 -0
  110. package/lib/adapters/wechat/db-reader.js +209 -0
  111. package/lib/adapters/wechat/env-probe.js +218 -0
  112. package/lib/adapters/wechat/frida-agent/loader.js +67 -0
  113. package/lib/adapters/wechat/frida-agent/wechat-key-hook.js +126 -0
  114. package/lib/adapters/wechat/index.js +37 -0
  115. package/lib/adapters/wechat/key-extractor.js +158 -0
  116. package/lib/adapters/wechat/key-providers/frida-key-provider.js +244 -0
  117. package/lib/adapters/wechat/key-providers/index.js +22 -0
  118. package/lib/adapters/wechat/key-providers/key-provider-base.js +44 -0
  119. package/lib/adapters/wechat/key-providers/md5-key-provider.js +81 -0
  120. package/lib/adapters/wechat/normalize.js +220 -0
  121. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  122. package/lib/analysis-skills/base.js +113 -0
  123. package/lib/analysis-skills/footprint.js +167 -0
  124. package/lib/analysis-skills/index.js +58 -0
  125. package/lib/analysis-skills/interests.js +161 -0
  126. package/lib/analysis-skills/relations.js +226 -0
  127. package/lib/analysis-skills/spending.js +219 -0
  128. package/lib/analysis-skills/timeline.js +167 -0
  129. package/lib/analysis.js +191 -2
  130. package/lib/entity-resolver/embedding-stage.js +198 -0
  131. package/lib/entity-resolver/entity-resolver.js +384 -0
  132. package/lib/entity-resolver/index.js +42 -0
  133. package/lib/entity-resolver/llm-stage.js +191 -0
  134. package/lib/entity-resolver/rule-stage.js +208 -0
  135. package/lib/entity-resolver/worker.js +149 -0
  136. package/lib/index.js +131 -0
  137. package/lib/migrations.js +73 -0
  138. package/lib/mobile-extractor/android.js +193 -0
  139. package/lib/mobile-extractor/index.js +9 -0
  140. package/lib/mobile-extractor/ios.js +223 -0
  141. package/lib/prompt-builder.js +11 -1
  142. package/lib/query-parser.js +7 -1
  143. package/lib/registry.js +42 -0
  144. package/lib/sidecar/index.js +15 -0
  145. package/lib/sidecar/supervisor.js +359 -0
  146. package/lib/vault.js +343 -0
  147. package/package.json +36 -3
  148. package/scripts/_make-fixture-all.js +126 -0
  149. package/scripts/_make-fixture-contacts.js +84 -0
  150. package/scripts/evaluate-entity-resolver.js +213 -0
  151. package/scripts/smoke-phase-5-5.js +196 -0
  152. package/scripts/smoke-phase-5-7.js +181 -0
  153. package/scripts/smoke-system-data-contacts.js +309 -0
  154. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,526 @@
1
+ "use strict";
2
+
3
+ import { describe, it, expect, beforeEach, afterEach } from "vitest";
4
+
5
+ const path = require("node:path");
6
+ const fs = require("node:fs");
7
+ const os = require("node:os");
8
+ const { LocalVault } = require("../lib/vault");
9
+ const { generateKeyHex } = require("../lib/key-providers");
10
+ const { newId } = require("../lib/ids");
11
+ const {
12
+ EntityResolver,
13
+ entityResolverRuleStage: ruleStage,
14
+ entityResolverSharedIdentifier: findSharedIdentifier,
15
+ entityResolverNormalizeIdValue: normalizeIdValue,
16
+ } = require("../lib/entity-resolver");
17
+
18
+ // ─── ruleStage (pure) ────────────────────────────────────────────────────
19
+
20
/**
 * Builds a person record for the rule-stage tests.
 *
 * Defaults: `type` "person", `subtype` "contact", a random `id`, empty
 * `names`, empty `identifiers`, and a two-field `source` for the "test"
 * adapter with a random `originalId`.
 *
 * Any key in `overrides` (including `type` / `subtype` and extra keys) is
 * copied onto the result. For `id`, `names`, `identifiers` and `source`, an
 * override that is `undefined` or `null` falls back to the default instead
 * of clobbering it.
 *
 * @param {object} [overrides] - Fields to set on the generated person.
 * @returns {object} The person record.
 */
function person(overrides = {}) {
  // Tolerate an explicit `null` argument (default parameters only cover `undefined`).
  const given = overrides ?? {};
  return {
    type: "person",
    subtype: "contact",
    ...given,
    // Defaults are applied AFTER the spread so a nullish override cannot erase them.
    id: given.id ?? `person-${Math.random().toString(36).slice(2, 8)}`,
    names: given.names ?? [],
    identifiers: given.identifiers ?? {},
    source: given.source ?? { adapter: "test", originalId: `tx-${Math.random()}` },
  };
}
31
+
32
describe("ruleStage — R1 strong identifier match", () => {
  // Builds two persons from the given identifier maps (left first, then
  // right) and returns the verdict the rule stage reports for the pair.
  const verdictFor = (leftIdentifiers, rightIdentifiers) => {
    const left = person({ identifiers: leftIdentifiers });
    const right = person({ identifiers: rightIdentifiers });
    return ruleStage(left, right).verdict;
  };

  it("same email → same", () => {
    // The two addresses differ only in letter case.
    const verdict = verdictFor({ email: ["mom@163.com"] }, { email: ["MOM@163.COM"] });
    expect(verdict).toBe("same");
  });

  it("same phone (different formatting) → same", () => {
    const verdict = verdictFor({ phone: ["+86 138 0000 1111"] }, { phone: ["13800001111"] });
    expect(verdict).toBe("same");
  });

  it("same wechatId → same", () => {
    // One side is a bare string, the other a single-element array.
    const verdict = verdictFor({ wechatId: "wxid_xyz" }, { wechatId: ["wxid_xyz"] });
    expect(verdict).toBe("same");
  });

  it("same did → same", () => {
    const verdict = verdictFor({ did: "did:cc:abc" }, { did: "did:cc:abc" });
    expect(verdict).toBe("same");
  });

  it("same idHash → same", () => {
    const verdict = verdictFor({ idHash: "sha-id-hash-123" }, { idHash: "sha-id-hash-123" });
    expect(verdict).toBe("same");
  });
});
63
+
64
describe("ruleStage — R2 zero overlap → different", () => {
  it("no shared field → different", () => {
    // Different name, different identifier kind, different adapter.
    const emailContact = person({
      names: ["张三"],
      identifiers: { email: ["a@x.com"] },
      source: { adapter: "email", originalId: "1" },
    });
    const phoneContact = person({
      names: ["李四"],
      identifiers: { phone: ["13900001234"] },
      source: { adapter: "alipay", originalId: "2" },
    });

    const { verdict } = ruleStage(emailContact, phoneContact);
    expect(verdict).toBe("different");
  });

  it("identical content but no identifier overlap + different adapters → uncertain", () => {
    // Note: same name → overlap=1 → uncertain
    const fromEmail = person({ names: ["张三"], source: { adapter: "email", originalId: "1" } });
    const fromAlipay = person({ names: ["张三"], source: { adapter: "alipay", originalId: "2" } });

    const { verdict } = ruleStage(fromEmail, fromAlipay);
    expect(verdict).toBe("uncertain");
  });
});
86
+
87
describe("ruleStage — R3 same-adapter internal dup", () => {
  // Shorthand for a person captured by the "email" adapter.
  const emailPerson = (name, originalId, extra = {}) =>
    person({ ...extra, names: [name], source: { adapter: "email", originalId } });

  it("same adapter + different originalId + shared name → same", () => {
    const first = emailPerson("张三", "1");
    const second = emailPerson("张三", "2");

    expect(ruleStage(first, second).verdict).toBe("same");
    expect(ruleStage(first, second).reason).toMatch(/same-adapter/);
  });

  it("same adapter + same originalId is NOT a R3 case (different id implies different row)", () => {
    const first = emailPerson("张三", "1", { id: "p1" });
    const second = emailPerson("张三", "1", { id: "p2" });

    // R3 requires DIFFERENT originalId — same originalId falls through
    // but uniqueIndex on source.originalId means we don't see this case
    // in practice.
    const result = ruleStage(first, second);
    // Uncertain because there is still overlap (name + adapter).
    expect(result.verdict).toBe("uncertain");
  });

  it("same adapter + DIFFERENT name → uncertain (not R3)", () => {
    const first = emailPerson("张三", "1");
    const second = emailPerson("李四", "2");

    expect(ruleStage(first, second).verdict).toBe("uncertain");
  });
});
131
+
132
describe("ruleStage — R4 uncertain fall-through", () => {
  it("name overlap only → uncertain", () => {
    const fromEmail = person({ names: ["张三"], source: { adapter: "email", originalId: "1" } });
    const fromAlipay = person({ names: ["张三"], source: { adapter: "alipay", originalId: "2" } });

    const outcome = ruleStage(fromEmail, fromAlipay);
    expect(outcome.verdict).toBe("uncertain");
  });

  it("same person id → same vacuously", () => {
    // Two separately built records that carry the same id.
    const original = person({ id: "p1" });
    const twin = person({ id: "p1" });

    const outcome = ruleStage(original, twin);
    expect(outcome.verdict).toBe("same");
  });

  it("invalid input → different", () => {
    // A missing operand on either side yields "different".
    const missingLeft = ruleStage(null, person());
    expect(missingLeft.verdict).toBe("different");

    const missingRight = ruleStage(person(), undefined);
    expect(missingRight.verdict).toBe("different");
  });
});
150
+
151
+ // ─── normalizeIdValue ────────────────────────────────────────────────────
152
+
153
describe("normalizeIdValue", () => {
  it("email → lowercase + trim", () => {
    const normalized = normalizeIdValue("email", " MOM@163.COM ");
    expect(normalized).toBe("mom@163.com");
  });

  it("phone → digits only, strips +86 country code", () => {
    const fromFormatted = normalizeIdValue("phone", "+86 138-0000 1111");
    expect(fromFormatted).toBe("13800001111");

    // An already-normalized number is returned unchanged.
    const fromPlain = normalizeIdValue("phone", "13800001111");
    expect(fromPlain).toBe("13800001111");
  });

  it("other keys → trim only", () => {
    const normalized = normalizeIdValue("did", " did:cc:abc ");
    expect(normalized).toBe("did:cc:abc");
  });
});
165
+
166
+ // ─── findSharedIdentifier ────────────────────────────────────────────────
167
+
168
describe("findSharedIdentifier", () => {
  it("finds shared email across array vs string", () => {
    const arraySide = { email: ["a@x.com", "b@x.com"] };
    const stringSide = { email: "B@X.COM" };

    const shared = findSharedIdentifier(arraySide, stringSide);

    expect(shared).toBeTruthy();
    expect(shared.key).toBe("email");
    // The reported value is the lowercase form.
    expect(shared.value).toBe("b@x.com");
  });

  it("returns null when no overlap", () => {
    const shared = findSharedIdentifier({ email: ["a@x.com"] }, { email: ["b@x.com"] });
    expect(shared).toBeNull();
  });

  it("ignores empty / missing identifier groups", () => {
    const shared = findSharedIdentifier({}, { email: ["a@x.com"] });
    expect(shared).toBeNull();
  });
});
189
+
190
+ // ─── EntityResolver wired against a real vault ───────────────────────────
191
+
192
/**
 * Creates a LocalVault in a fresh temp directory and stores the given
 * persons in it.
 *
 * Each entry may use the two-field `source` shorthand (or omit `source`);
 * the missing source fields are filled in before `vault.putPerson` is called.
 *
 * If opening the vault or storing a person throws, the vault and the temp
 * directory are cleaned up before the error is rethrown, so a failed setup
 * does not leave files behind.
 *
 * @param {object[]} persons - Person shorthands (`id`, optional `names`,
 *   `identifiers`, `subtype`, `source`, `extra`).
 * @returns {{ vault: object, dir: string }} The open vault and its temp directory.
 */
function makeVaultWithPersons(persons) {
  const dir = fs.mkdtempSync(path.join(os.tmpdir(), "hub-er-test-"));
  let vault;
  try {
    const dbPath = path.join(dir, "vault.db");
    const key = generateKeyHex();
    vault = new LocalVault({ path: dbPath, key });
    vault.open();
    for (const p of persons) {
      // Build a complete source (the rule-stage tests use a 2-field
      // shorthand for brevity; vault.putPerson needs the full 5-field).
      const inputSrc = p.source || {};
      const source = {
        adapter: inputSrc.adapter || "test",
        adapterVersion: inputSrc.adapterVersion || "0.1.0",
        originalId: inputSrc.originalId || p.id,
        capturedAt: inputSrc.capturedAt || Date.now(),
        capturedBy: inputSrc.capturedBy || "api",
      };
      vault.putPerson({
        id: p.id,
        type: "person",
        subtype: p.subtype || "contact",
        names: p.names || [],
        identifiers: p.identifiers || {},
        ingestedAt: Date.now(),
        source,
        extra: p.extra || {},
      });
    }
    return { vault, dir };
  } catch (err) {
    // The caller never receives { vault, dir } on failure, so its afterEach
    // hook cannot release them — do it here, then surface the original error.
    cleanup(vault, dir);
    throw err;
  }
}
222
+
223
/**
 * Best-effort teardown: closes the vault, then removes the temp directory.
 * Each step is attempted independently and any error it throws is ignored.
 *
 * @param {object} vault - Object exposing `close()`.
 * @param {string} dir - Directory to delete recursively.
 */
function cleanup(vault, dir) {
  const teardownSteps = [
    () => vault.close(),
    () => fs.rmSync(dir, { recursive: true, force: true }),
  ];
  for (const step of teardownSteps) {
    try {
      step();
    } catch (_e) {
      // Deliberately ignored: a failed step must not block the next one.
    }
  }
}
227
+
228
describe("EntityResolver.resolveOnIngest", () => {
  let vault;
  let dir;
  afterEach(() => cleanup(vault, dir));

  // Full five-field source for a record exported from the "alipay" adapter.
  const alipaySource = (originalId) => ({
    adapter: "alipay",
    adapterVersion: "0.1.0",
    originalId,
    capturedAt: Date.now(),
    capturedBy: "export",
  });

  // Writes the incoming person to the vault before it is handed to the resolver.
  const storeIncoming = (incoming) => {
    vault.putPerson({
      ...incoming,
      subtype: "contact",
      ingestedAt: Date.now(),
      source: alipaySource(incoming.source.originalId),
    });
  };

  it("R1 same-email pair → immediate merge", () => {
    ({ vault, dir } = makeVaultWithPersons([
      { id: "p-email-mom", names: ["妈"], identifiers: { email: ["mom@163.com"] } },
    ]));
    const resolver = new EntityResolver({ vault });
    const incoming = {
      id: "p-alipay-陈x",
      type: "person",
      names: ["陈X"],
      identifiers: { email: ["mom@163.com"] }, // same email as the seeded person
      source: { adapter: "alipay", originalId: "TX1" },
    };
    storeIncoming(incoming);

    const summary = resolver.resolveOnIngest([incoming]);

    expect(summary.newPersons).toBe(1);
    expect(summary.sameImmediate).toBe(1);
    const members = vault.getMergeGroupMembers("p-email-mom").sort();
    expect(members).toEqual(["p-alipay-陈x", "p-email-mom"]);
  });

  it("R2 zero-overlap pair → candidate filter excludes; no merge, person enqueued for async", () => {
    // The candidate-finder filters out zero-overlap rows for perf — they
    // never reach rule-stage. Behavior is equivalent ("different" verdict
    // never recorded but no merge happens either). Async pipeline gets
    // a chance later in case embedding catches a name variant.
    ({ vault, dir } = makeVaultWithPersons([
      { id: "p-x", names: ["张三"], identifiers: { email: ["a@x.com"] }, source: { adapter: "email", originalId: "1" } },
    ]));
    const resolver = new EntityResolver({ vault });
    const incoming = {
      id: "p-y",
      type: "person",
      names: ["李四"],
      identifiers: { phone: ["13900001234"] },
      source: { adapter: "alipay", originalId: "2" },
    };
    storeIncoming(incoming);

    const summary = resolver.resolveOnIngest([incoming]);

    expect(summary.differentImmediate).toBe(0);
    expect(summary.sameImmediate).toBe(0);
    expect(summary.enqueued).toBe(1);
    expect(vault.stats().mergeGroups).toBe(0);
  });

  it("uncertain pair → enqueues for async", () => {
    ({ vault, dir } = makeVaultWithPersons([
      { id: "p-1", names: ["张三"], source: { adapter: "email", originalId: "1" } },
    ]));
    const resolver = new EntityResolver({ vault });
    const incoming = {
      id: "p-2",
      type: "person",
      names: ["张三"], // name overlap → uncertain
      source: { adapter: "alipay", originalId: "2" },
    };
    storeIncoming(incoming);

    const summary = resolver.resolveOnIngest([incoming]);

    expect(summary.enqueued).toBe(1);
    expect(vault.resolveQueueStats().pending).toBe(1);
  });

  it("respects existing same-decision (idempotent on retry)", () => {
    ({ vault, dir } = makeVaultWithPersons([
      { id: "p-a", names: ["x"], identifiers: { email: ["a@x.com"] } },
      { id: "p-b", names: ["x"], identifiers: { email: ["a@x.com"] } },
    ]));
    const resolver = new EntityResolver({ vault });
    // A fresh object is built for every submission of the same person.
    const resubmitted = () => ({ id: "p-b", names: ["x"], identifiers: { email: ["a@x.com"] } });

    const firstRun = resolver.resolveOnIngest([resubmitted()]);
    const secondRun = resolver.resolveOnIngest([resubmitted()]);

    expect(firstRun.sameImmediate).toBeGreaterThanOrEqual(1);
    expect(secondRun.sameImmediate).toBeGreaterThanOrEqual(1); // still records same path
    // Members stable
    expect(vault.getMergeGroupMembers("p-a").sort()).toEqual(["p-a", "p-b"]);
  });

  it("handles error in single person without breaking batch", () => {
    ({ vault, dir } = makeVaultWithPersons([
      { id: "p-a", names: ["x"], identifiers: { email: ["a@x.com"] } },
    ]));
    const resolver = new EntityResolver({ vault });
    const invalidEntry = null; // will trigger error path
    const validEntry = { id: "p-b", names: ["x"], identifiers: { email: ["a@x.com"] } };

    const summary = resolver.resolveOnIngest([invalidEntry, validEntry]);

    expect(summary.newPersons).toBe(2);
    expect(summary.errored).toBeGreaterThanOrEqual(1);
    expect(summary.sameImmediate).toBeGreaterThanOrEqual(0);
  });
});
332
+
333
describe("EntityResolver.drain (rule-only, no embedding/llm)", () => {
  let vault;
  let dir;
  afterEach(() => cleanup(vault, dir));

  // Seeds the vault and returns a resolver configured with the vault only.
  const resolverOver = (persons) => {
    ({ vault, dir } = makeVaultWithPersons(persons));
    return new EntityResolver({ vault });
  };

  it("returns processed:0 when queue empty", async () => {
    const resolver = resolverOver([]);

    const outcome = await resolver.drain();

    expect(outcome.processed).toBe(0);
  });

  it("processes queued person — rule stage finds same identifier", async () => {
    const resolver = resolverOver([
      { id: "p-a", names: ["x"], identifiers: { email: ["a@x.com"] } },
      { id: "p-b", names: ["x"], identifiers: { email: ["a@x.com"] } },
    ]);
    vault.enqueueResolve("p-b");

    const outcome = await resolver.drain();

    expect(outcome.processed).toBe(1);
    expect(outcome.same).toBe(1);
    expect(vault.getMergeGroupMembers("p-a").sort()).toEqual(["p-a", "p-b"]);
  });

  it("processes uncertain pair without embedding stage → no decision", async () => {
    const resolver = resolverOver([
      { id: "p-a", names: ["x"], source: { adapter: "email", originalId: "1" } },
      { id: "p-b", names: ["x"], source: { adapter: "alipay", originalId: "2" } },
    ]);
    vault.enqueueResolve("p-b");

    const outcome = await resolver.drain();

    expect(outcome.processed).toBe(1);
    expect(outcome.same).toBe(0);
    expect(outcome.different).toBe(0);
    expect(outcome.skipped).toBeGreaterThanOrEqual(0); // no embedding wired
  });
});
371
+
372
describe("EntityResolver.drain with embedding + LLM stages", () => {
  let vault;
  let dir;
  afterEach(() => cleanup(vault, dir));

  // Every case starts from the same pair: one shared name, different
  // adapters, no identifiers. "p-b" is queued and the queue is drained by a
  // resolver built with the supplied stage functions.
  const drainWith = async (stages) => {
    ({ vault, dir } = makeVaultWithPersons([
      { id: "p-a", names: ["张三"], source: { adapter: "email", originalId: "1" } },
      { id: "p-b", names: ["张三"], source: { adapter: "alipay", originalId: "2" } },
    ]));
    const resolver = new EntityResolver({ vault, ...stages });
    vault.enqueueResolve("p-b");
    return resolver.drain();
  };

  it("embedding sim ≥ high threshold → auto same", async () => {
    const outcome = await drainWith({
      embeddingStage: async () => ({ sim: 0.91 }),
    });
    expect(outcome.same).toBe(1);
  });

  it("embedding sim < low threshold → auto different", async () => {
    const outcome = await drainWith({
      embeddingStage: async () => ({ sim: 0.4 }),
    });
    expect(outcome.different).toBe(1);
  });

  it("embedding mid-range + LLM yes → same", async () => {
    const outcome = await drainWith({
      embeddingStage: async () => ({ sim: 0.7 }),
      llmStage: async () => ({ verdict: "yes", confidence: 0.85, reason: "looks same" }),
    });
    expect(outcome.same).toBe(1);
  });

  it("embedding mid-range + LLM maybe → review queue", async () => {
    const outcome = await drainWith({
      embeddingStage: async () => ({ sim: 0.7 }),
      llmStage: async () => ({ verdict: "maybe", confidence: 0.5, reason: "unclear" }),
    });
    expect(outcome.review).toBe(1);
    expect(vault.listReviewQueue()).toHaveLength(1);
  });

  it("embedding stage throws → error counted, no infinite retry", async () => {
    const outcome = await drainWith({
      embeddingStage: async () => { throw new Error("ollama down"); },
    });
    expect(outcome.error).toBe(1);
    expect(vault.resolveQueueStats().pending).toBe(1); // retry-eligible
  });
});
450
+
451
describe("EntityResolver.applyUserDecision", () => {
  let vault;
  let dir;
  afterEach(() => cleanup(vault, dir));

  // Seeds two persons sharing a name, builds a resolver, and files a review
  // entry for the pair. Returns the resolver and the review id.
  const openReview = () => {
    ({ vault, dir } = makeVaultWithPersons([
      { id: "p-a", names: ["x"] },
      { id: "p-b", names: ["x"] },
    ]));
    const resolver = new EntityResolver({ vault });
    const reviewId = vault.enqueueReview({ aId: "p-a", bId: "p-b", embedSim: 0.7 });
    return { resolver, reviewId };
  };

  it("user says same → merge + record decision", () => {
    const { resolver, reviewId } = openReview();

    resolver.applyUserDecision({ reviewId, decision: "same" });

    expect(vault.getMergeGroupMembers("p-a").sort()).toEqual(["p-a", "p-b"]);
    expect(vault.getResolveDecision("p-a", "p-b")).toBeDefined();
    expect(vault.getResolveDecision("p-a", "p-b").decided_by).toBe("user");
  });

  it("user says different → record decision, no merge", () => {
    const { resolver, reviewId } = openReview();

    resolver.applyUserDecision({ reviewId, decision: "different" });

    expect(vault.stats().mergeGroups).toBe(0);
    expect(vault.getResolveDecision("p-a", "p-b").verdict).toBe("different");
  });

  it("user says skip → just marks reviewed", () => {
    const { resolver, reviewId } = openReview();

    resolver.applyUserDecision({ reviewId, decision: "skip" });

    expect(vault.stats().mergeGroups).toBe(0);
    expect(vault.getResolveDecision("p-a", "p-b")).toBeUndefined();
    expect(vault.listReviewQueue()).toHaveLength(0); // marked reviewed
  });
});
493
+
494
describe("EntityResolver manual merge / unmerge", () => {
  let vault;
  let dir;
  afterEach(() => cleanup(vault, dir));

  // Seeds two persons sharing a name and returns a resolver over that vault.
  const resolverForPair = () => {
    ({ vault, dir } = makeVaultWithPersons([
      { id: "p-a", names: ["x"] },
      { id: "p-b", names: ["x"] },
    ]));
    return new EntityResolver({ vault });
  };

  it("manualMerge creates the group + records same decision", () => {
    const resolver = resolverForPair();

    resolver.manualMerge({ aId: "p-a", bId: "p-b" });

    const members = vault.getMergeGroupMembers("p-a").sort();
    expect(members).toEqual(["p-a", "p-b"]);
  });

  it("manualUnmerge dissolves group + records different decision", () => {
    const resolver = resolverForPair();

    resolver.manualMerge({ aId: "p-a", bId: "p-b" });
    resolver.manualUnmerge("p-a");

    expect(vault.stats().mergeGroups).toBe(0);
    expect(vault.getResolveDecision("p-a", "p-b").verdict).toBe("different");
  });
});
520
+
521
describe("EntityResolver constructor", () => {
  it("requires vault", () => {
    // No options object at all.
    const buildWithoutOptions = () => new EntityResolver();
    expect(buildWithoutOptions).toThrow();

    // An options object lacking `vault`; the error message must mention it.
    const buildWithoutVault = () => new EntityResolver({});
    expect(buildWithoutVault).toThrow(/vault/);
  });
});
@@ -0,0 +1,96 @@
1
+ {
2
+ "schema": 1,
3
+ "description": "10-pair smoke fixture standing in for the 200-pair labeled set for Phase 8 EntityResolver evaluation. Synthetic data (no real PII). The full set targets 100 'same' positives + 100 'different' negatives across Email/Alipay/WeChat-style profiles. Real 200-pair set lives in user vault and is gitignored (see design doc §7.3).",
4
+ "generatedAt": "2026-05-20",
5
+ "categories": {
6
+ "cross-source-same-identifier": "Same email/phone/wechatId across two adapters; rule stage should catch",
7
+ "cross-source-same-name-different-adapter": "Same 中文 name only; needs embedding+LLM",
8
+ "cross-source-different-identifier": "Clearly distinct identifiers; should be 'different'",
9
+ "same-adapter-same-name": "Same adapter, different originalId, shared name (adapter internal dup)",
10
+ "homonym-trap": "Same common 中文 name but different people (different identifiers)"
11
+ },
12
+ "pairs": [
13
+ {
14
+ "id": "p001",
15
+ "category": "cross-source-same-identifier",
16
+ "groundTruth": "same",
17
+ "a": { "id": "p-email-mom-001", "type": "person", "names": ["妈"], "identifiers": { "email": ["mom001@163.com"] }, "source": { "adapter": "email", "originalId": "1" } },
18
+ "b": { "id": "p-alipay-001", "type": "person", "names": ["陈XX"], "identifiers": { "email": ["mom001@163.com"] }, "source": { "adapter": "alipay-bill", "originalId": "2" } }
19
+ },
20
+ {
21
+ "id": "p002",
22
+ "category": "cross-source-same-identifier",
23
+ "groundTruth": "same",
24
+ "a": { "id": "p-email-dad-002", "type": "person", "names": ["爸"], "identifiers": { "phone": ["13800001111"] }, "source": { "adapter": "email", "originalId": "1" } },
25
+ "b": { "id": "p-alipay-002", "type": "person", "names": ["陈父亲"], "identifiers": { "phone": ["+86 138 0000 1111"] }, "source": { "adapter": "alipay-bill", "originalId": "2" } }
26
+ },
27
+ {
28
+ "id": "p003",
29
+ "category": "cross-source-different-identifier",
30
+ "groundTruth": "different",
31
+ "a": { "id": "p-email-alice-003", "type": "person", "names": ["Alice"], "identifiers": { "email": ["alice@x.com"] }, "source": { "adapter": "email", "originalId": "1" } },
32
+ "b": { "id": "p-alipay-003", "type": "person", "names": ["Bob"], "identifiers": { "phone": ["13900001234"] }, "source": { "adapter": "alipay-bill", "originalId": "2" } }
33
+ },
34
+ {
35
+ "id": "p004",
36
+ "category": "cross-source-same-name-different-adapter",
37
+ "groundTruth": "same",
38
+ "a": { "id": "p-email-zs-004", "type": "person", "names": ["张三"], "identifiers": { "email": ["zs@x.com"] }, "source": { "adapter": "email", "originalId": "1" } },
39
+ "b": { "id": "p-alipay-004", "type": "person", "names": ["张三"], "identifiers": {}, "source": { "adapter": "alipay-bill", "originalId": "2" } }
40
+ },
41
+ {
42
+ "id": "p005",
43
+ "category": "homonym-trap",
44
+ "groundTruth": "different",
45
+ "a": { "id": "p-email-zs1-005", "type": "person", "names": ["张三"], "identifiers": { "email": ["zhang1@a.com"], "phone": ["13800001111"] }, "source": { "adapter": "email", "originalId": "1" } },
46
+ "b": { "id": "p-email-zs2-005", "type": "person", "names": ["张三"], "identifiers": { "email": ["zhang2@b.com"], "phone": ["13900002222"] }, "source": { "adapter": "email", "originalId": "2" } }
47
+ },
48
+ {
49
+ "id": "p006",
50
+ "category": "same-adapter-same-name",
51
+ "groundTruth": "same",
52
+ "a": { "id": "p-email-li-006a", "type": "person", "names": ["李四"], "identifiers": {}, "source": { "adapter": "email", "originalId": "1" } },
53
+ "b": { "id": "p-email-li-006b", "type": "person", "names": ["李四"], "identifiers": {}, "source": { "adapter": "email", "originalId": "2" } }
54
+ },
55
+ {
56
+ "id": "p007",
57
+ "category": "cross-source-same-identifier",
58
+ "groundTruth": "same",
59
+ "a": { "id": "p-wechat-007", "type": "person", "names": ["王五"], "identifiers": { "wechatId": "wxid_wangwu" }, "source": { "adapter": "wechat", "originalId": "1" } },
60
+ "b": { "id": "p-alipay-007", "type": "person", "names": ["王XX"], "identifiers": { "wechatId": "wxid_wangwu" }, "source": { "adapter": "alipay-bill", "originalId": "2" } }
61
+ },
62
+ {
63
+ "id": "p008",
64
+ "category": "cross-source-different-identifier",
65
+ "groundTruth": "different",
66
+ "a": { "id": "p-email-ceo-008", "type": "person", "names": ["陈总"], "identifiers": { "email": ["ceo@bigco.com"] }, "source": { "adapter": "email", "originalId": "1" } },
67
+ "b": { "id": "p-email-ceo2-008", "type": "person", "names": ["陈总"], "identifiers": { "email": ["chen@xiaobu.com"] }, "source": { "adapter": "email", "originalId": "2" } }
68
+ },
69
+ {
70
+ "id": "p009",
71
+ "category": "cross-source-same-name-different-adapter",
72
+ "groundTruth": "same",
73
+ "a": { "id": "p-wechat-009", "type": "person", "names": ["小明"], "identifiers": {}, "source": { "adapter": "wechat", "originalId": "1" } },
74
+ "b": { "id": "p-alipay-009", "type": "person", "names": ["小明"], "identifiers": {}, "source": { "adapter": "alipay-bill", "originalId": "2" } }
75
+ },
76
+ {
77
+ "id": "p010",
78
+ "category": "homonym-trap",
79
+ "groundTruth": "different",
80
+ "a": { "id": "p-email-lijun1-010", "type": "person", "names": ["李军"], "identifiers": { "email": ["lijun.eng@coA.com"] }, "source": { "adapter": "email", "originalId": "1" } },
81
+ "b": { "id": "p-email-lijun2-010", "type": "person", "names": ["李军"], "identifiers": { "email": ["lijun.sales@coB.com"] }, "source": { "adapter": "email", "originalId": "2" } }
82
+ }
83
+ ],
84
+ "summary": {
85
+ "totalPairs": 10,
86
+ "byGroundTruth": { "same": 6, "different": 4 },
87
+ "byCategory": {
88
+ "cross-source-same-identifier": 3,
89
+ "cross-source-same-name-different-adapter": 2,
90
+ "cross-source-different-identifier": 2,
91
+ "same-adapter-same-name": 1,
92
+ "homonym-trap": 2
93
+ },
94
+ "note": "This is a 10-pair smoke fixture (committed to git, synthetic). The real 200-pair set lives at __tests__/fixtures/entity-resolver-200.json which is gitignored — generated from user vault via scripts/gen-entity-resolver-fixture.js."
95
+ }
96
+ }