@chainlesschain/personal-data-hub 0.4.29 → 0.4.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/forensics/qq-nt-collect.js +190 -0
- package/lib/prompt-builder.js +15 -1
- package/package.json +8 -3
- package/__tests__/adapter-guide.test.js +0 -47
- package/__tests__/adapter-spec.test.js +0 -78
- package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +0 -211
- package/__tests__/adapters/ai-chat-health-checker.test.js +0 -262
- package/__tests__/adapters/ai-chat-history.test.js +0 -396
- package/__tests__/adapters/ai-chat-http-client.test.js +0 -242
- package/__tests__/adapters/ai-chat-vendors.test.js +0 -874
- package/__tests__/adapters/alipay-bill-adapter.test.js +0 -538
- package/__tests__/adapters/apple-health.test.js +0 -95
- package/__tests__/adapters/bank-family.test.js +0 -125
- package/__tests__/adapters/biz-tianyancha.test.js +0 -159
- package/__tests__/adapters/browser-history-chrome.test.js +0 -377
- package/__tests__/adapters/browser-history-edge.test.js +0 -159
- package/__tests__/adapters/car-mercedesme.test.js +0 -74
- package/__tests__/adapters/doc-baidu-netdisk.test.js +0 -102
- package/__tests__/adapters/doc-camscanner.test.js +0 -147
- package/__tests__/adapters/doc-platforms.test.js +0 -177
- package/__tests__/adapters/edu-huawei-learning-live.test.js +0 -198
- package/__tests__/adapters/edu-zuoyebang-live.test.js +0 -226
- package/__tests__/adapters/email-adapter-snapshot.test.js +0 -237
- package/__tests__/adapters/email-adapter.test.js +0 -742
- package/__tests__/adapters/email-classifier.test.js +0 -347
- package/__tests__/adapters/email-imap-session.test.js +0 -334
- package/__tests__/adapters/email-parser.test.js +0 -244
- package/__tests__/adapters/email-pdf-extractor.test.js +0 -529
- package/__tests__/adapters/email-providers.test.js +0 -84
- package/__tests__/adapters/email-retry-progress.test.js +0 -294
- package/__tests__/adapters/email-templates.test.js +0 -822
- package/__tests__/adapters/family-23-collectors-scaffold.test.js +0 -182
- package/__tests__/adapters/finance-alipay-live.test.js +0 -258
- package/__tests__/adapters/finance-dcep.test.js +0 -74
- package/__tests__/adapters/fitness-joyrun.test.js +0 -82
- package/__tests__/adapters/game-genshin-live.test.js +0 -238
- package/__tests__/adapters/game-genshin-scaffold.test.js +0 -108
- package/__tests__/adapters/game-honor-of-kings-live.test.js +0 -230
- package/__tests__/adapters/git-activity.test.js +0 -222
- package/__tests__/adapters/gov-12123.test.js +0 -103
- package/__tests__/adapters/gov-ixiamen.test.js +0 -150
- package/__tests__/adapters/gov-tax.test.js +0 -135
- package/__tests__/adapters/health-meiyou.test.js +0 -125
- package/__tests__/adapters/local-files.test.js +0 -264
- package/__tests__/adapters/local-im-pc.test.js +0 -154
- package/__tests__/adapters/messaging-whatsapp.test.js +0 -289
- package/__tests__/adapters/music-kugou.test.js +0 -187
- package/__tests__/adapters/music-qq.test.js +0 -112
- package/__tests__/adapters/netease-music-live.test.js +0 -244
- package/__tests__/adapters/netease-music.test.js +0 -74
- package/__tests__/adapters/pc-local-discovery.test.js +0 -141
- package/__tests__/adapters/qq-pc-direct-read.test.js +0 -227
- package/__tests__/adapters/reading-family.test.js +0 -108
- package/__tests__/adapters/recruit-boss.test.js +0 -180
- package/__tests__/adapters/shell-history.test.js +0 -180
- package/__tests__/adapters/shopping-base.test.js +0 -179
- package/__tests__/adapters/shopping-dianping.test.js +0 -239
- package/__tests__/adapters/social-bilibili-adb-api-client.test.js +0 -721
- package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +0 -346
- package/__tests__/adapters/social-bilibili-adb-collector.test.js +0 -284
- package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +0 -343
- package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +0 -296
- package/__tests__/adapters/social-csdn.test.js +0 -175
- package/__tests__/adapters/social-dongchedi.test.js +0 -165
- package/__tests__/adapters/social-douyin-adb-aweme-detail.test.js +0 -165
- package/__tests__/adapters/social-douyin-adb-collector.test.js +0 -254
- package/__tests__/adapters/social-douyin-adb-db-extension.test.js +0 -114
- package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +0 -304
- package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +0 -216
- package/__tests__/adapters/social-douyin-adb-usage-profile.test.js +0 -229
- package/__tests__/adapters/social-douyin-adb-watch-history.test.js +0 -269
- package/__tests__/adapters/social-kuaishou-adb-api-client.test.js +0 -496
- package/__tests__/adapters/social-kuaishou-adb-collector.test.js +0 -276
- package/__tests__/adapters/social-kuaishou-adb-cookies-extension.test.js +0 -152
- package/__tests__/adapters/social-kuaishou-adb-snapshot-builder.test.js +0 -178
- package/__tests__/adapters/social-toutiao-adb-account-reader.test.js +0 -135
- package/__tests__/adapters/social-toutiao-adb-api-client.test.js +0 -626
- package/__tests__/adapters/social-toutiao-adb-article.test.js +0 -155
- package/__tests__/adapters/social-toutiao-adb-collector.test.js +0 -378
- package/__tests__/adapters/social-toutiao-adb-cookies-extension.test.js +0 -193
- package/__tests__/adapters/social-toutiao-adb-snapshot-builder.test.js +0 -196
- package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +0 -311
- package/__tests__/adapters/social-weibo-adb-api-client.test.js +0 -362
- package/__tests__/adapters/social-weibo-adb-collector.test.js +0 -201
- package/__tests__/adapters/social-weibo-adb-cookies-extension.test.js +0 -167
- package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +0 -189
- package/__tests__/adapters/social-xiaohongshu-adb-api-client.test.js +0 -431
- package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +0 -207
- package/__tests__/adapters/social-xiaohongshu-adb-cookies-extension.test.js +0 -0
- package/__tests__/adapters/social-xiaohongshu-adb-sign-provider-injection.test.js +0 -351
- package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +0 -130
- package/__tests__/adapters/social-xiaohongshu-adb-snapshot-builder.test.js +0 -200
- package/__tests__/adapters/social-zhihu.test.js +0 -246
- package/__tests__/adapters/system-data-adapter.test.js +0 -443
- package/__tests__/adapters/system-data-android-ingest.test.js +0 -144
- package/__tests__/adapters/system-data-android.test.js +0 -519
- package/__tests__/adapters/system-data-disclosure.test.js +0 -153
- package/__tests__/adapters/travel-12306.test.js +0 -512
- package/__tests__/adapters/travel-amap.test.js +0 -219
- package/__tests__/adapters/travel-baidu-map.test.js +0 -305
- package/__tests__/adapters/travel-base.test.js +0 -205
- package/__tests__/adapters/travel-ctrip.test.js +0 -377
- package/__tests__/adapters/travel-didi-consumer.test.js +0 -66
- package/__tests__/adapters/travel-didi.test.js +0 -204
- package/__tests__/adapters/travel-tencent-map.test.js +0 -207
- package/__tests__/adapters/travel-tongcheng.test.js +0 -289
- package/__tests__/adapters/video-platforms.test.js +0 -152
- package/__tests__/adapters/video-xigua.test.js +0 -106
- package/__tests__/adapters/vscode.test.js +0 -299
- package/__tests__/adapters/wechat-bootstrap.test.js +0 -240
- package/__tests__/adapters/wechat-env-probe.test.js +0 -162
- package/__tests__/adapters/wechat-frida-agent.test.js +0 -322
- package/__tests__/adapters/wechat-frida-integration.test.js +0 -149
- package/__tests__/adapters/wechat-frida-key-provider.test.js +0 -188
- package/__tests__/adapters/wechat-md5-key-provider.test.js +0 -101
- package/__tests__/adapters/wechat-pc-direct-read.test.js +0 -365
- package/__tests__/adapters/wechat-pc-group-topic.test.js +0 -63
- package/__tests__/adapters/wechat-pc-v4-sidecar.test.js +0 -72
- package/__tests__/adapters/weread.test.js +0 -123
- package/__tests__/adapters/wework-pc.test.js +0 -124
- package/__tests__/adapters/win-recent.test.js +0 -192
- package/__tests__/analysis-skills.test.js +0 -754
- package/__tests__/analysis.test.js +0 -1845
- package/__tests__/audio-ximalaya-snapshot.test.js +0 -279
- package/__tests__/batch.test.js +0 -133
- package/__tests__/bridges-cc-kg.test.js +0 -231
- package/__tests__/bridges-cc-llm.test.js +0 -191
- package/__tests__/bridges-cc-rag.test.js +0 -162
- package/__tests__/categories.test.js +0 -92
- package/__tests__/e2e/ai-chat-cross-source-journey.test.js +0 -213
- package/__tests__/e2e/full-user-journey.test.js +0 -188
- package/__tests__/e2e/local-data-adapters-cli.e2e.test.js +0 -146
- package/__tests__/entity-resolver-ingest-hook.test.js +0 -177
- package/__tests__/entity-resolver-stages.test.js +0 -411
- package/__tests__/entity-resolver-vault.test.js +0 -249
- package/__tests__/entity-resolver.test.js +0 -526
- package/__tests__/fitness-keep-snapshot.test.js +0 -224
- package/__tests__/fixtures/entity-resolver-200-mock.json +0 -96
- package/__tests__/ids.test.js +0 -45
- package/__tests__/integration/ai-chat-history-registry.test.js +0 -228
- package/__tests__/integration/aichat-wizard-end-to-end.test.js +0 -282
- package/__tests__/integration/cross-adapter-pipelines.test.js +0 -396
- package/__tests__/integration/local-data-adapters-pipeline.test.js +0 -373
- package/__tests__/integration/social-bilibili-pipeline.test.js +0 -261
- package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +0 -390
- package/__tests__/key-providers.test.js +0 -126
- package/__tests__/kg-derive.test.js +0 -219
- package/__tests__/llm-client.test.js +0 -122
- package/__tests__/longtail-adapters.test.js +0 -281
- package/__tests__/messaging-qq-snapshot.test.js +0 -294
- package/__tests__/mobile-extractor-encrypted.test.js +0 -460
- package/__tests__/mobile-extractor.test.js +0 -288
- package/__tests__/mock-adapter.test.js +0 -93
- package/__tests__/prompt-builder.test.js +0 -249
- package/__tests__/query-parser.test.js +0 -365
- package/__tests__/rag-derive.test.js +0 -169
- package/__tests__/registry-readiness.test.js +0 -292
- package/__tests__/registry.test.js +0 -420
- package/__tests__/salvage-ingest.test.js +0 -97
- package/__tests__/schemas.test.js +0 -331
- package/__tests__/shopping-adapters.test.js +0 -392
- package/__tests__/shopping-eleme-snapshot.test.js +0 -454
- package/__tests__/shopping-pinduoduo-snapshot.test.js +0 -484
- package/__tests__/shopping-snapshot.test.js +0 -438
- package/__tests__/shopping-vipshop-snapshot.test.js +0 -425
- package/__tests__/shopping-xianyu-snapshot.test.js +0 -451
- package/__tests__/sidecar-contacts-cross-validate.test.js +0 -186
- package/__tests__/sidecar-supervisor.test.js +0 -128
- package/__tests__/sign-providers.test.js +0 -62
- package/__tests__/social-adapters.test.js +0 -280
- package/__tests__/social-bilibili-snapshot.test.js +0 -278
- package/__tests__/social-douban-snapshot.test.js +0 -351
- package/__tests__/social-douyin-im-direct-read.test.js +0 -377
- package/__tests__/social-douyin-salvage-collector.test.js +0 -98
- package/__tests__/social-douyin-salvage-mapper.test.js +0 -90
- package/__tests__/social-douyin-snapshot.test.js +0 -256
- package/__tests__/social-kuaishou-snapshot.test.js +0 -362
- package/__tests__/social-toutiao-snapshot.test.js +0 -366
- package/__tests__/social-weibo-snapshot.test.js +0 -234
- package/__tests__/social-weibo-sqlite-device.test.js +0 -174
- package/__tests__/social-xiaohongshu-snapshot.test.js +0 -232
- package/__tests__/sqlite-leaf-salvage.test.js +0 -97
- package/__tests__/travel-adapters.test.js +0 -483
- package/__tests__/travel-maps-snapshot.test.js +0 -426
- package/__tests__/vault-driver-error.test.js +0 -74
- package/__tests__/vault-search-helpers.test.js +0 -104
- package/__tests__/vault-search.test.js +0 -423
- package/__tests__/vault.test.js +0 -767
- package/__tests__/wechat-adapter.test.js +0 -594
- package/__tests__/whatsapp-adapter.test.js +0 -138
- package/scripts/_make-fixture-all.js +0 -126
- package/scripts/_make-fixture-contacts.js +0 -84
- package/scripts/evaluate-entity-resolver.js +0 -213
- package/scripts/run-native-tests-sandbox.sh +0 -55
- package/scripts/smoke-phase-5-5.js +0 -196
- package/scripts/smoke-phase-5-7.js +0 -181
- package/scripts/smoke-system-data-contacts.js +0 -309
- package/scripts/smoke-system-data.js +0 -312
- package/vitest.config.js +0 -88
|
@@ -1,526 +0,0 @@
|
|
|
1
|
-
"use strict";
|
|
2
|
-
|
|
3
|
-
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
4
|
-
|
|
5
|
-
const path = require("node:path");
|
|
6
|
-
const fs = require("node:fs");
|
|
7
|
-
const os = require("node:os");
|
|
8
|
-
const { LocalVault } = require("../lib/vault");
|
|
9
|
-
const { generateKeyHex } = require("../lib/key-providers");
|
|
10
|
-
const { newId } = require("../lib/ids");
|
|
11
|
-
const {
|
|
12
|
-
EntityResolver,
|
|
13
|
-
entityResolverRuleStage: ruleStage,
|
|
14
|
-
entityResolverSharedIdentifier: findSharedIdentifier,
|
|
15
|
-
entityResolverNormalizeIdValue: normalizeIdValue,
|
|
16
|
-
} = require("../lib/entity-resolver");
|
|
17
|
-
|
|
18
|
-
// ─── ruleStage (pure) ────────────────────────────────────────────────────
|
|
19
|
-
|
|
20
|
-
function person(overrides = {}) {
|
|
21
|
-
return {
|
|
22
|
-
id: overrides.id || `person-${Math.random().toString(36).slice(2, 8)}`,
|
|
23
|
-
type: "person",
|
|
24
|
-
subtype: "contact",
|
|
25
|
-
names: overrides.names || [],
|
|
26
|
-
identifiers: overrides.identifiers || {},
|
|
27
|
-
source: overrides.source || { adapter: "test", originalId: "tx-" + Math.random() },
|
|
28
|
-
...overrides,
|
|
29
|
-
};
|
|
30
|
-
}
|
|
31
|
-
|
|
32
|
-
describe("ruleStage — R1 strong identifier match", () => {
|
|
33
|
-
it("same email → same", () => {
|
|
34
|
-
const a = person({ identifiers: { email: ["mom@163.com"] } });
|
|
35
|
-
const b = person({ identifiers: { email: ["MOM@163.COM"] } }); // case + array
|
|
36
|
-
expect(ruleStage(a, b).verdict).toBe("same");
|
|
37
|
-
});
|
|
38
|
-
|
|
39
|
-
it("same phone (different formatting) → same", () => {
|
|
40
|
-
const a = person({ identifiers: { phone: ["+86 138 0000 1111"] } });
|
|
41
|
-
const b = person({ identifiers: { phone: ["13800001111"] } });
|
|
42
|
-
expect(ruleStage(a, b).verdict).toBe("same");
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
it("same wechatId → same", () => {
|
|
46
|
-
const a = person({ identifiers: { wechatId: "wxid_xyz" } });
|
|
47
|
-
const b = person({ identifiers: { wechatId: ["wxid_xyz"] } });
|
|
48
|
-
expect(ruleStage(a, b).verdict).toBe("same");
|
|
49
|
-
});
|
|
50
|
-
|
|
51
|
-
it("same did → same", () => {
|
|
52
|
-
const a = person({ identifiers: { did: "did:cc:abc" } });
|
|
53
|
-
const b = person({ identifiers: { did: "did:cc:abc" } });
|
|
54
|
-
expect(ruleStage(a, b).verdict).toBe("same");
|
|
55
|
-
});
|
|
56
|
-
|
|
57
|
-
it("same idHash → same", () => {
|
|
58
|
-
const a = person({ identifiers: { idHash: "sha-id-hash-123" } });
|
|
59
|
-
const b = person({ identifiers: { idHash: "sha-id-hash-123" } });
|
|
60
|
-
expect(ruleStage(a, b).verdict).toBe("same");
|
|
61
|
-
});
|
|
62
|
-
});
|
|
63
|
-
|
|
64
|
-
describe("ruleStage — R2 zero overlap → different", () => {
|
|
65
|
-
it("no shared field → different", () => {
|
|
66
|
-
const a = person({
|
|
67
|
-
names: ["张三"],
|
|
68
|
-
identifiers: { email: ["a@x.com"] },
|
|
69
|
-
source: { adapter: "email", originalId: "1" },
|
|
70
|
-
});
|
|
71
|
-
const b = person({
|
|
72
|
-
names: ["李四"],
|
|
73
|
-
identifiers: { phone: ["13900001234"] },
|
|
74
|
-
source: { adapter: "alipay", originalId: "2" },
|
|
75
|
-
});
|
|
76
|
-
expect(ruleStage(a, b).verdict).toBe("different");
|
|
77
|
-
});
|
|
78
|
-
|
|
79
|
-
it("identical content but no identifier overlap + different adapters → uncertain", () => {
|
|
80
|
-
// Note: same name → overlap=1 → uncertain
|
|
81
|
-
const a = person({ names: ["张三"], source: { adapter: "email", originalId: "1" } });
|
|
82
|
-
const b = person({ names: ["张三"], source: { adapter: "alipay", originalId: "2" } });
|
|
83
|
-
expect(ruleStage(a, b).verdict).toBe("uncertain");
|
|
84
|
-
});
|
|
85
|
-
});
|
|
86
|
-
|
|
87
|
-
describe("ruleStage — R3 same-adapter internal dup", () => {
|
|
88
|
-
it("same adapter + different originalId + shared name → same", () => {
|
|
89
|
-
const a = person({
|
|
90
|
-
names: ["张三"],
|
|
91
|
-
source: { adapter: "email", originalId: "1" },
|
|
92
|
-
});
|
|
93
|
-
const b = person({
|
|
94
|
-
names: ["张三"],
|
|
95
|
-
source: { adapter: "email", originalId: "2" },
|
|
96
|
-
});
|
|
97
|
-
expect(ruleStage(a, b).verdict).toBe("same");
|
|
98
|
-
expect(ruleStage(a, b).reason).toMatch(/same-adapter/);
|
|
99
|
-
});
|
|
100
|
-
|
|
101
|
-
it("same adapter + same originalId is NOT a R3 case (different id implies different row)", () => {
|
|
102
|
-
const a = person({
|
|
103
|
-
id: "p1",
|
|
104
|
-
names: ["张三"],
|
|
105
|
-
source: { adapter: "email", originalId: "1" },
|
|
106
|
-
});
|
|
107
|
-
const b = person({
|
|
108
|
-
id: "p2",
|
|
109
|
-
names: ["张三"],
|
|
110
|
-
source: { adapter: "email", originalId: "1" },
|
|
111
|
-
});
|
|
112
|
-
// R3 requires DIFFERENT originalId — same originalId falls through
|
|
113
|
-
// but uniqueIndex on source.originalId means we don't see this case
|
|
114
|
-
// in practice.
|
|
115
|
-
const r = ruleStage(a, b);
|
|
116
|
-
expect(r.verdict).toBe("uncertain"); // because there's overlap (name + adapter)
|
|
117
|
-
});
|
|
118
|
-
|
|
119
|
-
it("same adapter + DIFFERENT name → uncertain (not R3)", () => {
|
|
120
|
-
const a = person({
|
|
121
|
-
names: ["张三"],
|
|
122
|
-
source: { adapter: "email", originalId: "1" },
|
|
123
|
-
});
|
|
124
|
-
const b = person({
|
|
125
|
-
names: ["李四"],
|
|
126
|
-
source: { adapter: "email", originalId: "2" },
|
|
127
|
-
});
|
|
128
|
-
expect(ruleStage(a, b).verdict).toBe("uncertain");
|
|
129
|
-
});
|
|
130
|
-
});
|
|
131
|
-
|
|
132
|
-
describe("ruleStage — R4 uncertain fall-through", () => {
|
|
133
|
-
it("name overlap only → uncertain", () => {
|
|
134
|
-
const a = person({ names: ["张三"], source: { adapter: "email", originalId: "1" } });
|
|
135
|
-
const b = person({ names: ["张三"], source: { adapter: "alipay", originalId: "2" } });
|
|
136
|
-
expect(ruleStage(a, b).verdict).toBe("uncertain");
|
|
137
|
-
});
|
|
138
|
-
|
|
139
|
-
it("same person id → same vacuously", () => {
|
|
140
|
-
const a = person({ id: "p1" });
|
|
141
|
-
const b = person({ id: "p1" });
|
|
142
|
-
expect(ruleStage(a, b).verdict).toBe("same");
|
|
143
|
-
});
|
|
144
|
-
|
|
145
|
-
it("invalid input → different", () => {
|
|
146
|
-
expect(ruleStage(null, person()).verdict).toBe("different");
|
|
147
|
-
expect(ruleStage(person(), undefined).verdict).toBe("different");
|
|
148
|
-
});
|
|
149
|
-
});
|
|
150
|
-
|
|
151
|
-
// ─── normalizeIdValue ────────────────────────────────────────────────────
|
|
152
|
-
|
|
153
|
-
describe("normalizeIdValue", () => {
|
|
154
|
-
it("email → lowercase + trim", () => {
|
|
155
|
-
expect(normalizeIdValue("email", " MOM@163.COM ")).toBe("mom@163.com");
|
|
156
|
-
});
|
|
157
|
-
it("phone → digits only, strips +86 country code", () => {
|
|
158
|
-
expect(normalizeIdValue("phone", "+86 138-0000 1111")).toBe("13800001111");
|
|
159
|
-
expect(normalizeIdValue("phone", "13800001111")).toBe("13800001111");
|
|
160
|
-
});
|
|
161
|
-
it("other keys → trim only", () => {
|
|
162
|
-
expect(normalizeIdValue("did", " did:cc:abc ")).toBe("did:cc:abc");
|
|
163
|
-
});
|
|
164
|
-
});
|
|
165
|
-
|
|
166
|
-
// ─── findSharedIdentifier ────────────────────────────────────────────────
|
|
167
|
-
|
|
168
|
-
describe("findSharedIdentifier", () => {
|
|
169
|
-
it("finds shared email across array vs string", () => {
|
|
170
|
-
const r = findSharedIdentifier(
|
|
171
|
-
{ email: ["a@x.com", "b@x.com"] },
|
|
172
|
-
{ email: "B@X.COM" },
|
|
173
|
-
);
|
|
174
|
-
expect(r).toBeTruthy();
|
|
175
|
-
expect(r.key).toBe("email");
|
|
176
|
-
expect(r.value).toBe("b@x.com");
|
|
177
|
-
});
|
|
178
|
-
|
|
179
|
-
it("returns null when no overlap", () => {
|
|
180
|
-
const r = findSharedIdentifier({ email: ["a@x.com"] }, { email: ["b@x.com"] });
|
|
181
|
-
expect(r).toBeNull();
|
|
182
|
-
});
|
|
183
|
-
|
|
184
|
-
it("ignores empty / missing identifier groups", () => {
|
|
185
|
-
const r = findSharedIdentifier({}, { email: ["a@x.com"] });
|
|
186
|
-
expect(r).toBeNull();
|
|
187
|
-
});
|
|
188
|
-
});
|
|
189
|
-
|
|
190
|
-
// ─── EntityResolver wired against a real vault ───────────────────────────
|
|
191
|
-
|
|
192
|
-
function makeVaultWithPersons(persons) {
|
|
193
|
-
const dir = fs.mkdtempSync(path.join(os.tmpdir(), "hub-er-test-"));
|
|
194
|
-
const dbPath = path.join(dir, "vault.db");
|
|
195
|
-
const key = generateKeyHex();
|
|
196
|
-
const vault = new LocalVault({ path: dbPath, key });
|
|
197
|
-
vault.open();
|
|
198
|
-
for (const p of persons) {
|
|
199
|
-
// Build a complete source (the rule-stage tests use a 2-field
|
|
200
|
-
// shorthand for brevity; vault.putPerson needs the full 5-field).
|
|
201
|
-
const inputSrc = p.source || {};
|
|
202
|
-
const source = {
|
|
203
|
-
adapter: inputSrc.adapter || "test",
|
|
204
|
-
adapterVersion: inputSrc.adapterVersion || "0.1.0",
|
|
205
|
-
originalId: inputSrc.originalId || p.id,
|
|
206
|
-
capturedAt: inputSrc.capturedAt || Date.now(),
|
|
207
|
-
capturedBy: inputSrc.capturedBy || "api",
|
|
208
|
-
};
|
|
209
|
-
vault.putPerson({
|
|
210
|
-
id: p.id,
|
|
211
|
-
type: "person",
|
|
212
|
-
subtype: p.subtype || "contact",
|
|
213
|
-
names: p.names || [],
|
|
214
|
-
identifiers: p.identifiers || {},
|
|
215
|
-
ingestedAt: Date.now(),
|
|
216
|
-
source,
|
|
217
|
-
extra: p.extra || {},
|
|
218
|
-
});
|
|
219
|
-
}
|
|
220
|
-
return { vault, dir };
|
|
221
|
-
}
|
|
222
|
-
|
|
223
|
-
function cleanup(vault, dir) {
|
|
224
|
-
try { vault.close(); } catch (_e) {}
|
|
225
|
-
try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) {}
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
describe("EntityResolver.resolveOnIngest", () => {
|
|
229
|
-
let vault, dir;
|
|
230
|
-
afterEach(() => cleanup(vault, dir));
|
|
231
|
-
|
|
232
|
-
it("R1 same-email pair → immediate merge", () => {
|
|
233
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
234
|
-
{ id: "p-email-mom", names: ["妈"], identifiers: { email: ["mom@163.com"] } },
|
|
235
|
-
]));
|
|
236
|
-
const er = new EntityResolver({ vault });
|
|
237
|
-
const newPerson = {
|
|
238
|
-
id: "p-alipay-陈x",
|
|
239
|
-
type: "person",
|
|
240
|
-
names: ["陈X"],
|
|
241
|
-
identifiers: { email: ["mom@163.com"] }, // same email
|
|
242
|
-
source: { adapter: "alipay", originalId: "TX1" },
|
|
243
|
-
};
|
|
244
|
-
vault.putPerson({
|
|
245
|
-
...newPerson,
|
|
246
|
-
subtype: "contact",
|
|
247
|
-
ingestedAt: Date.now(),
|
|
248
|
-
source: { adapter: "alipay", adapterVersion: "0.1.0", originalId: "TX1", capturedAt: Date.now(), capturedBy: "export" },
|
|
249
|
-
});
|
|
250
|
-
const summary = er.resolveOnIngest([newPerson]);
|
|
251
|
-
expect(summary.newPersons).toBe(1);
|
|
252
|
-
expect(summary.sameImmediate).toBe(1);
|
|
253
|
-
expect(vault.getMergeGroupMembers("p-email-mom").sort()).toEqual(["p-alipay-陈x", "p-email-mom"]);
|
|
254
|
-
});
|
|
255
|
-
|
|
256
|
-
it("R2 zero-overlap pair → candidate filter excludes; no merge, person enqueued for async", () => {
|
|
257
|
-
// The candidate-finder filters out zero-overlap rows for perf — they
|
|
258
|
-
// never reach rule-stage. Behavior is equivalent ("different" verdict
|
|
259
|
-
// never recorded but no merge happens either). Async pipeline gets
|
|
260
|
-
// a chance later in case embedding catches a name variant.
|
|
261
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
262
|
-
{ id: "p-x", names: ["张三"], identifiers: { email: ["a@x.com"] }, source: { adapter: "email", originalId: "1" } },
|
|
263
|
-
]));
|
|
264
|
-
const er = new EntityResolver({ vault });
|
|
265
|
-
const newPerson = {
|
|
266
|
-
id: "p-y",
|
|
267
|
-
type: "person",
|
|
268
|
-
names: ["李四"],
|
|
269
|
-
identifiers: { phone: ["13900001234"] },
|
|
270
|
-
source: { adapter: "alipay", originalId: "2" },
|
|
271
|
-
};
|
|
272
|
-
vault.putPerson({
|
|
273
|
-
...newPerson, subtype: "contact", ingestedAt: Date.now(),
|
|
274
|
-
source: { adapter: "alipay", adapterVersion: "0.1.0", originalId: "2", capturedAt: Date.now(), capturedBy: "export" },
|
|
275
|
-
});
|
|
276
|
-
const summary = er.resolveOnIngest([newPerson]);
|
|
277
|
-
expect(summary.differentImmediate).toBe(0);
|
|
278
|
-
expect(summary.sameImmediate).toBe(0);
|
|
279
|
-
expect(summary.enqueued).toBe(1);
|
|
280
|
-
expect(vault.stats().mergeGroups).toBe(0);
|
|
281
|
-
});
|
|
282
|
-
|
|
283
|
-
it("uncertain pair → enqueues for async", () => {
|
|
284
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
285
|
-
{ id: "p-1", names: ["张三"], source: { adapter: "email", originalId: "1" } },
|
|
286
|
-
]));
|
|
287
|
-
const er = new EntityResolver({ vault });
|
|
288
|
-
const newPerson = {
|
|
289
|
-
id: "p-2",
|
|
290
|
-
type: "person",
|
|
291
|
-
names: ["张三"], // name overlap → uncertain
|
|
292
|
-
source: { adapter: "alipay", originalId: "2" },
|
|
293
|
-
};
|
|
294
|
-
vault.putPerson({
|
|
295
|
-
...newPerson, subtype: "contact", ingestedAt: Date.now(),
|
|
296
|
-
source: { adapter: "alipay", adapterVersion: "0.1.0", originalId: "2", capturedAt: Date.now(), capturedBy: "export" },
|
|
297
|
-
});
|
|
298
|
-
const summary = er.resolveOnIngest([newPerson]);
|
|
299
|
-
expect(summary.enqueued).toBe(1);
|
|
300
|
-
expect(vault.resolveQueueStats().pending).toBe(1);
|
|
301
|
-
});
|
|
302
|
-
|
|
303
|
-
it("respects existing same-decision (idempotent on retry)", () => {
|
|
304
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
305
|
-
{ id: "p-a", names: ["x"], identifiers: { email: ["a@x.com"] } },
|
|
306
|
-
{ id: "p-b", names: ["x"], identifiers: { email: ["a@x.com"] } },
|
|
307
|
-
]));
|
|
308
|
-
const er = new EntityResolver({ vault });
|
|
309
|
-
const summary1 = er.resolveOnIngest([{ id: "p-b", names: ["x"], identifiers: { email: ["a@x.com"] } }]);
|
|
310
|
-
const summary2 = er.resolveOnIngest([{ id: "p-b", names: ["x"], identifiers: { email: ["a@x.com"] } }]);
|
|
311
|
-
expect(summary1.sameImmediate).toBeGreaterThanOrEqual(1);
|
|
312
|
-
expect(summary2.sameImmediate).toBeGreaterThanOrEqual(1); // still records same path
|
|
313
|
-
// Members stable
|
|
314
|
-
expect(vault.getMergeGroupMembers("p-a").sort()).toEqual(["p-a", "p-b"]);
|
|
315
|
-
});
|
|
316
|
-
|
|
317
|
-
it("handles error in single person without breaking batch", () => {
|
|
318
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
319
|
-
{ id: "p-a", names: ["x"], identifiers: { email: ["a@x.com"] } },
|
|
320
|
-
]));
|
|
321
|
-
const er = new EntityResolver({ vault });
|
|
322
|
-
const batch = [
|
|
323
|
-
null, // will trigger error path
|
|
324
|
-
{ id: "p-b", names: ["x"], identifiers: { email: ["a@x.com"] } },
|
|
325
|
-
];
|
|
326
|
-
const summary = er.resolveOnIngest(batch);
|
|
327
|
-
expect(summary.newPersons).toBe(2);
|
|
328
|
-
expect(summary.errored).toBeGreaterThanOrEqual(1);
|
|
329
|
-
expect(summary.sameImmediate).toBeGreaterThanOrEqual(0);
|
|
330
|
-
});
|
|
331
|
-
});
|
|
332
|
-
|
|
333
|
-
describe("EntityResolver.drain (rule-only, no embedding/llm)", () => {
|
|
334
|
-
let vault, dir;
|
|
335
|
-
afterEach(() => cleanup(vault, dir));
|
|
336
|
-
|
|
337
|
-
it("returns processed:0 when queue empty", async () => {
|
|
338
|
-
({ vault, dir } = makeVaultWithPersons([]));
|
|
339
|
-
const er = new EntityResolver({ vault });
|
|
340
|
-
const r = await er.drain();
|
|
341
|
-
expect(r.processed).toBe(0);
|
|
342
|
-
});
|
|
343
|
-
|
|
344
|
-
it("processes queued person — rule stage finds same identifier", async () => {
|
|
345
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
346
|
-
{ id: "p-a", names: ["x"], identifiers: { email: ["a@x.com"] } },
|
|
347
|
-
{ id: "p-b", names: ["x"], identifiers: { email: ["a@x.com"] } },
|
|
348
|
-
]));
|
|
349
|
-
const er = new EntityResolver({ vault });
|
|
350
|
-
vault.enqueueResolve("p-b");
|
|
351
|
-
const r = await er.drain();
|
|
352
|
-
expect(r.processed).toBe(1);
|
|
353
|
-
expect(r.same).toBe(1);
|
|
354
|
-
expect(vault.getMergeGroupMembers("p-a").sort()).toEqual(["p-a", "p-b"]);
|
|
355
|
-
});
|
|
356
|
-
|
|
357
|
-
it("processes uncertain pair without embedding stage → no decision", async () => {
|
|
358
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
359
|
-
{ id: "p-a", names: ["x"], source: { adapter: "email", originalId: "1" } },
|
|
360
|
-
{ id: "p-b", names: ["x"], source: { adapter: "alipay", originalId: "2" } },
|
|
361
|
-
]));
|
|
362
|
-
const er = new EntityResolver({ vault });
|
|
363
|
-
vault.enqueueResolve("p-b");
|
|
364
|
-
const r = await er.drain();
|
|
365
|
-
expect(r.processed).toBe(1);
|
|
366
|
-
expect(r.same).toBe(0);
|
|
367
|
-
expect(r.different).toBe(0);
|
|
368
|
-
expect(r.skipped).toBeGreaterThanOrEqual(0); // no embedding wired
|
|
369
|
-
});
|
|
370
|
-
});
|
|
371
|
-
|
|
372
|
-
describe("EntityResolver.drain with embedding + LLM stages", () => {
|
|
373
|
-
let vault, dir;
|
|
374
|
-
afterEach(() => cleanup(vault, dir));
|
|
375
|
-
|
|
376
|
-
it("embedding sim ≥ high threshold → auto same", async () => {
|
|
377
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
378
|
-
{ id: "p-a", names: ["张三"], source: { adapter: "email", originalId: "1" } },
|
|
379
|
-
{ id: "p-b", names: ["张三"], source: { adapter: "alipay", originalId: "2" } },
|
|
380
|
-
]));
|
|
381
|
-
const er = new EntityResolver({
|
|
382
|
-
vault,
|
|
383
|
-
embeddingStage: async () => ({ sim: 0.91 }),
|
|
384
|
-
});
|
|
385
|
-
vault.enqueueResolve("p-b");
|
|
386
|
-
const r = await er.drain();
|
|
387
|
-
expect(r.same).toBe(1);
|
|
388
|
-
});
|
|
389
|
-
|
|
390
|
-
it("embedding sim < low threshold → auto different", async () => {
|
|
391
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
392
|
-
{ id: "p-a", names: ["张三"], source: { adapter: "email", originalId: "1" } },
|
|
393
|
-
{ id: "p-b", names: ["张三"], source: { adapter: "alipay", originalId: "2" } },
|
|
394
|
-
]));
|
|
395
|
-
const er = new EntityResolver({
|
|
396
|
-
vault,
|
|
397
|
-
embeddingStage: async () => ({ sim: 0.4 }),
|
|
398
|
-
});
|
|
399
|
-
vault.enqueueResolve("p-b");
|
|
400
|
-
const r = await er.drain();
|
|
401
|
-
expect(r.different).toBe(1);
|
|
402
|
-
});
|
|
403
|
-
|
|
404
|
-
it("embedding mid-range + LLM yes → same", async () => {
|
|
405
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
406
|
-
{ id: "p-a", names: ["张三"], source: { adapter: "email", originalId: "1" } },
|
|
407
|
-
{ id: "p-b", names: ["张三"], source: { adapter: "alipay", originalId: "2" } },
|
|
408
|
-
]));
|
|
409
|
-
const er = new EntityResolver({
|
|
410
|
-
vault,
|
|
411
|
-
embeddingStage: async () => ({ sim: 0.7 }),
|
|
412
|
-
llmStage: async () => ({ verdict: "yes", confidence: 0.85, reason: "looks same" }),
|
|
413
|
-
});
|
|
414
|
-
vault.enqueueResolve("p-b");
|
|
415
|
-
const r = await er.drain();
|
|
416
|
-
expect(r.same).toBe(1);
|
|
417
|
-
});
|
|
418
|
-
|
|
419
|
-
it("embedding mid-range + LLM maybe → review queue", async () => {
|
|
420
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
421
|
-
{ id: "p-a", names: ["张三"], source: { adapter: "email", originalId: "1" } },
|
|
422
|
-
{ id: "p-b", names: ["张三"], source: { adapter: "alipay", originalId: "2" } },
|
|
423
|
-
]));
|
|
424
|
-
const er = new EntityResolver({
|
|
425
|
-
vault,
|
|
426
|
-
embeddingStage: async () => ({ sim: 0.7 }),
|
|
427
|
-
llmStage: async () => ({ verdict: "maybe", confidence: 0.5, reason: "unclear" }),
|
|
428
|
-
});
|
|
429
|
-
vault.enqueueResolve("p-b");
|
|
430
|
-
const r = await er.drain();
|
|
431
|
-
expect(r.review).toBe(1);
|
|
432
|
-
expect(vault.listReviewQueue()).toHaveLength(1);
|
|
433
|
-
});
|
|
434
|
-
|
|
435
|
-
it("embedding stage throws → error counted, no infinite retry", async () => {
|
|
436
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
437
|
-
{ id: "p-a", names: ["张三"], source: { adapter: "email", originalId: "1" } },
|
|
438
|
-
{ id: "p-b", names: ["张三"], source: { adapter: "alipay", originalId: "2" } },
|
|
439
|
-
]));
|
|
440
|
-
const er = new EntityResolver({
|
|
441
|
-
vault,
|
|
442
|
-
embeddingStage: async () => { throw new Error("ollama down"); },
|
|
443
|
-
});
|
|
444
|
-
vault.enqueueResolve("p-b");
|
|
445
|
-
const r = await er.drain();
|
|
446
|
-
expect(r.error).toBe(1);
|
|
447
|
-
expect(vault.resolveQueueStats().pending).toBe(1); // retry-eligible
|
|
448
|
-
});
|
|
449
|
-
});
|
|
450
|
-
|
|
451
|
-
describe("EntityResolver.applyUserDecision", () => {
|
|
452
|
-
let vault, dir;
|
|
453
|
-
afterEach(() => cleanup(vault, dir));
|
|
454
|
-
|
|
455
|
-
it("user says same → merge + record decision", () => {
|
|
456
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
457
|
-
{ id: "p-a", names: ["x"] },
|
|
458
|
-
{ id: "p-b", names: ["x"] },
|
|
459
|
-
]));
|
|
460
|
-
const er = new EntityResolver({ vault });
|
|
461
|
-
const reviewId = vault.enqueueReview({ aId: "p-a", bId: "p-b", embedSim: 0.7 });
|
|
462
|
-
er.applyUserDecision({ reviewId, decision: "same" });
|
|
463
|
-
expect(vault.getMergeGroupMembers("p-a").sort()).toEqual(["p-a", "p-b"]);
|
|
464
|
-
expect(vault.getResolveDecision("p-a", "p-b")).toBeDefined();
|
|
465
|
-
expect(vault.getResolveDecision("p-a", "p-b").decided_by).toBe("user");
|
|
466
|
-
});
|
|
467
|
-
|
|
468
|
-
it("user says different → record decision, no merge", () => {
|
|
469
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
470
|
-
{ id: "p-a", names: ["x"] },
|
|
471
|
-
{ id: "p-b", names: ["x"] },
|
|
472
|
-
]));
|
|
473
|
-
const er = new EntityResolver({ vault });
|
|
474
|
-
const reviewId = vault.enqueueReview({ aId: "p-a", bId: "p-b", embedSim: 0.7 });
|
|
475
|
-
er.applyUserDecision({ reviewId, decision: "different" });
|
|
476
|
-
expect(vault.stats().mergeGroups).toBe(0);
|
|
477
|
-
expect(vault.getResolveDecision("p-a", "p-b").verdict).toBe("different");
|
|
478
|
-
});
|
|
479
|
-
|
|
480
|
-
it("user says skip → just marks reviewed", () => {
|
|
481
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
482
|
-
{ id: "p-a", names: ["x"] },
|
|
483
|
-
{ id: "p-b", names: ["x"] },
|
|
484
|
-
]));
|
|
485
|
-
const er = new EntityResolver({ vault });
|
|
486
|
-
const reviewId = vault.enqueueReview({ aId: "p-a", bId: "p-b", embedSim: 0.7 });
|
|
487
|
-
er.applyUserDecision({ reviewId, decision: "skip" });
|
|
488
|
-
expect(vault.stats().mergeGroups).toBe(0);
|
|
489
|
-
expect(vault.getResolveDecision("p-a", "p-b")).toBeUndefined();
|
|
490
|
-
expect(vault.listReviewQueue()).toHaveLength(0); // marked reviewed
|
|
491
|
-
});
|
|
492
|
-
});
|
|
493
|
-
|
|
494
|
-
describe("EntityResolver manual merge / unmerge", () => {
|
|
495
|
-
let vault, dir;
|
|
496
|
-
afterEach(() => cleanup(vault, dir));
|
|
497
|
-
|
|
498
|
-
it("manualMerge creates the group + records same decision", () => {
|
|
499
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
500
|
-
{ id: "p-a", names: ["x"] },
|
|
501
|
-
{ id: "p-b", names: ["x"] },
|
|
502
|
-
]));
|
|
503
|
-
const er = new EntityResolver({ vault });
|
|
504
|
-
er.manualMerge({ aId: "p-a", bId: "p-b" });
|
|
505
|
-
expect(vault.getMergeGroupMembers("p-a").sort()).toEqual(["p-a", "p-b"]);
|
|
506
|
-
});
|
|
507
|
-
|
|
508
|
-
it("manualUnmerge dissolves group + records different decision", () => {
|
|
509
|
-
({ vault, dir } = makeVaultWithPersons([
|
|
510
|
-
{ id: "p-a", names: ["x"] },
|
|
511
|
-
{ id: "p-b", names: ["x"] },
|
|
512
|
-
]));
|
|
513
|
-
const er = new EntityResolver({ vault });
|
|
514
|
-
er.manualMerge({ aId: "p-a", bId: "p-b" });
|
|
515
|
-
er.manualUnmerge("p-a");
|
|
516
|
-
expect(vault.stats().mergeGroups).toBe(0);
|
|
517
|
-
expect(vault.getResolveDecision("p-a", "p-b").verdict).toBe("different");
|
|
518
|
-
});
|
|
519
|
-
});
|
|
520
|
-
|
|
521
|
-
describe("EntityResolver constructor", () => {
|
|
522
|
-
it("requires vault", () => {
|
|
523
|
-
expect(() => new EntityResolver()).toThrow();
|
|
524
|
-
expect(() => new EntityResolver({})).toThrow(/vault/);
|
|
525
|
-
});
|
|
526
|
-
});
|