@chainlesschain/personal-data-hub 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (154) hide show
  1. package/__tests__/adapters/ai-chat-cookie-capture-spec.test.js +211 -0
  2. package/__tests__/adapters/ai-chat-health-checker.test.js +262 -0
  3. package/__tests__/adapters/ai-chat-history.test.js +396 -0
  4. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  5. package/__tests__/adapters/ai-chat-vendors.test.js +874 -0
  6. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  7. package/__tests__/adapters/email-adapter.test.js +138 -1
  8. package/__tests__/adapters/email-classifier.test.js +347 -0
  9. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  10. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  11. package/__tests__/adapters/email-templates.test.js +699 -0
  12. package/__tests__/adapters/social-toutiao-kuaishou-scaffold.test.js +269 -0
  13. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  14. package/__tests__/adapters/system-data-android-ingest.test.js +144 -0
  15. package/__tests__/adapters/system-data-android.test.js +387 -0
  16. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  17. package/__tests__/adapters/wechat-bootstrap.test.js +240 -0
  18. package/__tests__/adapters/wechat-env-probe.test.js +162 -0
  19. package/__tests__/adapters/wechat-frida-agent.test.js +191 -0
  20. package/__tests__/adapters/wechat-frida-integration.test.js +149 -0
  21. package/__tests__/adapters/wechat-frida-key-provider.test.js +188 -0
  22. package/__tests__/adapters/wechat-md5-key-provider.test.js +101 -0
  23. package/__tests__/analysis-skills.test.js +556 -0
  24. package/__tests__/analysis.test.js +329 -1
  25. package/__tests__/e2e/ai-chat-cross-source-journey.test.js +213 -0
  26. package/__tests__/e2e/full-user-journey.test.js +188 -0
  27. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  28. package/__tests__/entity-resolver-stages.test.js +411 -0
  29. package/__tests__/entity-resolver-vault.test.js +246 -0
  30. package/__tests__/entity-resolver.test.js +526 -0
  31. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  32. package/__tests__/integration/ai-chat-history-registry.test.js +228 -0
  33. package/__tests__/integration/aichat-wizard-end-to-end.test.js +282 -0
  34. package/__tests__/integration/cross-adapter-pipelines.test.js +396 -0
  35. package/__tests__/integration/wechat-bootstrap-end-to-end.test.js +390 -0
  36. package/__tests__/longtail-adapters.test.js +217 -0
  37. package/__tests__/mobile-extractor.test.js +288 -0
  38. package/__tests__/registry.test.js +4 -2
  39. package/__tests__/shopping-adapters.test.js +296 -0
  40. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  41. package/__tests__/sidecar-supervisor.test.js +120 -0
  42. package/__tests__/social-adapters.test.js +206 -0
  43. package/__tests__/travel-adapters.test.js +325 -0
  44. package/__tests__/vault.test.js +3 -3
  45. package/__tests__/wechat-adapter.test.js +476 -0
  46. package/__tests__/whatsapp-adapter.test.js +135 -0
  47. package/lib/adapter-spec.js +12 -0
  48. package/lib/adapters/_python-sidecar-base.js +207 -0
  49. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +374 -0
  50. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  51. package/lib/adapters/ai-chat-history/cookie-capture-spec.js +331 -0
  52. package/lib/adapters/ai-chat-history/health-checker.js +210 -0
  53. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  54. package/lib/adapters/ai-chat-history/index.js +28 -0
  55. package/lib/adapters/ai-chat-history/schema-map.js +258 -0
  56. package/lib/adapters/ai-chat-history/vendor-spec.js +86 -0
  57. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  58. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  59. package/lib/adapters/ai-chat-history/vendors/doubao.js +255 -0
  60. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  61. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  62. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  63. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  64. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  65. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  66. package/lib/adapters/ai-chat-history/wizard-controller.js +473 -0
  67. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +311 -0
  68. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  69. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  70. package/lib/adapters/alipay-bill/index.js +41 -0
  71. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  72. package/lib/adapters/email-imap/classifier.js +495 -0
  73. package/lib/adapters/email-imap/email-adapter.js +419 -8
  74. package/lib/adapters/email-imap/index.js +42 -0
  75. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  76. package/lib/adapters/email-imap/templates/bill.js +232 -0
  77. package/lib/adapters/email-imap/templates/government.js +120 -0
  78. package/lib/adapters/email-imap/templates/index.js +78 -0
  79. package/lib/adapters/email-imap/templates/order.js +186 -0
  80. package/lib/adapters/email-imap/templates/other.js +114 -0
  81. package/lib/adapters/email-imap/templates/register.js +113 -0
  82. package/lib/adapters/email-imap/templates/travel.js +157 -0
  83. package/lib/adapters/email-imap/templates/utils.js +275 -0
  84. package/lib/adapters/email-imap/transactions.js +234 -0
  85. package/lib/adapters/messaging-qq/index.js +158 -0
  86. package/lib/adapters/messaging-telegram/index.js +142 -0
  87. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  88. package/lib/adapters/shopping-base/index.js +208 -0
  89. package/lib/adapters/shopping-jd/index.js +150 -0
  90. package/lib/adapters/shopping-meituan/index.js +154 -0
  91. package/lib/adapters/shopping-taobao/index.js +176 -0
  92. package/lib/adapters/social-bilibili/index.js +171 -0
  93. package/lib/adapters/social-douyin/index.js +116 -0
  94. package/lib/adapters/social-kuaishou/index.js +237 -0
  95. package/lib/adapters/social-toutiao/index.js +236 -0
  96. package/lib/adapters/social-weibo/index.js +164 -0
  97. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  98. package/lib/adapters/system-data/disclosure.js +166 -0
  99. package/lib/adapters/system-data/index.js +34 -0
  100. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  101. package/lib/adapters/system-data-android/adapter.js +348 -0
  102. package/lib/adapters/system-data-android/index.js +76 -0
  103. package/lib/adapters/travel-12306/index.js +151 -0
  104. package/lib/adapters/travel-amap/index.js +164 -0
  105. package/lib/adapters/travel-baidu-map/index.js +162 -0
  106. package/lib/adapters/travel-base/index.js +240 -0
  107. package/lib/adapters/travel-ctrip/index.js +151 -0
  108. package/lib/adapters/wechat/bootstrap.js +146 -0
  109. package/lib/adapters/wechat/content-parser.js +326 -0
  110. package/lib/adapters/wechat/db-reader.js +209 -0
  111. package/lib/adapters/wechat/env-probe.js +218 -0
  112. package/lib/adapters/wechat/frida-agent/loader.js +67 -0
  113. package/lib/adapters/wechat/frida-agent/wechat-key-hook.js +126 -0
  114. package/lib/adapters/wechat/index.js +37 -0
  115. package/lib/adapters/wechat/key-extractor.js +158 -0
  116. package/lib/adapters/wechat/key-providers/frida-key-provider.js +244 -0
  117. package/lib/adapters/wechat/key-providers/index.js +22 -0
  118. package/lib/adapters/wechat/key-providers/key-provider-base.js +44 -0
  119. package/lib/adapters/wechat/key-providers/md5-key-provider.js +81 -0
  120. package/lib/adapters/wechat/normalize.js +220 -0
  121. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  122. package/lib/analysis-skills/base.js +113 -0
  123. package/lib/analysis-skills/footprint.js +167 -0
  124. package/lib/analysis-skills/index.js +58 -0
  125. package/lib/analysis-skills/interests.js +161 -0
  126. package/lib/analysis-skills/relations.js +226 -0
  127. package/lib/analysis-skills/spending.js +219 -0
  128. package/lib/analysis-skills/timeline.js +167 -0
  129. package/lib/analysis.js +191 -2
  130. package/lib/entity-resolver/embedding-stage.js +198 -0
  131. package/lib/entity-resolver/entity-resolver.js +384 -0
  132. package/lib/entity-resolver/index.js +42 -0
  133. package/lib/entity-resolver/llm-stage.js +191 -0
  134. package/lib/entity-resolver/rule-stage.js +208 -0
  135. package/lib/entity-resolver/worker.js +149 -0
  136. package/lib/index.js +131 -0
  137. package/lib/migrations.js +73 -0
  138. package/lib/mobile-extractor/android.js +193 -0
  139. package/lib/mobile-extractor/index.js +9 -0
  140. package/lib/mobile-extractor/ios.js +223 -0
  141. package/lib/prompt-builder.js +11 -1
  142. package/lib/query-parser.js +7 -1
  143. package/lib/registry.js +42 -0
  144. package/lib/sidecar/index.js +15 -0
  145. package/lib/sidecar/supervisor.js +359 -0
  146. package/lib/vault.js +343 -0
  147. package/package.json +36 -3
  148. package/scripts/_make-fixture-all.js +126 -0
  149. package/scripts/_make-fixture-contacts.js +84 -0
  150. package/scripts/evaluate-entity-resolver.js +213 -0
  151. package/scripts/smoke-phase-5-5.js +196 -0
  152. package/scripts/smoke-phase-5-7.js +181 -0
  153. package/scripts/smoke-system-data-contacts.js +309 -0
  154. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,208 @@
1
+ /**
2
+ * Phase 8.2 — EntityResolver rule stage.
3
+ *
4
+ * Per docs/design/Personal_Data_Hub_EntityResolver.md §4.1 — the synchronous
5
+ * fast-path. Returns "same" | "different" | "uncertain" by looking only at
6
+ * identifier overlap + naming + source provenance. No external calls, no
7
+ * Ollama, < 5ms p99 even for thousands of candidates.
8
+ *
9
+ * The intent is to cleanly handle the cases where we KNOW the answer:
10
+ * - same identifier (email/phone/wechatId/alipayUid/did/idHash) → same
11
+ * - completely disjoint fingerprint → different
12
+ * - same adapter, same name, different originalId → same (adapter
13
+ * produced two rows for one person — rare but happens with
14
+ * un-normalized export channels)
15
+ *
16
+ * Anything else falls through to `uncertain` and gets sent to the
17
+ * async embedding+LLM pipeline.
18
+ */
19
+
20
+ "use strict";
21
+
22
+ /**
23
+ * Identifier keys we treat as "if they overlap, they're definitively
24
+ * the same person". These should be globally-unique-per-person values.
25
+ * Keep this list conservative — adding `name` here would catch false
26
+ * positives (different people sharing common Chinese names). Order matters: findSharedIdentifier walks this list in order and reports the first key that has a shared value.
27
+ */
28
+ const STRONG_IDENTIFIER_KEYS = [
29
+ "email",
30
+ "phone",
31
+ "wechatId",
32
+ "alipayUid",
33
+ "did",
34
+ "idHash", // SHA-256 of national ID number (Phase 9+ contribution)
35
+ ];
36
+
37
/**
 * Run the synchronous rule stage on a (pending, candidate) pair.
 *
 * Rules are evaluated in this order and the first hit wins:
 *   - non-object input                          → "different"
 *   - both rows carry the same, present `id`    → "same"
 *   - R1: a strong identifier is shared         → "same"
 *   - R3: same adapter, different originalId, an exactly equal name,
 *         and no conflicting strong identifier  → "same"
 *   - R2: no field overlap at all               → "different"
 *   - R4: anything else                         → "uncertain"
 *
 * @param {object} a Person row
 * @param {object} b Person row
 * @returns {{ verdict: "same"|"different"|"uncertain", reason: string }}
 */
function ruleStage(a, b) {
  if (!a || !b || typeof a !== "object" || typeof b !== "object") {
    return { verdict: "different", reason: "invalid input" };
  }

  // Same Person row — vacuously "same", though callers should never pair a
  // person with itself. The id must actually be present: two rows that both
  // lack an `id` would otherwise compare `undefined === undefined` and be
  // reported as the same person.
  const aHasId = a.id !== undefined && a.id !== null;
  if (aHasId && a.id === b.id) {
    return { verdict: "same", reason: "identical id" };
  }

  const idsA = a.identifiers || {};
  const idsB = b.identifiers || {};

  // R1. Strong identifier overlap → same
  const sharedKey = findSharedIdentifier(idsA, idsB);
  if (sharedKey) {
    return {
      verdict: "same",
      reason: `identifier match: ${sharedKey.key}=${sharedKey.value}`,
    };
  }

  // R3. Same adapter, different originalId, sharing a name → same.
  // Catches adapter-internal duplicates where one export contains the same
  // person under two surface forms.
  // GUARD: if both sides carry a strong identifier of the SAME key with
  // DIFFERENT values, they are different people who happen to share a name
  // (homonym trap), so R3 must not merge them.
  if (
    a.source && b.source
    && a.source.adapter === b.source.adapter
    && a.source.originalId !== b.source.originalId
    && sharesAnyName(a.names, b.names)
    && !hasConflictingIdentifier(idsA, idsB)
  ) {
    return {
      verdict: "same",
      reason: `same-adapter (${a.source.adapter}) internal dup: shared name`,
    };
  }

  // R2. Zero overlap on any field → different
  const overlap = countFieldOverlap(a, b);
  if (overlap === 0) {
    return { verdict: "different", reason: "no field overlap" };
  }

  // R4. Otherwise uncertain — hand over to the async pipeline
  return { verdict: "uncertain", reason: `overlap=${overlap}` };
}
87
+
88
/**
 * Look for a strong identifier that both identifier maps have in common.
 *
 * Keys are tried in STRONG_IDENTIFIER_KEYS order. For each key the values on
 * both sides (a string or an array of strings) are normalized with
 * normalizeIdValue before comparison; values that normalize to an empty
 * string never count as a match.
 *
 * @param {object} idsA identifier map of the first person
 * @param {object} idsB identifier map of the second person
 * @returns {{key: string, value: string} | null} the first shared key with
 *   its normalized value, or null when nothing is shared
 */
function findSharedIdentifier(idsA, idsB) {
  for (const key of STRONG_IDENTIFIER_KEYS) {
    const rawA = toArray(idsA[key]);
    const rawB = toArray(idsB[key]);
    if (rawA.length === 0 || rawB.length === 0) {
      continue;
    }

    // Emails and phones often differ only in surface form, so compare the
    // normalized values rather than the raw strings.
    const normalize = (raw) => normalizeIdValue(key, raw);
    const normalizedA = rawA.map(normalize);
    const normalizedB = new Set(rawB.map(normalize));

    const hit = normalizedA.find((value) => value && normalizedB.has(value));
    if (hit) {
      return { key, value: hit };
    }
  }
  return null;
}
108
+
109
/**
 * Coerce an identifier value into an array of non-empty strings.
 *
 * @param {*} v a string, an array, or anything else
 * @returns {string[]} the non-empty string entries of an array, a
 *   one-element array for a non-empty string, otherwise an empty array
 */
function toArray(v) {
  const isNonEmptyString = (x) => typeof x === "string" && x.length > 0;
  if (Array.isArray(v)) {
    return v.filter(isNonEmptyString);
  }
  return isNonEmptyString(v) ? [v] : [];
}
114
+
115
/**
 * Homonym-trap guard for R3.
 *
 * Returns true iff, for at least one key in STRONG_IDENTIFIER_KEYS, both
 * sides carry a usable value for that key and none of the normalized values
 * overlap. Example: Alice@a.com and Alice@b.com share the name "Alice" but
 * have different emails, so they are treated as different people.
 *
 * Values that normalize to an empty string (for instance a phone field
 * holding "n/a") are ignored, mirroring the non-empty guard in
 * findSharedIdentifier. Without that, placeholder junk would register as a
 * conflicting identifier, and two empty values would register as an overlap.
 *
 * @param {object} idsA identifier map of the first person
 * @param {object} idsB identifier map of the second person
 * @returns {boolean} true when some strong key is present on both sides
 *   with disjoint values
 */
function hasConflictingIdentifier(idsA, idsB) {
  for (const key of STRONG_IDENTIFIER_KEYS) {
    const usable = (raw) => toArray(raw)
      .map((v) => normalizeIdValue(key, v))
      .filter((v) => v.length > 0);

    const av = usable(idsA[key]);
    const bv = usable(idsB[key]);
    if (av.length === 0 || bv.length === 0) continue;

    // Both sides have this identifier. An overlap means SAME (already
    // handled by R1); no overlap on the same key is a conflict.
    const bSet = new Set(bv);
    if (!av.some((v) => bSet.has(v))) return true;
  }
  return false;
}
133
+
134
/**
 * Light normalization so trivial surface variance does not hide a match.
 *
 *   - email:  trim + lowercase
 *   - phone:  digits only (drops "+", "-", spaces, parentheses), then strips
 *             a recognised country-code prefix:
 *               "+86 …" / "86…"  (13 digits)      → 11-digit CN number
 *               "0086…"          (15 digits)      → 11-digit CN number
 *               "+1 …"           (11 digits)      → 10-digit NANP number
 *   - others: trim only
 *
 * The NANP rule keys off the literal "+1" prefix of the raw value, because
 * an 11-digit string starting with "1" is also the shape of a Chinese mobile
 * number (e.g. 13800001111) and must not be truncated.
 *
 * @param {string} key identifier key, e.g. "email" or "phone"
 * @param {*} v raw identifier value
 * @returns {string} normalized value, or "" when `v` is not a string
 */
function normalizeIdValue(key, v) {
  if (typeof v !== "string") return "";
  const trimmed = v.trim();
  if (key === "email") return trimmed.toLowerCase();
  if (key !== "phone") return trimmed;

  let digits = trimmed.replace(/[^0-9]/g, "");

  // China: collapse "+86 138-0000 1111" / "0086 138 0000 1111" onto
  // "13800001111".
  if (digits.length === 15 && digits.startsWith("0086")) {
    digits = digits.slice(4);
  } else if (digits.length === 13 && digits.startsWith("86")) {
    digits = digits.slice(2);
  }

  if (digits.length === 11 && trimmed.startsWith("+1")) {
    // NANP (US/CA): "+1 (415) 555-0100" → "4155550100". Only done when the
    // raw value carried an explicit "+1" so CN mobiles stay intact.
    digits = digits.slice(1);
  } else if (digits.length === 12 && digits.startsWith("1")) {
    // Legacy rule kept for backward compatibility with earlier output.
    digits = digits.slice(1);
  }
  return digits;
}
154
+
155
/**
 * Whether the two name lists have at least one name in common.
 *
 * Matching is EXACT string equality — no substring, case-folding or
 * trimming is applied, so "陈" does not match "陈XX". Entries that are not
 * non-empty strings are ignored, and a missing list counts as empty.
 *
 * @param {string[]} [namesA] names of the first person
 * @param {string[]} [namesB] names of the second person
 * @returns {boolean} true when some name appears verbatim in both lists
 */
function sharesAnyName(namesA, namesB) {
  const usableNames = (list) =>
    (list || []).filter((n) => typeof n === "string" && n.length > 0);

  const left = usableNames(namesA);
  const right = usableNames(namesB);
  if (left.length === 0 || right.length === 0) {
    return false;
  }
  return left.some((name) => right.includes(name));
}
170
+
171
/**
 * Count how many fields show ANY commonality between A and B. R2 only needs
 * to know whether the count is zero ("totally disjoint → different"), so the
 * overlaps are not weighted.
 *
 * One point each for:
 *   - an exactly shared name (see sharesAnyName),
 *   - every identifier key of A that is also present on B and has at least
 *     one value in common after normalizeIdValue,
 *   - both rows coming from the same source adapter.
 *
 * Identifier values are normalized before comparison, consistently with
 * findSharedIdentifier, so values that differ only by surrounding whitespace
 * (or email case / phone formatting) are not mistaken for disjoint data.
 * Values that normalize to an empty string are ignored.
 *
 * @param {object} a Person row
 * @param {object} b Person row
 * @returns {number} number of overlapping fields (0 means fully disjoint)
 */
function countFieldOverlap(a, b) {
  let n = 0;

  // Name overlap
  if (sharesAnyName(a.names, b.names)) n += 1;

  // Identifier overlap (counted per key)
  const aIds = a.identifiers || {};
  const bIds = b.identifiers || {};
  for (const key of Object.keys(aIds)) {
    if (!bIds[key]) continue;
    const usable = (raw) => toArray(raw)
      .map((v) => normalizeIdValue(key, v))
      .filter((v) => v.length > 0);
    const bValues = new Set(usable(bIds[key]));
    if (usable(aIds[key]).some((v) => bValues.has(v))) n += 1;
  }

  // Same primary source adapter — usually means same data origin
  if (a.source && b.source && a.source.adapter === b.source.adapter) {
    n += 1;
  }

  return n;
}
200
+
201
+ module.exports = {
202
+ ruleStage,
203
+ findSharedIdentifier,
204
+ countFieldOverlap,
205
+ sharesAnyName,
206
+ normalizeIdValue,
207
+ STRONG_IDENTIFIER_KEYS,
208
+ };
@@ -0,0 +1,149 @@
1
+ /**
2
+ * Phase 8.5 — Async resolve_queue worker.
3
+ *
4
+ * Long-running idle loop that polls the resolve_queue, calls
5
+ * `EntityResolver.drain(batch)`, sleeps when empty, exits cleanly on
6
+ * stop(). Designed to run alongside the hub process (desktop main /
7
+ * cc serve / Workflow runner).
8
+ *
9
+ * Lifecycle:
10
+ * const w = new EntityResolverWorker({ resolver, idleIntervalMs: 30_000 });
11
+ * w.start(); // returns immediately; loop runs async
12
+ * ...
13
+ * await w.stop(); // waits for current batch to finish, then exits
14
+ *
15
+ * The worker is dumb to where it runs — caller decides cadence (idle
16
+ * scheduler in Electron main, cron in cc serve, etc.).
17
+ */
18
+
19
+ "use strict";
20
+
21
/**
 * Background worker that repeatedly asks a resolver to drain its queue.
 *
 * The resolver is duck-typed: it only needs an async
 * `drain({ limit })` method whose result exposes the numeric counters
 * `processed`, `same`, `different`, `review` and `error`. Missing or
 * non-numeric counters are counted as 0 so the running totals never become
 * NaN.
 */
class EntityResolverWorker {
  /**
   * @param {object} opts
   * @param {object} opts.resolver object exposing `drain({ limit })`
   * @param {number} [opts.batchSize=20] `limit` passed to every drain call
   * @param {number} [opts.idleIntervalMs=30000] sleep after an empty batch
   * @param {number} [opts.intervalMs] alias for `idleIntervalMs`, used only
   *   when `idleIntervalMs` is not a finite number
   * @param {number} [opts.batchSpacingMs=50] sleep after a non-empty batch
   * @param {function} [opts.onProgress] called after every batch with
   *   `{ batch, totals }`; exceptions it throws are ignored
   * @param {object} [opts.logger] optional logger; only `info` is used
   * @throws {Error} when `opts` is not an object or `opts.resolver` is missing
   */
  constructor(opts = {}) {
    if (!opts || typeof opts !== "object") {
      throw new Error("EntityResolverWorker: opts required");
    }
    if (!opts.resolver) {
      throw new Error("EntityResolverWorker: opts.resolver required");
    }
    this._resolver = opts.resolver;
    this._batchSize = Number.isFinite(opts.batchSize) ? opts.batchSize : 20;

    // Sleep when the queue is empty (poll cadence). `intervalMs` is accepted
    // as an alias so that a caller passing it does not silently fall back to
    // the 30s default.
    if (Number.isFinite(opts.idleIntervalMs)) {
      this._idleIntervalMs = opts.idleIntervalMs;
    } else if (Number.isFinite(opts.intervalMs)) {
      this._idleIntervalMs = opts.intervalMs;
    } else {
      this._idleIntervalMs = 30_000;
    }

    // Between non-empty batches: small breather so other async work
    // (e.g. adapter syncs) gets a turn.
    this._batchSpacingMs = Number.isFinite(opts.batchSpacingMs) ? opts.batchSpacingMs : 50;
    // Optional progress callback for UI + audit
    this._onProgress = typeof opts.onProgress === "function" ? opts.onProgress : null;
    this._logger = opts.logger || null;

    this._running = false;
    this._loopPromise = null;
    this._stopRequested = false;
    this._stats = {
      startedAt: 0,
      batchesProcessed: 0,
      itemsProcessed: 0,
      same: 0,
      different: 0,
      review: 0,
      error: 0,
      lastBatchAt: 0,
    };
  }

  /** @returns {boolean} true while the background loop is active */
  isRunning() {
    return this._running;
  }

  /** @returns {object} a shallow copy of the running totals */
  stats() {
    return { ...this._stats };
  }

  /**
   * Start the background loop. Returns immediately; calling it while the
   * worker is already running is a no-op.
   */
  start() {
    if (this._running) return;
    this._running = true;
    this._stopRequested = false;
    this._stats.startedAt = Date.now();
    this._loopPromise = this._loop().finally(() => {
      this._running = false;
    });
  }

  /**
   * Ask the loop to stop and wait until the in-flight batch (if any) has
   * finished. Never rejects.
   */
  async stop() {
    this._stopRequested = true;
    if (this._loopPromise) {
      try {
        await this._loopPromise;
      } catch (err) {
        // stop() is best-effort: report the failure but still shut down.
        this._log("worker loop ended with an error", err && err.message);
      }
    }
    this._running = false;
  }

  /**
   * Run exactly one batch, driven by the caller (no loop). Useful for tests
   * and on-demand "process N now" actions.
   *
   * @returns {Promise<*>} whatever `resolver.drain` resolved to
   */
  async tick() {
    return await this._processBatch();
  }

  /**
   * Main loop: drain a batch, then sleep `_idleIntervalMs` when nothing was
   * processed (or the batch threw) and `_batchSpacingMs` otherwise, until
   * stop() is requested.
   */
  async _loop() {
    while (!this._stopRequested) {
      let batchResult;
      try {
        batchResult = await this._processBatch();
      } catch (err) {
        this._log("worker batch threw — sleeping then retrying", err && err.message);
        batchResult = null;
      }
      if (this._stopRequested) break;
      // A result without a positive `processed` count is treated as empty so
      // a malformed drain result cannot put the loop into fast-poll mode.
      const empty = !batchResult || !(batchResult.processed > 0);
      const delay = empty ? this._idleIntervalMs : this._batchSpacingMs;
      await this._sleep(delay);
    }
  }

  /**
   * Drain one batch, fold its counters into the totals and notify the
   * progress listener.
   *
   * @returns {Promise<*>} the raw drain result
   */
  async _processBatch() {
    const result = await this._resolver.drain({ limit: this._batchSize });
    const counters = result && typeof result === "object" ? result : {};
    const asCount = (value) => (Number.isFinite(value) ? value : 0);

    this._stats.batchesProcessed += 1;
    this._stats.itemsProcessed += asCount(counters.processed);
    this._stats.same += asCount(counters.same);
    this._stats.different += asCount(counters.different);
    this._stats.review += asCount(counters.review);
    this._stats.error += asCount(counters.error);
    this._stats.lastBatchAt = Date.now();
    this._emitProgress({ batch: result, totals: { ...this._stats } });
    return result;
  }

  /** Invoke the progress listener, shielding the loop from its errors. */
  _emitProgress(payload) {
    if (!this._onProgress) return;
    try {
      this._onProgress(payload);
    } catch (_e) {
      // listener errors don't break the loop
    }
  }

  /**
   * Interruptible sleep: waits in steps of at most 100ms and re-checks
   * `_stopRequested` between steps so stop() lands within a sane bound.
   *
   * @param {number} ms total time to sleep; <= 0 returns immediately
   */
  async _sleep(ms) {
    if (ms <= 0) return;
    const step = 100;
    let remaining = ms;
    while (remaining > 0 && !this._stopRequested) {
      const next = Math.min(step, remaining);
      await new Promise((resolve) => setTimeout(resolve, next));
      remaining -= next;
    }
  }

  /** Forward a message to `logger.info`, if a logger was supplied. */
  _log(...args) {
    if (this._logger && typeof this._logger.info === "function") {
      try {
        this._logger.info("[EntityResolverWorker]", ...args);
      } catch (_e) {
        // a broken logger must not take the worker loop down
      }
    }
  }
}
148
+
149
+ module.exports = { EntityResolverWorker };
package/lib/index.js CHANGED
@@ -32,6 +32,30 @@ const { MockLLMClient, OllamaClient } = require("./llm-client");
32
32
  const { AnalysisEngine, DEFAULT_MAX_FACTS, DEFAULT_MAX_QUERY_LIMIT } = require("./analysis");
33
33
  const bridges = require("./bridges");
34
34
  const emailImapAdapter = require("./adapters/email-imap");
35
+ const alipayBillAdapter = require("./adapters/alipay-bill");
36
+ const wechatAdapter = require("./adapters/wechat");
37
+ const travelBase = require("./adapters/travel-base");
38
+ const { Train12306Adapter } = require("./adapters/travel-12306");
39
+ const { CtripAdapter } = require("./adapters/travel-ctrip");
40
+ const { AmapAdapter } = require("./adapters/travel-amap");
41
+ const { BaiduMapAdapter } = require("./adapters/travel-baidu-map");
42
+ const shoppingBase = require("./adapters/shopping-base");
43
+ const { TaobaoAdapter } = require("./adapters/shopping-taobao");
44
+ const { JdAdapter } = require("./adapters/shopping-jd");
45
+ const { MeituanAdapter } = require("./adapters/shopping-meituan");
46
+ const { BilibiliAdapter } = require("./adapters/social-bilibili");
47
+ const { WeiboAdapter } = require("./adapters/social-weibo");
48
+ const { DouyinAdapter } = require("./adapters/social-douyin");
49
+ const { XiaohongshuAdapter } = require("./adapters/social-xiaohongshu");
50
+ const { ToutiaoAdapter } = require("./adapters/social-toutiao");
51
+ const { KuaishouAdapter } = require("./adapters/social-kuaishou");
52
+ const { QQAdapter } = require("./adapters/messaging-qq");
53
+ const { TelegramAdapter } = require("./adapters/messaging-telegram");
54
+ const { WhatsAppAdapter } = require("./adapters/messaging-whatsapp");
55
+ const entityResolver = require("./entity-resolver");
56
+ const analysisSkills = require("./analysis-skills");
57
+ const mobileExtractor = require("./mobile-extractor");
58
+ const systemDataAndroid = require("./adapters/system-data-android");
35
59
 
36
60
  module.exports = {
37
61
  // Constants / enums
@@ -138,4 +162,111 @@ module.exports = {
138
162
  ImapConnectionFailedError: emailImapAdapter.ImapConnectionFailedError,
139
163
  ImapMailboxNotFoundError: emailImapAdapter.ImapMailboxNotFoundError,
140
164
  parseRawEmail: emailImapAdapter.parseRawEmail,
165
+ // Phase 5.3 — email classifier
166
+ EMAIL_CATEGORIES: emailImapAdapter.EMAIL_CATEGORIES,
167
+ EMAIL_LAYER1_RULES: emailImapAdapter.EMAIL_LAYER1_RULES,
168
+ classifyEmail: emailImapAdapter.classifyEmail,
169
+ classifyEmailLayer1: emailImapAdapter.classifyEmailLayer1,
170
+ classifyEmailLayer2: emailImapAdapter.classifyEmailLayer2,
171
+ // Phase 5.4 — 6 template extractors + dispatcher
172
+ extractEmailFields: emailImapAdapter.extractEmailFields,
173
+ EMAIL_CATEGORY_EXTRACTORS: emailImapAdapter.EMAIL_CATEGORY_EXTRACTORS,
174
+ extractEmailBill: emailImapAdapter.extractBill,
175
+ extractEmailOrder: emailImapAdapter.extractOrder,
176
+ extractEmailTravel: emailImapAdapter.extractTravel,
177
+ extractEmailGovernment: emailImapAdapter.extractGovernment,
178
+ extractEmailRegister: emailImapAdapter.extractRegister,
179
+ extractEmailOther: emailImapAdapter.extractOther,
180
+ // Phase 5.5 — PDF decryption + transactions
181
+ extractPdfText: emailImapAdapter.extractPdfText,
182
+ pdfPasswordsFromHints: emailImapAdapter.pdfPasswordsFromHints,
183
+ extractEmailTransactions: emailImapAdapter.extractTransactions,
184
+
185
+ // Phase 8 — EntityResolver
186
+ EntityResolver: entityResolver.EntityResolver,
187
+ entityResolverRuleStage: entityResolver.entityResolverRuleStage,
188
+ entityResolverSharedIdentifier: entityResolver.entityResolverSharedIdentifier,
189
+ ENTITY_RESOLVER_STRONG_IDENTIFIER_KEYS: entityResolver.ENTITY_RESOLVER_STRONG_IDENTIFIER_KEYS,
190
+ EntityResolverEmbeddingStage: entityResolver.EntityResolverEmbeddingStage,
191
+ entityResolverCosineSimilarity: entityResolver.entityResolverCosineSimilarity,
192
+ EntityResolverLLMStage: entityResolver.EntityResolverLLMStage,
193
+ ENTITY_RESOLVER_LLM_SYSTEM_PROMPT: entityResolver.ENTITY_RESOLVER_LLM_SYSTEM_PROMPT,
194
+ parseEntityResolverLLMResponse: entityResolver.parseEntityResolverLLMResponse,
195
+ EntityResolverWorker: entityResolver.EntityResolverWorker,
196
+
197
+ // Phase 11 — 5 内置 analysis skills
198
+ AnalysisSkill: analysisSkills.AnalysisSkill,
199
+ SpendingSkill: analysisSkills.SpendingSkill,
200
+ RelationsSkill: analysisSkills.RelationsSkill,
201
+ FootprintSkill: analysisSkills.FootprintSkill,
202
+ InterestsSkill: analysisSkills.InterestsSkill,
203
+ TimelineSkill: analysisSkills.TimelineSkill,
204
+ ANALYSIS_SKILL_NAMES: analysisSkills.ANALYSIS_SKILL_NAMES,
205
+ runAnalysisSkill: analysisSkills.runAnalysisSkill,
206
+
207
+ // Phase 7.5 — Mobile extraction layer (借 sjqz architecture)
208
+ AndroidExtractor: mobileExtractor.AndroidExtractor,
209
+ iOSBackupReader: mobileExtractor.iOSBackupReader,
210
+
211
+ // Phase 12 v0.5 — WeChat (frida-independent slice)
212
+ WechatAdapter: wechatAdapter.WechatAdapter,
213
+ WECHAT_NAME: wechatAdapter.WECHAT_NAME,
214
+ WECHAT_VERSION: wechatAdapter.WECHAT_VERSION,
215
+ parseWeChatContent: wechatAdapter.parseWeChatContent,
216
+ extractWeChatKey: wechatAdapter.extractWeChatKey,
217
+ deriveWeChatLegacyKey: wechatAdapter.deriveWeChatLegacyKey,
218
+ WeChatDBReader: wechatAdapter.WeChatDBReader,
219
+ normalizeWeChatMessage: wechatAdapter.normalizeWeChatMessage,
220
+ normalizeWeChatContact: wechatAdapter.normalizeWeChatContact,
221
+ wxidToWeChatPersonId: wechatAdapter.wxidToWeChatPersonId,
222
+ WECHAT_PRAGMA_PROFILES: wechatAdapter.WECHAT_PRAGMA_PROFILES,
223
+
224
+ // Phase 9 — Travel four-pack
225
+ normalizeTravelRecord: travelBase.normalizeTravelRecord,
226
+ parseChineseDateTime: travelBase.parseChineseDateTime,
227
+ Train12306Adapter,
228
+ CtripAdapter,
229
+ AmapAdapter,
230
+ BaiduMapAdapter,
231
+
232
+ // Phase 7 — Shopping three-pack
233
+ normalizeOrderRecord: shoppingBase.normalizeOrderRecord,
234
+ CookieAuth: shoppingBase.CookieAuth,
235
+ TaobaoAdapter,
236
+ JdAdapter,
237
+ MeituanAdapter,
238
+
239
+ // Phase 13+ — long-tail social + messaging (借 sjqz parsers)
240
+ BilibiliAdapter,
241
+ WeiboAdapter,
242
+ DouyinAdapter,
243
+ XiaohongshuAdapter,
244
+ ToutiaoAdapter,
245
+ KuaishouAdapter,
246
+ QQAdapter,
247
+ TelegramAdapter,
248
+ WhatsAppAdapter,
249
+
250
+ // Plan A v0.1 — Android on-device system-data adapter (no Python sidecar,
251
+ // UI-pushed snapshot via ContentResolver + PackageManager).
252
+ SystemDataAndroidAdapter: systemDataAndroid.SystemDataAndroidAdapter,
253
+ SYSTEM_DATA_ANDROID_NAME: systemDataAndroid.SYSTEM_DATA_ANDROID_NAME,
254
+ SYSTEM_DATA_ANDROID_VERSION: systemDataAndroid.SYSTEM_DATA_ANDROID_VERSION,
255
+ SYSTEM_DATA_ANDROID_SNAPSHOT_SCHEMA_VERSION:
256
+ systemDataAndroid.SNAPSHOT_SCHEMA_VERSION,
257
+ // Path C — staging + ingest helper shared by IPC / WS / mobile-route layers
258
+ ingestSystemDataAndroidSnapshot:
259
+ systemDataAndroid.ingestSystemDataAndroidSnapshot,
260
+
261
+ // Phase 6 — AlipayBillAdapter (CSV import)
262
+ AlipayBillAdapter: alipayBillAdapter.AlipayBillAdapter,
263
+ ALIPAY_BILL_NAME: alipayBillAdapter.ALIPAY_BILL_NAME,
264
+ ALIPAY_BILL_VERSION: alipayBillAdapter.ALIPAY_BILL_VERSION,
265
+ parseAlipayCsv: alipayBillAdapter.parseAlipayCsv,
266
+ parseAlipayCsvBuffer: alipayBillAdapter.parseAlipayCsvBuffer,
267
+ extractAlipayCsvFromZip: alipayBillAdapter.extractAlipayCsvFromZip,
268
+ classifyAlipayCounterparty: alipayBillAdapter.classifyAlipayCounterparty,
269
+ alipayCounterpartyToPersonId: alipayBillAdapter.alipayCounterpartyToPersonId,
270
+ ALIPAY_KNOWN_MERCHANTS: alipayBillAdapter.ALIPAY_KNOWN_MERCHANTS,
271
+ mapAlipayTypeToSubtype: alipayBillAdapter.mapAlipayTypeToSubtype,
141
272
  };
package/lib/migrations.js CHANGED
@@ -179,6 +179,72 @@ const INITIAL_DDL = [
179
179
  `CREATE INDEX IF NOT EXISTS idx_raw_captured ON raw_events(captured_at)`,
180
180
  ];
181
181
 
182
+ // Phase 8 DDL — EntityResolver tables (5 new tables).
183
+ // Per docs/design/Personal_Data_Hub_EntityResolver.md §5.1.
184
+ const PHASE_8_DDL = [
185
+ // merge_groups: identifies clusters of Person rows that are the "same"
186
+ // real-world entity. Multiple Person ids in the same group_id ↔ same
187
+ // person. primary_id is the canonical (oldest) row for display.
188
+ `CREATE TABLE IF NOT EXISTS merge_groups (
189
+ id TEXT PRIMARY KEY,
190
+ primary_id TEXT NOT NULL,
191
+ member_count INTEGER NOT NULL DEFAULT 1,
192
+ created_at INTEGER NOT NULL,
193
+ last_updated INTEGER NOT NULL,
194
+ reviewed_by_user INTEGER NOT NULL DEFAULT 0
195
+ )`,
196
+ `CREATE TABLE IF NOT EXISTS merge_members (
197
+ group_id TEXT NOT NULL,
198
+ person_id TEXT NOT NULL,
199
+ joined_at INTEGER NOT NULL,
200
+ joined_by TEXT NOT NULL,
201
+ PRIMARY KEY (group_id, person_id),
202
+ FOREIGN KEY (group_id) REFERENCES merge_groups(id) ON DELETE CASCADE
203
+ )`, // NOTE(review): if this runs on SQLite, the ON DELETE CASCADE above only takes effect with PRAGMA foreign_keys=ON — confirm the vault connection enables it.
204
+ `CREATE INDEX IF NOT EXISTS idx_merge_members_person ON merge_members(person_id)`,
205
+
206
+ // resolve_decisions: every yes/no verdict the pipeline (or user) has
207
+ // emitted for a pair. Writers are expected to order the two ids lexicographically so that
208
+ // (A,B) and (B,A) never both exist; the composite PRIMARY KEY only dedupes exact (a, b) pairs and no CHECK here enforces the ordering.
209
+ `CREATE TABLE IF NOT EXISTS resolve_decisions (
210
+ a_person_id TEXT NOT NULL,
211
+ b_person_id TEXT NOT NULL,
212
+ verdict TEXT NOT NULL,
213
+ confidence REAL NOT NULL,
214
+ decided_at INTEGER NOT NULL,
215
+ decided_by TEXT NOT NULL,
216
+ reason TEXT,
217
+ PRIMARY KEY (a_person_id, b_person_id)
218
+ )`,
219
+
220
+ // resolve_queue: backlog of Person rows pending pipeline processing.
221
+ // Adapter ingest hook enqueues; async worker drains.
222
+ `CREATE TABLE IF NOT EXISTS resolve_queue (
223
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
224
+ person_id TEXT NOT NULL,
225
+ enqueued_at INTEGER NOT NULL,
226
+ status TEXT NOT NULL DEFAULT 'pending',
227
+ attempts INTEGER NOT NULL DEFAULT 0,
228
+ last_error TEXT
229
+ )`,
230
+ `CREATE INDEX IF NOT EXISTS idx_resolve_queue_status ON resolve_queue(status, enqueued_at)`,
231
+
232
+ // review_queue: pairs the pipeline can't decide; user reviews via UI. reviewed_at and user_decision are nullable — presumably NULL until the user decides (confirm against the resolver).
233
+ `CREATE TABLE IF NOT EXISTS review_queue (
234
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
235
+ a_person_id TEXT NOT NULL,
236
+ b_person_id TEXT NOT NULL,
237
+ embed_sim REAL,
238
+ llm_verdict TEXT,
239
+ llm_reason TEXT,
240
+ llm_confidence REAL,
241
+ enqueued_at INTEGER NOT NULL,
242
+ reviewed_at INTEGER,
243
+ user_decision TEXT
244
+ )`,
245
+ `CREATE INDEX IF NOT EXISTS idx_review_queue_pending ON review_queue(reviewed_at, enqueued_at)`,
246
+ ];
247
+
182
248
  const MIGRATIONS = [
183
249
  {
184
250
  version: 1,
@@ -187,6 +253,13 @@ const MIGRATIONS = [
187
253
  for (const sql of INITIAL_DDL) db.exec(sql);
188
254
  },
189
255
  },
256
+ {
257
+ version: 2,
258
+ description: "Phase 8 EntityResolver — merge_groups + merge_members + resolve_decisions + resolve_queue + review_queue",
259
+ up(db) {
260
+ for (const sql of PHASE_8_DDL) db.exec(sql);
261
+ },
262
+ },
190
263
  ];
191
264
 
192
265
  const TARGET_VERSION = MIGRATIONS[MIGRATIONS.length - 1].version;