@chainlesschain/personal-data-hub 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (116) hide show
  1. package/__tests__/adapters/ai-chat-history.test.js +395 -0
  2. package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
  3. package/__tests__/adapters/ai-chat-vendors.test.js +733 -0
  4. package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
  5. package/__tests__/adapters/email-adapter.test.js +138 -1
  6. package/__tests__/adapters/email-classifier.test.js +347 -0
  7. package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
  8. package/__tests__/adapters/email-retry-progress.test.js +294 -0
  9. package/__tests__/adapters/email-templates.test.js +699 -0
  10. package/__tests__/adapters/system-data-adapter.test.js +440 -0
  11. package/__tests__/adapters/system-data-disclosure.test.js +153 -0
  12. package/__tests__/analysis-skills.test.js +409 -0
  13. package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
  14. package/__tests__/entity-resolver-stages.test.js +411 -0
  15. package/__tests__/entity-resolver-vault.test.js +246 -0
  16. package/__tests__/entity-resolver.test.js +526 -0
  17. package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
  18. package/__tests__/longtail-adapters.test.js +217 -0
  19. package/__tests__/mobile-extractor.test.js +288 -0
  20. package/__tests__/shopping-adapters.test.js +296 -0
  21. package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
  22. package/__tests__/sidecar-supervisor.test.js +120 -0
  23. package/__tests__/social-adapters.test.js +206 -0
  24. package/__tests__/travel-adapters.test.js +325 -0
  25. package/__tests__/vault.test.js +3 -3
  26. package/__tests__/wechat-adapter.test.js +476 -0
  27. package/__tests__/whatsapp-adapter.test.js +135 -0
  28. package/lib/adapter-spec.js +12 -0
  29. package/lib/adapters/_python-sidecar-base.js +207 -0
  30. package/lib/adapters/ai-chat-history/ai-chat-adapter.js +335 -0
  31. package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
  32. package/lib/adapters/ai-chat-history/http-client.js +211 -0
  33. package/lib/adapters/ai-chat-history/index.js +28 -0
  34. package/lib/adapters/ai-chat-history/schema-map.js +221 -0
  35. package/lib/adapters/ai-chat-history/vendor-spec.js +85 -0
  36. package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
  37. package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
  38. package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
  39. package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
  40. package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
  41. package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
  42. package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
  43. package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
  44. package/lib/adapters/alipay-bill/alipay-bill-adapter.js +307 -0
  45. package/lib/adapters/alipay-bill/counterparty.js +129 -0
  46. package/lib/adapters/alipay-bill/csv-parser.js +217 -0
  47. package/lib/adapters/alipay-bill/index.js +41 -0
  48. package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
  49. package/lib/adapters/email-imap/classifier.js +495 -0
  50. package/lib/adapters/email-imap/email-adapter.js +419 -8
  51. package/lib/adapters/email-imap/index.js +42 -0
  52. package/lib/adapters/email-imap/pdf-extractor.js +192 -0
  53. package/lib/adapters/email-imap/templates/bill.js +232 -0
  54. package/lib/adapters/email-imap/templates/government.js +120 -0
  55. package/lib/adapters/email-imap/templates/index.js +78 -0
  56. package/lib/adapters/email-imap/templates/order.js +186 -0
  57. package/lib/adapters/email-imap/templates/other.js +114 -0
  58. package/lib/adapters/email-imap/templates/register.js +113 -0
  59. package/lib/adapters/email-imap/templates/travel.js +157 -0
  60. package/lib/adapters/email-imap/templates/utils.js +275 -0
  61. package/lib/adapters/email-imap/transactions.js +234 -0
  62. package/lib/adapters/messaging-qq/index.js +158 -0
  63. package/lib/adapters/messaging-telegram/index.js +142 -0
  64. package/lib/adapters/messaging-whatsapp/index.js +189 -0
  65. package/lib/adapters/shopping-base/index.js +208 -0
  66. package/lib/adapters/shopping-jd/index.js +150 -0
  67. package/lib/adapters/shopping-meituan/index.js +154 -0
  68. package/lib/adapters/shopping-taobao/index.js +176 -0
  69. package/lib/adapters/social-bilibili/index.js +171 -0
  70. package/lib/adapters/social-douyin/index.js +116 -0
  71. package/lib/adapters/social-weibo/index.js +164 -0
  72. package/lib/adapters/social-xiaohongshu/index.js +96 -0
  73. package/lib/adapters/system-data/disclosure.js +166 -0
  74. package/lib/adapters/system-data/index.js +34 -0
  75. package/lib/adapters/system-data/system-data-adapter.js +344 -0
  76. package/lib/adapters/travel-12306/index.js +151 -0
  77. package/lib/adapters/travel-amap/index.js +164 -0
  78. package/lib/adapters/travel-baidu-map/index.js +162 -0
  79. package/lib/adapters/travel-base/index.js +240 -0
  80. package/lib/adapters/travel-ctrip/index.js +151 -0
  81. package/lib/adapters/wechat/content-parser.js +326 -0
  82. package/lib/adapters/wechat/db-reader.js +209 -0
  83. package/lib/adapters/wechat/index.js +28 -0
  84. package/lib/adapters/wechat/key-extractor.js +158 -0
  85. package/lib/adapters/wechat/normalize.js +220 -0
  86. package/lib/adapters/wechat/wechat-adapter.js +205 -0
  87. package/lib/analysis-skills/base.js +113 -0
  88. package/lib/analysis-skills/footprint.js +167 -0
  89. package/lib/analysis-skills/index.js +58 -0
  90. package/lib/analysis-skills/interests.js +161 -0
  91. package/lib/analysis-skills/relations.js +226 -0
  92. package/lib/analysis-skills/spending.js +216 -0
  93. package/lib/analysis-skills/timeline.js +167 -0
  94. package/lib/entity-resolver/embedding-stage.js +198 -0
  95. package/lib/entity-resolver/entity-resolver.js +384 -0
  96. package/lib/entity-resolver/index.js +42 -0
  97. package/lib/entity-resolver/llm-stage.js +191 -0
  98. package/lib/entity-resolver/rule-stage.js +208 -0
  99. package/lib/entity-resolver/worker.js +149 -0
  100. package/lib/index.js +115 -0
  101. package/lib/migrations.js +73 -0
  102. package/lib/mobile-extractor/android.js +193 -0
  103. package/lib/mobile-extractor/index.js +9 -0
  104. package/lib/mobile-extractor/ios.js +223 -0
  105. package/lib/registry.js +42 -0
  106. package/lib/sidecar/index.js +15 -0
  107. package/lib/sidecar/supervisor.js +359 -0
  108. package/lib/vault.js +266 -0
  109. package/package.json +29 -3
  110. package/scripts/_make-fixture-all.js +126 -0
  111. package/scripts/_make-fixture-contacts.js +84 -0
  112. package/scripts/evaluate-entity-resolver.js +213 -0
  113. package/scripts/smoke-phase-5-5.js +196 -0
  114. package/scripts/smoke-phase-5-7.js +181 -0
  115. package/scripts/smoke-system-data-contacts.js +309 -0
  116. package/scripts/smoke-system-data.js +312 -0
@@ -0,0 +1,158 @@
1
+ /**
2
+ * Phase 12 v0.5 — WeChat legacy key extractor (frida-INDEPENDENT).
3
+ *
4
+ * Ports sjqz/parsers/wechat_decrypt.py legacy path to Node:
5
+ *
6
+ * key = MD5(IMEI + UIN)[:7].lower()
7
+ *
8
+ * Works for WeChat versions < 8.0.X where the IMEI-derived key path is
9
+ * still active. WeChat 8.0+ requires Frida hook on `sqlite3_key` —
10
+ * that's Phase 12.6 (frida-dependent) and ships when device + Frida are
11
+ * available.
12
+ *
13
+ * Inputs:
14
+ * - wechatDataPath: directory mirroring /data/data/com.tencent.mm/
15
+ * after `adb pull` (or PC WeChat Files directory)
16
+ * - Optional explicit overrides (imei, uin) for testing or
17
+ * when CompatibleInfo.cfg parsing fails
18
+ *
19
+ * Outputs:
20
+ * {
21
+ * uin: "1234567890",
22
+ * imei: "1234567890abcdef",
23
+ * key: "5d41402", // 7-char hex MD5 prefix
24
+ * source: "uin:<origin> | imei:<origin>", // origin = source file label, "manual" or "missing"
25
+ * warnings: [...]
26
+ * }
27
+ */
28
+
29
+ "use strict";
30
+
31
+ const fs = require("node:fs");
32
+ const path = require("node:path");
33
+ const crypto = require("node:crypto");
34
+
35
/**
 * Look up the WeChat UIN in the pulled shared_prefs XML files.
 *
 * Probes `auth_info_key_prefs.xml` first, then `system_config_prefs.xml`.
 * Within a file, the `<int>` entries are matched most-specific first:
 * a name containing `_auth_uin`, then `default_uin`, then any name
 * containing `uin`. The value may carry a leading minus sign.
 *
 * @param {string} wechatDataPath root of the pulled WeChat data tree
 * @returns {{uin: (string|null), from: (string|null)}} the UIN as a
 *   string plus the basename of the file it came from; both null when
 *   nothing matched
 */
function extractUinFromPrefs(wechatDataPath) {
  const uinPatterns = [
    /<int name="[^"]*_auth_uin[^"]*"\s+value="(-?\d+)"/,
    /<int name="default_uin"\s+value="(-?\d+)"/,
    /<int name="[^"]*uin[^"]*"\s+value="(-?\d+)"/,
  ];
  const prefFiles = ["auth_info_key_prefs.xml", "system_config_prefs.xml"].map(
    (fileName) => path.join(wechatDataPath, "shared_prefs", fileName),
  );

  for (const prefFile of prefFiles) {
    if (fs.existsSync(prefFile)) {
      let xml;
      try {
        xml = fs.readFileSync(prefFile, "utf-8");
      } catch (_e) {
        // Unreadable file: move on to the next candidate.
        continue;
      }
      const hit = uinPatterns.map((pattern) => pattern.exec(xml)).find(Boolean);
      if (hit) {
        return { uin: hit[1], from: path.basename(prefFile) };
      }
    }
  }
  return { uin: null, from: null };
}
64
+
65
/**
 * Extract an IMEI / device identifier from MicroMsg/CompatibleInfo.cfg.
 *
 * The file is read as raw bytes and scanned as an 8-bit string; no
 * attempt is made to decode its structure. Candidates are tried in
 * order: a run of exactly 15 digits, a run of exactly 14 digits, then
 * any 32 hex characters (GUID-like).
 *
 * Digit runs are delimited with lookarounds rather than consumed `\D`
 * characters, so an identifier sitting at the very start or end of the
 * file is still found, while longer digit runs are still rejected.
 *
 * @param {string} wechatDataPath root of the pulled WeChat data tree
 * @returns {{imei: (string|null), from: (string|null)}} identifier and
 *   a label naming the rule that matched; both null when the file is
 *   missing, unreadable, or contains no candidate
 */
function extractImeiFromCompatibleInfo(wechatDataPath) {
  const notFound = { imei: null, from: null };
  const cfgPath = path.join(wechatDataPath, "MicroMsg", "CompatibleInfo.cfg");
  if (!fs.existsSync(cfgPath)) return notFound;

  let text;
  try {
    // latin1 maps every byte to one char, so binary content is scanned safely.
    text = fs.readFileSync(cfgPath).toString("latin1");
  } catch (_e) {
    // Best-effort probe: an unreadable file is reported as "not found".
    return notFound;
  }

  const probes = [
    { pattern: /(?<!\d)(\d{15})(?!\d)/, label: "CompatibleInfo.cfg (15-digit)" },
    { pattern: /(?<!\d)(\d{14})(?!\d)/, label: "CompatibleInfo.cfg (14-digit)" },
    { pattern: /([0-9a-f]{32})/i, label: "CompatibleInfo.cfg (guid)" },
  ];
  for (const { pattern, label } of probes) {
    const match = pattern.exec(text);
    if (match) return { imei: match[1], from: label };
  }
  return notFound;
}
88
+
89
/**
 * Compute the legacy database key from device IMEI and account UIN.
 *
 * The key is the first seven lowercase hex characters of
 * MD5(imei concatenated with the string form of uin).
 *
 * @param {string} imei non-empty device identifier
 * @param {string|number} uin account UIN (any non-nullish value)
 * @returns {string} 7-character lowercase hex string
 * @throws {Error} when imei is not a non-empty string or uin is nullish
 */
function deriveLegacyKey(imei, uin) {
  const imeiOk = typeof imei === "string" && imei.length > 0;
  if (!imeiOk) {
    throw new Error("deriveLegacyKey: imei required");
  }
  if (uin === null || uin === undefined) {
    throw new Error("deriveLegacyKey: uin required");
  }
  const hasher = crypto.createHash("md5");
  hasher.update(imei + String(uin), "utf-8");
  const hex = hasher.digest("hex").toLowerCase();
  return hex.slice(0, 7);
}
104
+
105
/**
 * Resolve UIN and IMEI for a pulled WeChat data directory and derive
 * the legacy key from them.
 *
 * Each of UIN / IMEI comes from the explicit override when one is
 * given (origin "manual"); otherwise it is looked up on disk and the
 * origin is the label reported by the lookup, or "missing". When either
 * value cannot be resolved, a warning is recorded and `key` is null.
 *
 * @param {object} opts
 * @param {string} opts.wechatDataPath directory like the pulled
 *   /data/data/com.tencent.mm/ tree
 * @param {string} [opts.uin] override (skips the shared_prefs lookup)
 * @param {string} [opts.imei] override (skips the CompatibleInfo lookup)
 * @returns {object} { uin, imei, key, source, warnings }
 * @throws {Error} when opts.wechatDataPath is missing or not a string
 */
function extractWeChatKey(opts = {}) {
  const dataPath = opts.wechatDataPath;
  if (typeof dataPath !== "string" || dataPath.length === 0) {
    throw new Error("extractWeChatKey: opts.wechatDataPath required");
  }

  const warnings = [];
  // Shared resolution step: override wins, else on-disk lookup + warning.
  const resolveField = (override, lookup, field, missingWarning) => {
    if (override) return { value: override, origin: "manual" };
    const found = lookup(dataPath);
    if (!found[field]) warnings.push(missingWarning);
    return { value: found[field], origin: found.from || "missing" };
  };

  const uinInfo = resolveField(
    opts.uin,
    extractUinFromPrefs,
    "uin",
    "UIN not found in shared_prefs — adapter unusable without manual override",
  );
  const imeiInfo = resolveField(
    opts.imei,
    extractImeiFromCompatibleInfo,
    "imei",
    "IMEI not found in CompatibleInfo.cfg — adapter unusable without manual override",
  );

  const uin = uinInfo.value;
  const imei = imeiInfo.value;
  const source = `uin:${uinInfo.origin} | imei:${imeiInfo.origin}`;
  const key = uin && imei ? deriveLegacyKey(imei, uin) : null;
  return { uin, imei, key, source, warnings };
}
152
+
153
+ module.exports = {
154
+ extractWeChatKey,
155
+ deriveLegacyKey,
156
+ extractUinFromPrefs,
157
+ extractImeiFromCompatibleInfo,
158
+ };
@@ -0,0 +1,220 @@
1
+ /**
2
+ * Phase 12 v0.5 — WeChat row → UnifiedSchema mapping.
3
+ *
4
+ * Per `Adapter_WeChat_SQLCipher.md` §7. Pure function (DB row in → batch
5
+ * out); orchestrated by WechatAdapter.normalize() during ingest.
6
+ */
7
+
8
+ "use strict";
9
+
10
+ const { newId } = require("../../ids");
11
+ const { parseContent, isGroupTalker } = require("./content-parser");
12
+
13
+ const NAME = "wechat";
14
+ const VERSION = "0.5.0"; // Phase 12 v0.5 — frida-indep slice
15
+
16
/**
 * Map a single WeChat message row to a NormalizedBatch.
 *
 * Produces exactly one event, plus:
 *   - a person for the talker (when it is not the account itself),
 *   - a person for the in-group sender when the parser exposes
 *     `structured.senderWxid`,
 *   - a topic for the chatroom when the talker is a group.
 *
 * `occurredAt` uses `row.createTime` only when it is a positive finite
 * number. `Number(null)` and `Number("")` are both 0, so a plain
 * finiteness check would stamp rows lacking a timestamp as 1970-01-01;
 * such rows fall back to the ingest time instead.
 *
 * @param {object} row raw WeChat message row
 * @param {object} ctx { contactByUsername, chatroomByName, accountUin }
 * @returns {NormalizedBatch} { events, persons, places, items, topics }
 * @throws {Error} when row is not an object
 */
function normalizeMessage(row, ctx = {}) {
  if (!row || typeof row !== "object") {
    throw new Error("normalizeMessage: row required");
  }
  const parsed = parseContent(row);
  const structured = parsed.structured;
  const senderWxid = structured && structured.senderWxid;
  // Media/system rows may carry no text; never let that break slicing.
  const text = typeof parsed.text === "string" ? parsed.text : "";
  const isGroup = isGroupTalker(row.talker);
  const now = Date.now();
  const createTime = Number(row.createTime);
  const occurredAt = Number.isFinite(createTime) && createTime > 0 ? createTime : now;
  const isSend = Number(row.isSend) === 1;

  const accountUin = ctx.accountUin || "wechat-self";
  const selfId = `person-wechat-${accountUin}`;
  const peerId = row.talker ? wxidToPersonId(row.talker) : null;

  // In groups the actor is the parsed sender when known; otherwise
  // (and in 1-on-1 chats) it is self for outbound, talker for inbound.
  let actorId = isSend ? selfId : peerId;
  if (isGroup && senderWxid) {
    actorId = wxidToPersonId(senderWxid);
  }

  const participants = [];
  if (peerId) participants.push(peerId);
  participants.push(selfId);

  const eventId = newId();
  const source = {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: String(row.msgSvrId || row.msgId || `wechat-msg-${eventId}`),
    capturedAt: occurredAt,
    capturedBy: "sqlite",
  };

  // Subtype mapping per UnifiedSchema EVENT_SUBTYPES
  let subtype;
  switch (parsed.kind) {
    case "voipcall":
      subtype = "call";
      break;
    case "system":
      subtype = "interaction";
      break;
    case "redpacket":
      subtype = "redenvelope";
      break;
    case "image":
    case "video":
    case "emoji":
    case "voice":
      subtype = "media";
      break;
    default:
      subtype = "message";
  }

  const event = {
    id: eventId,
    type: "event",
    subtype,
    occurredAt,
    actor: actorId || selfId,
    participants: dedup(participants).filter(Boolean),
    content: {
      title: text.slice(0, 80) || "(无内容)",
      text,
    },
    ingestedAt: now,
    source,
    extra: {
      wechatType: Number(row.type),
      isSend,
      talker: row.talker,
      ...(isGroup ? { isGroup: true, chatroom: row.talker } : {}),
      // Parser-provided fields are spread last and so take precedence.
      ...structured,
    },
  };

  // Persons — talker / sender; merge-group keys via wxid
  const persons = [];
  if (peerId && peerId !== selfId) {
    persons.push({
      id: peerId,
      type: "person",
      subtype: isGroup ? "unknown" : "contact",
      names: [contactDisplayName(ctx.contactByUsername, row.talker)],
      identifiers: { wechatId: row.talker },
      ingestedAt: now,
      source,
      extra: { fromAdapter: NAME, wxid: row.talker },
    });
  }
  // For group messages, also add the sender as a person if known
  if (isGroup && senderWxid) {
    const senderId = wxidToPersonId(senderWxid);
    if (senderId !== selfId && !persons.some((p) => p.id === senderId)) {
      persons.push({
        id: senderId,
        type: "person",
        subtype: "contact",
        names: [contactDisplayName(ctx.contactByUsername, senderWxid)],
        identifiers: { wechatId: senderWxid },
        ingestedAt: now,
        source,
        extra: { fromAdapter: NAME, wxid: senderWxid },
      });
    }
  }

  // Topic — every group chat is a Topic (per design doc OQ-4 = C)
  const topics = [];
  if (isGroup) {
    const chatroomName = (ctx.chatroomByName && ctx.chatroomByName[row.talker])
      || row.talker.replace("@chatroom", "");
    topics.push({
      id: `topic-wechat-group-${row.talker}`,
      type: "topic",
      name: chatroomName,
      derivedFromEvents: [event.id],
      ingestedAt: now,
      source,
      extra: { wxid: row.talker, fromAdapter: NAME },
    });
    if (!event.extra.topicId) event.extra.topicId = topics[0].id;
  }

  return { events: [event], persons, places: [], items: [], topics };
}
143
+
144
/**
 * Turn one contact row into a batch holding a single Person entity.
 *
 * A row without a `username` yields an empty batch. The person's names
 * are the non-empty string values of conRemark, nickname, alias and
 * username, in that order, with duplicates removed.
 *
 * @param {object} row contact row (needs `username`)
 * @param {object} [ctx] accepted for signature parity; not read here
 * @returns {object} { events, persons, places, items, topics }
 */
function normalizeContact(row, ctx = {}) {
  const batch = { events: [], persons: [], places: [], items: [], topics: [] };
  if (!row || !row.username) return batch;

  const wxid = row.username;
  const timestamp = Date.now();

  const nameCandidates = [];
  for (const candidate of [row.conRemark, row.nickname, row.alias, wxid]) {
    if (typeof candidate === "string" && candidate.length > 0) {
      nameCandidates.push(candidate);
    }
  }

  batch.persons.push({
    id: wxidToPersonId(wxid),
    type: "person",
    subtype: guessContactSubtype(row),
    names: dedup(nameCandidates),
    identifiers: { wechatId: wxid },
    ingestedAt: timestamp,
    source: {
      adapter: NAME,
      adapterVersion: VERSION,
      originalId: `wechat-contact-${wxid}`,
      capturedAt: timestamp,
      capturedBy: "sqlite",
    },
    extra: { fromAdapter: NAME, wxid, wechatType: row.type },
  });
  return batch;
}
174
+
175
+ // ─── helpers ────────────────────────────────────────────────────────────
176
+
177
/**
 * Build the stable person id for a wxid.
 *
 * @param {string} wxid WeChat username
 * @returns {string|null} `person-wechat-<wxid>`, or null for a falsy wxid
 */
function wxidToPersonId(wxid) {
  // The id is derived only from the wxid, so repeated ingests agree.
  return wxid ? `person-wechat-${wxid}` : null;
}
183
+
184
/**
 * Return the distinct non-nullish values of `arr`, keeping the order of
 * first appearance.
 *
 * @param {Iterable<*>} arr values to filter
 * @returns {Array<*>} new array without null/undefined or repeats
 */
function dedup(arr) {
  const unique = new Set();
  for (const item of arr) {
    if (item !== null && item !== undefined) {
      unique.add(item); // Set keeps first-insertion order
    }
  }
  return [...unique];
}
194
+
195
/**
 * Pick the best display name for a wxid from a username-keyed lookup.
 *
 * Preference order: conRemark, nickname, alias, then the wxid itself.
 * The wxid is also returned when the lookup or the entry is absent.
 *
 * @param {object|null|undefined} byUsername contact rows keyed by username
 * @param {string} wxid WeChat username to resolve
 * @returns {string} chosen display name
 */
function contactDisplayName(byUsername, wxid) {
  const contact = byUsername ? byUsername[wxid] : undefined;
  if (!contact) return wxid;
  return contact.conRemark || contact.nickname || contact.alias || wxid;
}
202
+
203
/**
 * Classify a contact row into a person subtype.
 *
 * Deliberately coarse for v0.5: usernames ending in "@chatroom" are
 * chat groups rather than people and map to "unknown"; every other row
 * is a "contact". The `type` bit field of the row is not inspected.
 *
 * @param {object} row contact row
 * @returns {"unknown"|"contact"} subtype label
 */
function guessContactSubtype(row) {
  const username = row.username;
  const isChatroom = typeof username === "string" && username.endsWith("@chatroom");
  return isChatroom ? "unknown" : "contact";
}
213
+
214
+ module.exports = {
215
+ normalizeMessage,
216
+ normalizeContact,
217
+ wxidToPersonId,
218
+ NAME,
219
+ VERSION,
220
+ };
@@ -0,0 +1,205 @@
1
+ /**
2
+ * Phase 12 v0.5 — WechatAdapter (frida-INDEPENDENT slice).
3
+ *
4
+ * Per `Adapter_WeChat_SQLCipher.md` §17.2 buildable-now scope. This is
5
+ * the 60% of Phase 12 that can land without rooted device + Frida:
6
+ * everything from "DB file is decrypted at this path on disk" forward.
7
+ *
8
+ * Flow:
9
+ * 1. UI / CLI workflow drives the on-device pull via AndroidExtractor
10
+ * (Phase 7.5) — copies EnMicroMsg.db to a local cache.
11
+ * 2. keyProvider returns the key (legacy: KeyExtractor MD5(IMEI+UIN)
12
+ * computes it; Phase 12.6 hot path: Frida hook fetches it).
13
+ * 3. WechatAdapter.sync() opens the DB via WeChatDBReader, iterates
14
+ * message + contact tables, yields raw events.
15
+ * 4. normalize() turns each row into UnifiedSchema entities.
16
+ *
17
+ * Watermark: max msgSvrId per scope. Adapter sync({sinceWatermark}) is
18
+ * a high-water filter rather than per-talker — Phase 12.6 adds the
19
+ * per-talker variant.
20
+ */
21
+
22
+ "use strict";
23
+
24
+ const fs = require("node:fs");
25
+
26
+ const { CAPTURED_BY } = require("../../constants");
27
+ const { WeChatDBReader } = require("./db-reader");
28
+ const { normalizeMessage, normalizeContact, NAME, VERSION } = require("./normalize");
29
+
30
/**
 * Adapter that streams contacts and messages out of a locally available
 * WeChat EnMicroMsg.db and turns them into UnifiedSchema batches.
 *
 * The adapter never talks to a server: `authenticate()` only checks
 * that the DB file exists and that the key provider yields a key.
 */
class WechatAdapter {
  /**
   * @param {object} opts
   * @param {object} opts.account account descriptor; `uin` is required
   * @param {string} [opts.dbPath] local path of the pulled DB file
   * @param {{getKey: Function}} [opts.keyProvider] supplies the DB key
   * @param {Function} [opts.dbReaderFactory] test seam: called with
   *   `{ dbPath, keyProvider }`, must return a reader object
   * @throws {Error} when opts, opts.account or opts.account.uin is missing
   */
  constructor(opts = {}) {
    if (!opts || typeof opts !== "object") {
      throw new Error("WechatAdapter: opts required");
    }
    if (!opts.account || typeof opts.account !== "object") {
      throw new Error("WechatAdapter: opts.account required");
    }
    if (!opts.account.uin) {
      throw new Error("WechatAdapter: opts.account.uin required (WeChat user identifier)");
    }
    this.account = opts.account;
    // dbPath: local path to the (already-pulled) decrypted-source
    // EnMicroMsg.db. Test seam.
    this._dbPath = opts.dbPath || null;
    // keyProvider: { getKey(): Promise<string> }. Production wires this
    // to either KeyExtractor (legacy) or a Frida bridge (Phase 12.6).
    this._keyProvider = opts.keyProvider || null;
    // DI seam for tests — swap the DB reader
    this._dbReaderFactory = typeof opts.dbReaderFactory === "function"
      ? opts.dbReaderFactory
      : null;

    this.name = NAME;
    this.version = VERSION;
    this.capabilities = [
      "sync:sqlite",
      "auth:keystore",
      "decrypt:sqlcipher-v1",
      "parse:wechat-message",
    ];
    this.extractMode = "device-pull"; // Phase 7.5 contract field
    this.rateLimits = {};
    this.dataDisclosure = {
      fields: [
        "wechat:messages (text + group + 1-on-1 chats from EnMicroMsg.db)",
        "wechat:contacts (rcontact: nickname / alias / 备注名)",
        "wechat:chatrooms (group display names + member lists)",
      ],
      sensitivity: "high",
      legalGate: true, // first-use legal gate per design doc OQ-7
    };
  }

  /**
   * Sanity-check the on-disk state; there is no server-side auth.
   *
   * @returns {Promise<object>} `{ ok: true, account }` or
   *   `{ ok: false, reason, error }` with reason one of DB_NOT_PULLED,
   *   NO_KEY_PROVIDER, EMPTY_KEY, KEY_PROVIDER_THREW
   */
  async authenticate() {
    if (!this._dbPath || !fs.existsSync(this._dbPath)) {
      return { ok: false, reason: "DB_NOT_PULLED", error: `DB path missing: ${this._dbPath}` };
    }
    if (!this._keyProvider || typeof this._keyProvider.getKey !== "function") {
      return { ok: false, reason: "NO_KEY_PROVIDER", error: "keyProvider required" };
    }
    try {
      const key = await this._keyProvider.getKey();
      if (!key) return { ok: false, reason: "EMPTY_KEY", error: "keyProvider returned empty key" };
      return { ok: true, account: this.account.uin };
    } catch (err) {
      return { ok: false, reason: "KEY_PROVIDER_THREW", error: err && err.message ? err.message : String(err) };
    }
  }

  /**
   * Health probe built on `authenticate()`.
   *
   * @returns {Promise<object>} `{ ok: true, lastChecked }` or
   *   `{ ok: false, reason, error }`
   */
  async healthCheck() {
    const r = await this.authenticate();
    if (r.ok) return { ok: true, lastChecked: Date.now() };
    return { ok: false, reason: r.reason, error: r.error };
  }

  /**
   * Iterate WeChat data → RawEvent stream. Each row becomes one raw
   * event with `payload.kind = "message"` or `"contact"`. Contacts are
   * yielded first, then messages; chatrooms are only loaded as context.
   *
   * Progress events always have the shape `{ ...details, phase, adapter }`.
   * `phase` and `adapter` are written last so detail keys cannot
   * overwrite them; a failed DB verification is reported as
   * `phase: "error"` with `stage: "verify"`.
   *
   * @param {object} opts
   * @param {string|number} [opts.sinceWatermark] max msgSvrId watermark
   * @param {number} [opts.maxPerType=10_000]
   * @param {Function} [opts.onProgress] best-effort listener; exceptions
   *   thrown by it are ignored
   */
  async *sync(opts = {}) {
    const onProgress = typeof opts.onProgress === "function" ? opts.onProgress : null;
    const emit = (phase, payload = {}) => {
      if (!onProgress) return;
      try {
        onProgress({ ...payload, phase, adapter: NAME });
      } catch (_e) {
        // A faulty progress listener must not abort the sync.
      }
    };

    if (!this._dbPath || !fs.existsSync(this._dbPath)) {
      // No DB pulled yet — registry-safe idle no-op
      emit("idle", { reason: "no DB at " + this._dbPath });
      return;
    }
    const maxPerType = Number.isFinite(opts.maxPerType) ? opts.maxPerType : 10_000;
    const sinceMsgSvrId = parseWatermark(opts.sinceWatermark);

    emit("opening", { dbPath: this._dbPath });
    const Reader = this._dbReaderFactory || ((readerOpts) => new WeChatDBReader(readerOpts));
    const reader = Reader({ dbPath: this._dbPath, keyProvider: this._keyProvider });

    try {
      const openInfo = await reader.open();
      emit("opened", { profile: openInfo.profile, tables: openInfo.tables });

      if (!reader.isEnMicroMsg()) {
        emit("error", { stage: "verify", message: "not an EnMicroMsg.db (missing message/rcontact)" });
        return;
      }

      // Contacts first — gives normalize() context for message senders
      const contacts = reader.fetchContacts({ limit: 10_000 });
      emit("contacts-loaded", { count: contacts.length });
      const contactByUsername = {};
      for (const c of contacts) contactByUsername[c.username] = c;
      for (const c of contacts) {
        yield this._rowToRaw("contact", c, { contactByUsername });
      }

      // Chatrooms — display-name lookup used when normalizing group messages
      const chatrooms = reader.fetchChatrooms({ limit: 5000 });
      const chatroomByName = {};
      for (const cr of chatrooms) chatroomByName[cr.chatroomname] = cr.displayname || cr.chatroomname;
      emit("chatrooms-loaded", { count: chatrooms.length });

      // Messages
      const messages = reader.fetchMessages({ sinceMsgSvrId, limit: maxPerType });
      emit("messages-loaded", { count: messages.length, since: sinceMsgSvrId });
      let count = 0;
      // NOTE(review): msgSvrId is compared as a JS Number; confirm the
      // values stay below Number.MAX_SAFE_INTEGER for this watermark.
      let maxSvr = sinceMsgSvrId;
      for (const m of messages) {
        count += 1;
        if (Number(m.msgSvrId) > maxSvr) maxSvr = Number(m.msgSvrId);
        emit("processing", { current: count, total: messages.length, msgSvrId: m.msgSvrId });
        yield this._rowToRaw("message", m, { contactByUsername, chatroomByName });
      }
      emit("done", { messagesYielded: count, newWatermark: maxSvr });
    } finally {
      // Always release the DB handle, including on early consumer exit.
      try { reader.close(); } catch (_e) { /* close is best-effort */ }
    }
  }

  /**
   * Convert one raw event produced by `sync()` into a normalized batch.
   *
   * @param {object} raw raw event with a `payload`
   * @returns {object} batch from normalizeContact / normalizeMessage
   * @throws {Error} when raw or raw.payload is missing
   */
  normalize(raw) {
    if (!raw || !raw.payload) {
      throw new Error("WechatAdapter.normalize: raw.payload missing");
    }
    const ctx = {
      accountUin: this.account.uin,
      contactByUsername: raw.payload.contactByUsername || {},
      chatroomByName: raw.payload.chatroomByName || {},
    };
    if (raw.payload.kind === "contact") {
      return normalizeContact(raw.payload.row, ctx);
    }
    return normalizeMessage(raw.payload.row, ctx);
  }

  /**
   * Wrap a DB row as a raw event. Lookup maps passed in `ctxExtras` are
   * attached by reference to the payload so `normalize()` can use them.
   *
   * @param {"message"|"contact"} kind row kind
   * @param {object} row DB row
   * @param {object} [ctxExtras] extra payload fields
   * @returns {object} { adapter, originalId, capturedAt, payload }
   */
  _rowToRaw(kind, row, ctxExtras = {}) {
    const originalId = kind === "message"
      ? String(row.msgSvrId || row.msgId)
      : `contact-${row.username}`;
    return {
      adapter: NAME,
      originalId,
      capturedAt: Date.now(),
      payload: {
        kind,
        row,
        ...ctxExtras,
      },
    };
  }
}
198
+
199
/**
 * Coerce a stored watermark into a positive integer.
 *
 * @param {string|number|null|undefined} wm persisted watermark value
 * @returns {number} the base-10 integer parsed from `wm`, or 0 when it
 *   is nullish, unparseable, zero or negative
 */
function parseWatermark(wm) {
  if (wm === null || wm === undefined) return 0;
  const parsed = Number.parseInt(String(wm), 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return 0;
  return parsed;
}
204
+
205
+ module.exports = { WechatAdapter, NAME, VERSION };
@@ -0,0 +1,113 @@
1
+ /**
2
+ * Phase 11 — Analysis skills base.
3
+ *
4
+ * Each skill = a focused analysis function over the vault. Inputs are
5
+ * a small typed options bag (time range + dimension + filters); output
6
+ * is `{ summary, breakdown, llm_commentary?, citations }`.
7
+ *
8
+ * Skills are pure logic on vault data + optional LLM commentary. They
9
+ * compose with cross-source merge groups (Phase 8 EntityResolver) so
10
+ * "上个月给我妈花了多少" returns combined Email + Alipay + WeChat
11
+ * spending tied to the same merged Person.
12
+ *
13
+ * Skills share these conventions:
14
+ * - `vault` injected at construction
15
+ * - `llm` optional; when null, skill returns pure-data result (no
16
+ * commentary); when provided, llm.chat() generates a 1-2 sentence
17
+ * prose commentary on the breakdown.
18
+ * - `timeWindow` is `{ since, until }` ms epoch pair; absent = all-time
19
+ * - results always carry `citations` = list of event ids that
20
+ * contributed to the answer (lets UI deep-link back per Phase 5.6
21
+ * citation flow)
22
+ *
23
+ * Privacy invariant: every skill that calls llm passes
24
+ * `acceptNonLocal: false` to the wrapper; non-local LLMs need explicit
25
+ * opt-in from the caller (same gate as AnalysisEngine).
26
+ */
27
+
28
+ "use strict";
29
+
30
/**
 * Base class for analysis skills: holds the injected vault and optional
 * LLM, and provides helpers shared by concrete skills. Subclasses
 * override `run()`.
 */
class AnalysisSkill {
  /**
   * @param {object} opts
   * @param {object} opts.vault data vault the skill reads from (required)
   * @param {object} [opts.llm] optional LLM client exposing `chat()`
   * @param {string} [opts.name="unnamed"] skill name used in messages
   * @throws {Error} when opts or opts.vault is missing
   */
  constructor(opts) {
    if (!opts || typeof opts !== "object") {
      throw new Error("AnalysisSkill: opts required");
    }
    if (!opts.vault) {
      throw new Error("AnalysisSkill: opts.vault required");
    }
    this.vault = opts.vault;
    this.llm = opts.llm || null; // optional
    this.name = opts.name || "unnamed";
  }

  /**
   * Execute the skill. The base implementation always rejects.
   *
   * @param {object} [_options] skill-specific options
   * @returns {Promise<never>}
   * @throws {Error} always, naming the skill
   */
  async run(_options = {}) {
    throw new Error(`AnalysisSkill.run() not implemented for ${this.name}`);
  }

  // ─── helpers shared by skills ───────────────────────────────────────

  /**
   * Normalize a time window. Accepts, in priority order:
   *   - { since, until }  ms epoch (until defaults to now)
   *   - { sinceDays }     relative (now - N days)
   *   - { sinceMonths }   relative, counting a month as 30 days
   *   - { until }         upper bound only (since stays null)
   * Returns `{ since, until }` ms, or `{ since: null, until: null }`
   * for all-time.
   *
   * @param {object} [options]
   * @returns {{since: (number|null), until: (number|null)}}
   */
  resolveTimeWindow(options = {}) {
    const now = Date.now();
    const DAY_MS = 24 * 3600_000;
    const until = typeof options.until === "number" && !Number.isNaN(options.until)
      ? options.until
      : null;

    if (typeof options.since === "number" && options.since > 0) {
      return { since: options.since, until: until !== null ? until : now };
    }
    if (typeof options.sinceDays === "number" && options.sinceDays > 0) {
      return { since: now - options.sinceDays * DAY_MS, until: now };
    }
    if (typeof options.sinceMonths === "number" && options.sinceMonths > 0) {
      return { since: now - options.sinceMonths * 30 * DAY_MS, until: now };
    }
    // Keep an explicit upper bound instead of silently widening to all-time.
    return { since: null, until };
  }

  /**
   * Expand a personId to "all Person ids in its merge group".
   *
   * Falls back to `[personId]` when the vault has no
   * `getMergeGroupMembers`, when that call throws, or when it reports
   * nothing (null / undefined / empty array), so callers always get at
   * least the person they asked about.
   *
   * @param {string} personId
   * @returns {Array<string>} member ids; `[]` for a falsy personId
   */
  expandToMergeGroup(personId) {
    if (!personId) return [];
    try {
      if (typeof this.vault.getMergeGroupMembers === "function") {
        const members = this.vault.getMergeGroupMembers(personId);
        const isEmpty = members == null || (Array.isArray(members) && members.length === 0);
        if (!isEmpty) return members;
      }
    } catch (_e) {
      // Merge-group lookup is optional; fall through to the single id.
    }
    return [personId];
  }

  /**
   * Wrap llm.chat() with the privacy gate. Returns the response text,
   * or null when no LLM is configured, when the LLM declares
   * `isLocal === false` and the caller did not pass
   * `acceptNonLocal: true`, when chat() throws, or when the response
   * has no text.
   *
   * @param {Array<object>} messages chat messages forwarded to llm.chat
   * @param {object} [opts] forwarded to llm.chat after a default
   *   `temperature: 0.2`; `acceptNonLocal` opts in to non-local LLMs
   * @returns {Promise<string|null>}
   */
  async callLlmCommentary(messages, opts = {}) {
    if (!this.llm || typeof this.llm.chat !== "function") return null;
    if (this.llm.isLocal === false && !opts.acceptNonLocal) {
      return null;
    }
    try {
      const r = await this.llm.chat(messages, { temperature: 0.2, ...opts });
      return (r && r.text) || null;
    } catch (_e) {
      // Commentary is optional; a failing LLM degrades to pure-data output.
      return null;
    }
  }
}
112
+
113
+ module.exports = { AnalysisSkill };