@chainlesschain/personal-data-hub 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/ai-chat-history.test.js +395 -0
- package/__tests__/adapters/ai-chat-http-client.test.js +242 -0
- package/__tests__/adapters/ai-chat-vendors.test.js +733 -0
- package/__tests__/adapters/alipay-bill-adapter.test.js +538 -0
- package/__tests__/adapters/email-adapter.test.js +138 -1
- package/__tests__/adapters/email-classifier.test.js +347 -0
- package/__tests__/adapters/email-pdf-extractor.test.js +529 -0
- package/__tests__/adapters/email-retry-progress.test.js +294 -0
- package/__tests__/adapters/email-templates.test.js +699 -0
- package/__tests__/adapters/system-data-adapter.test.js +440 -0
- package/__tests__/adapters/system-data-disclosure.test.js +153 -0
- package/__tests__/analysis-skills.test.js +409 -0
- package/__tests__/entity-resolver-ingest-hook.test.js +177 -0
- package/__tests__/entity-resolver-stages.test.js +411 -0
- package/__tests__/entity-resolver-vault.test.js +246 -0
- package/__tests__/entity-resolver.test.js +526 -0
- package/__tests__/fixtures/entity-resolver-200-mock.json +96 -0
- package/__tests__/longtail-adapters.test.js +217 -0
- package/__tests__/mobile-extractor.test.js +288 -0
- package/__tests__/shopping-adapters.test.js +296 -0
- package/__tests__/sidecar-contacts-cross-validate.test.js +163 -0
- package/__tests__/sidecar-supervisor.test.js +120 -0
- package/__tests__/social-adapters.test.js +206 -0
- package/__tests__/travel-adapters.test.js +325 -0
- package/__tests__/vault.test.js +3 -3
- package/__tests__/wechat-adapter.test.js +476 -0
- package/__tests__/whatsapp-adapter.test.js +135 -0
- package/lib/adapter-spec.js +12 -0
- package/lib/adapters/_python-sidecar-base.js +207 -0
- package/lib/adapters/ai-chat-history/ai-chat-adapter.js +335 -0
- package/lib/adapters/ai-chat-history/cookie-auth.js +109 -0
- package/lib/adapters/ai-chat-history/http-client.js +211 -0
- package/lib/adapters/ai-chat-history/index.js +28 -0
- package/lib/adapters/ai-chat-history/schema-map.js +221 -0
- package/lib/adapters/ai-chat-history/vendor-spec.js +85 -0
- package/lib/adapters/ai-chat-history/vendors/coze.js +179 -0
- package/lib/adapters/ai-chat-history/vendors/deepseek.js +199 -0
- package/lib/adapters/ai-chat-history/vendors/dreamina.js +174 -0
- package/lib/adapters/ai-chat-history/vendors/hunyuan.js +176 -0
- package/lib/adapters/ai-chat-history/vendors/kimi.js +182 -0
- package/lib/adapters/ai-chat-history/vendors/qianfan.js +160 -0
- package/lib/adapters/ai-chat-history/vendors/tongyi.js +193 -0
- package/lib/adapters/ai-chat-history/vendors/zhipu.js +202 -0
- package/lib/adapters/alipay-bill/alipay-bill-adapter.js +307 -0
- package/lib/adapters/alipay-bill/counterparty.js +129 -0
- package/lib/adapters/alipay-bill/csv-parser.js +217 -0
- package/lib/adapters/alipay-bill/index.js +41 -0
- package/lib/adapters/alipay-bill/zip-decryptor.js +111 -0
- package/lib/adapters/email-imap/classifier.js +495 -0
- package/lib/adapters/email-imap/email-adapter.js +419 -8
- package/lib/adapters/email-imap/index.js +42 -0
- package/lib/adapters/email-imap/pdf-extractor.js +192 -0
- package/lib/adapters/email-imap/templates/bill.js +232 -0
- package/lib/adapters/email-imap/templates/government.js +120 -0
- package/lib/adapters/email-imap/templates/index.js +78 -0
- package/lib/adapters/email-imap/templates/order.js +186 -0
- package/lib/adapters/email-imap/templates/other.js +114 -0
- package/lib/adapters/email-imap/templates/register.js +113 -0
- package/lib/adapters/email-imap/templates/travel.js +157 -0
- package/lib/adapters/email-imap/templates/utils.js +275 -0
- package/lib/adapters/email-imap/transactions.js +234 -0
- package/lib/adapters/messaging-qq/index.js +158 -0
- package/lib/adapters/messaging-telegram/index.js +142 -0
- package/lib/adapters/messaging-whatsapp/index.js +189 -0
- package/lib/adapters/shopping-base/index.js +208 -0
- package/lib/adapters/shopping-jd/index.js +150 -0
- package/lib/adapters/shopping-meituan/index.js +154 -0
- package/lib/adapters/shopping-taobao/index.js +176 -0
- package/lib/adapters/social-bilibili/index.js +171 -0
- package/lib/adapters/social-douyin/index.js +116 -0
- package/lib/adapters/social-weibo/index.js +164 -0
- package/lib/adapters/social-xiaohongshu/index.js +96 -0
- package/lib/adapters/system-data/disclosure.js +166 -0
- package/lib/adapters/system-data/index.js +34 -0
- package/lib/adapters/system-data/system-data-adapter.js +344 -0
- package/lib/adapters/travel-12306/index.js +151 -0
- package/lib/adapters/travel-amap/index.js +164 -0
- package/lib/adapters/travel-baidu-map/index.js +162 -0
- package/lib/adapters/travel-base/index.js +240 -0
- package/lib/adapters/travel-ctrip/index.js +151 -0
- package/lib/adapters/wechat/content-parser.js +326 -0
- package/lib/adapters/wechat/db-reader.js +209 -0
- package/lib/adapters/wechat/index.js +28 -0
- package/lib/adapters/wechat/key-extractor.js +158 -0
- package/lib/adapters/wechat/normalize.js +220 -0
- package/lib/adapters/wechat/wechat-adapter.js +205 -0
- package/lib/analysis-skills/base.js +113 -0
- package/lib/analysis-skills/footprint.js +167 -0
- package/lib/analysis-skills/index.js +58 -0
- package/lib/analysis-skills/interests.js +161 -0
- package/lib/analysis-skills/relations.js +226 -0
- package/lib/analysis-skills/spending.js +216 -0
- package/lib/analysis-skills/timeline.js +167 -0
- package/lib/entity-resolver/embedding-stage.js +198 -0
- package/lib/entity-resolver/entity-resolver.js +384 -0
- package/lib/entity-resolver/index.js +42 -0
- package/lib/entity-resolver/llm-stage.js +191 -0
- package/lib/entity-resolver/rule-stage.js +208 -0
- package/lib/entity-resolver/worker.js +149 -0
- package/lib/index.js +115 -0
- package/lib/migrations.js +73 -0
- package/lib/mobile-extractor/android.js +193 -0
- package/lib/mobile-extractor/index.js +9 -0
- package/lib/mobile-extractor/ios.js +223 -0
- package/lib/registry.js +42 -0
- package/lib/sidecar/index.js +15 -0
- package/lib/sidecar/supervisor.js +359 -0
- package/lib/vault.js +266 -0
- package/package.json +29 -3
- package/scripts/_make-fixture-all.js +126 -0
- package/scripts/_make-fixture-contacts.js +84 -0
- package/scripts/evaluate-entity-resolver.js +213 -0
- package/scripts/smoke-phase-5-5.js +196 -0
- package/scripts/smoke-phase-5-7.js +181 -0
- package/scripts/smoke-system-data-contacts.js +309 -0
- package/scripts/smoke-system-data.js +312 -0
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 12 v0.5 — WeChat legacy key extractor (frida-INDEPENDENT).
|
|
3
|
+
*
|
|
4
|
+
* Ports sjqz/parsers/wechat_decrypt.py legacy path to Node:
|
|
5
|
+
*
|
|
6
|
+
* key = MD5(IMEI + UIN)[:7].lower()
|
|
7
|
+
*
|
|
8
|
+
* Works for WeChat versions < 8.0.X where the IMEI-derived key path is
|
|
9
|
+
* still active. WeChat 8.0+ requires Frida hook on `sqlite3_key` —
|
|
10
|
+
* that's Phase 12.6 (frida-dependent) and ships when device + Frida are
|
|
11
|
+
* available.
|
|
12
|
+
*
|
|
13
|
+
* Inputs:
|
|
14
|
+
* - wechatDataPath: directory mirroring /data/data/com.tencent.mm/
|
|
15
|
+
* after `adb pull` (or PC WeChat Files directory)
|
|
16
|
+
* - Optional explicit overrides (imei, uin, manualKey) for testing or
|
|
17
|
+
* when CompatibleInfo.cfg parsing fails
|
|
18
|
+
*
|
|
19
|
+
* Outputs:
|
|
20
|
+
* {
|
|
21
|
+
* uin: "1234567890",
|
|
22
|
+
* imei: "1234567890abcdef",
|
|
23
|
+
* key: "5d41402", // 7-char hex MD5 prefix
|
|
24
|
+
* source: "auth-xml+compatible-cfg" | "manual" | "...",
|
|
25
|
+
* warnings: [...]
|
|
26
|
+
* }
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
"use strict";
|
|
30
|
+
|
|
31
|
+
const fs = require("node:fs");
|
|
32
|
+
const path = require("node:path");
|
|
33
|
+
const crypto = require("node:crypto");
|
|
34
|
+
|
|
35
|
+
/**
 * Look up the WeChat account UIN in the pulled Android shared preferences.
 *
 * Two files under `<wechatDataPath>/shared_prefs/` are probed in order:
 * `auth_info_key_prefs.xml`, then `system_config_prefs.xml`. Within a file
 * the `<int name=... value=...>` entries are matched from most to least
 * specific: a name containing `_auth_uin`, then exactly `default_uin`, then
 * any name containing `uin`. The value may carry a leading minus sign.
 *
 * @param {string} wechatDataPath root of the pulled WeChat data directory
 * @returns {{uin: (string|null), from: (string|null)}} the UIN as the raw
 *   digit string plus the file name it came from; both null when no file
 *   yields a match
 */
function extractUinFromPrefs(wechatDataPath) {
  const prefsDir = path.join(wechatDataPath, "shared_prefs");
  const prefFiles = ["auth_info_key_prefs.xml", "system_config_prefs.xml"];
  // Ordered by specificity; the first matcher that hits wins.
  const uinMatchers = [
    /<int name="[^"]*_auth_uin[^"]*"\s+value="(-?\d+)"/,
    /<int name="default_uin"\s+value="(-?\d+)"/,
    /<int name="[^"]*uin[^"]*"\s+value="(-?\d+)"/,
  ];

  for (const fileName of prefFiles) {
    const filePath = path.join(prefsDir, fileName);
    if (!fs.existsSync(filePath)) continue;

    let xml;
    try {
      xml = fs.readFileSync(filePath, "utf-8");
    } catch (_e) {
      // Unreadable file: best-effort, move on to the next candidate.
      continue;
    }

    for (const matcher of uinMatchers) {
      const hit = matcher.exec(xml);
      if (hit) return { uin: hit[1], from: fileName };
    }
  }
  return { uin: null, from: null };
}
|
|
64
|
+
|
|
65
|
+
/**
 * Recover the device identifier (IMEI or a fallback id) from
 * `<wechatDataPath>/MicroMsg/CompatibleInfo.cfg`.
 *
 * The file is read as raw bytes and scanned as an 8-bit string; no attempt
 * is made to decode its container format. Probes run in this order and the
 * first hit wins:
 *   1. a run of exactly 15 digits,
 *   2. a run of exactly 14 digits,
 *   3. the first 32 consecutive hex characters (GUID-like).
 *
 * Digit runs are delimited with lookarounds rather than by consuming a
 * neighbouring non-digit, so an identifier sitting at the very first or very
 * last byte of the file is still found.
 *
 * @param {string} wechatDataPath root of the pulled WeChat data directory
 * @returns {{imei: (string|null), from: (string|null)}} the identifier and a
 *   label naming which probe matched; both null when the file is missing,
 *   unreadable, or contains no candidate
 */
function extractImeiFromCompatibleInfo(wechatDataPath) {
  const cfgPath = path.join(wechatDataPath, "MicroMsg", "CompatibleInfo.cfg");
  if (!fs.existsSync(cfgPath)) return { imei: null, from: null };

  const probes = [
    { pattern: /(?<!\d)(\d{15})(?!\d)/, from: "CompatibleInfo.cfg (15-digit)" },
    { pattern: /(?<!\d)(\d{14})(?!\d)/, from: "CompatibleInfo.cfg (14-digit)" },
    { pattern: /([0-9a-f]{32})/i, from: "CompatibleInfo.cfg (guid)" },
  ];

  try {
    // "binary" maps every byte to one char, so arbitrary bytes survive the scan.
    const text = fs.readFileSync(cfgPath).toString("binary");
    for (const { pattern, from } of probes) {
      const hit = pattern.exec(text);
      if (hit) return { imei: hit[1], from };
    }
  } catch (_e) {
    // Best-effort: an unreadable file is reported the same as "not found".
  }
  return { imei: null, from: null };
}
|
|
88
|
+
|
|
89
|
+
/**
 * Compute the legacy database key: the first seven hex characters of
 * MD5(imei + uin), lowercase.
 *
 * @param {string} imei non-empty device identifier
 * @param {string|number} uin account UIN; stringified before hashing
 * @returns {string} 7-character lowercase hex prefix of the MD5 digest
 * @throws {Error} when `imei` is not a non-empty string, or `uin` is
 *   null/undefined
 */
function deriveLegacyKey(imei, uin) {
  const imeiUsable = typeof imei === "string" && imei.length > 0;
  if (!imeiUsable) {
    throw new Error("deriveLegacyKey: imei required");
  }
  if (uin === null || uin === undefined) {
    throw new Error("deriveLegacyKey: uin required");
  }

  const hasher = crypto.createHash("md5");
  hasher.update(imei + String(uin), "utf-8");
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 7).toLowerCase();
}
|
|
104
|
+
|
|
105
|
+
/**
 * Top-level entry point: resolve UIN, IMEI and the legacy database key for a
 * pulled WeChat data directory.
 *
 * Each of `uin` and `imei` is taken from the explicit override when given,
 * otherwise discovered on disk (shared_prefs XML / CompatibleInfo.cfg). A
 * warning is recorded for every value that could not be discovered.
 *
 * When `opts.manualKey` is a non-empty string it is returned as the key
 * verbatim and nothing is derived. This is the override the file header
 * documents for cases where on-disk parsing fails. UIN and IMEI are still
 * resolved on a best-effort basis so the caller can see what was found.
 *
 * @param {object} opts
 * @param {string} opts.wechatDataPath  directory like the pulled
 *                                       /data/data/com.tencent.mm/ tree
 * @param {string} [opts.uin]           override (skip auth XML parse)
 * @param {string} [opts.imei]          override (skip CompatibleInfo)
 * @param {string} [opts.manualKey]     override (skip key derivation)
 * @returns {{uin: ?string, imei: ?string, key: ?string, source: string, warnings: string[]}}
 *   `key` is null when it could not be derived and no manual key was given.
 *   `source` is `"manual"` for a manual key, otherwise
 *   `"uin:<origin> | imei:<origin>"` where origin is `manual`, `missing`, or
 *   the label of the file the value was read from.
 * @throws {Error} when `opts.wechatDataPath` is missing or not a string
 */
function extractWeChatKey(opts = {}) {
  if (!opts.wechatDataPath || typeof opts.wechatDataPath !== "string") {
    throw new Error("extractWeChatKey: opts.wechatDataPath required");
  }
  const manualKey = typeof opts.manualKey === "string" && opts.manualKey.length > 0
    ? opts.manualKey
    : null;
  const warnings = [];

  let uin = opts.uin || null;
  let uinSource = "manual";
  if (!uin) {
    const found = extractUinFromPrefs(opts.wechatDataPath);
    uin = found.uin;
    uinSource = found.from || "missing";
    if (!uin) warnings.push("UIN not found in shared_prefs — adapter unusable without manual override");
  }

  let imei = opts.imei || null;
  let imeiSource = "manual";
  if (!imei) {
    const found = extractImeiFromCompatibleInfo(opts.wechatDataPath);
    imei = found.imei;
    imeiSource = found.from || "missing";
    if (!imei) warnings.push("IMEI not found in CompatibleInfo.cfg — adapter unusable without manual override");
  }

  // A caller-supplied key wins over derivation and does not need UIN/IMEI.
  if (manualKey) {
    return { uin, imei, key: manualKey, source: "manual", warnings };
  }

  const source = `uin:${uinSource} | imei:${imeiSource}`;
  if (!uin || !imei) {
    return { uin, imei, key: null, source, warnings };
  }

  return { uin, imei, key: deriveLegacyKey(imei, uin), source, warnings };
}
|
|
152
|
+
|
|
153
|
+
module.exports = {
|
|
154
|
+
extractWeChatKey,
|
|
155
|
+
deriveLegacyKey,
|
|
156
|
+
extractUinFromPrefs,
|
|
157
|
+
extractImeiFromCompatibleInfo,
|
|
158
|
+
};
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 12 v0.5 — WeChat row → UnifiedSchema mapping.
|
|
3
|
+
*
|
|
4
|
+
* Per `Adapter_WeChat_SQLCipher.md` §7. Pure function (DB row in → batch
|
|
5
|
+
* out); orchestrated by WechatAdapter.normalize() during ingest.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
"use strict";
|
|
9
|
+
|
|
10
|
+
const { newId } = require("../../ids");
|
|
11
|
+
const { parseContent, isGroupTalker } = require("./content-parser");
|
|
12
|
+
|
|
13
|
+
const NAME = "wechat";
|
|
14
|
+
const VERSION = "0.5.0"; // Phase 12 v0.5 — frida-indep slice
|
|
15
|
+
|
|
16
|
+
/**
 * Map a single WeChat message row to a NormalizedBatch containing one event,
 * the persons it mentions, and (for group chats) one topic.
 *
 * Actor resolution:
 *   - group chat: the sender wxid reported by the content parser when
 *     present, otherwise the same rule as a 1-on-1 chat;
 *   - 1-on-1 chat: self for outbound rows (`isSend == 1`), the talker for
 *     inbound rows.
 *
 * `occurredAt` is `row.createTime` when it is a usable number. A missing
 * value (null, undefined, or a blank string) falls back to the ingest time.
 * Plain `Number()` coercion would turn those into `0`, i.e. 1970-01-01.
 *
 * @param {object} row  raw WeChat message row
 * @param {object} ctx  { contactByUsername, chatroomByName, accountUin }
 * @returns {NormalizedBatch} `{ events, persons, places, items, topics }`
 * @throws {Error} when `row` is not an object
 */
function normalizeMessage(row, ctx = {}) {
  if (!row || typeof row !== "object") {
    throw new Error("normalizeMessage: row required");
  }
  const parsed = parseContent(row);
  const isGroup = isGroupTalker(row.talker);
  const now = Date.now();

  const rawCreateTime = row.createTime;
  const createTimeMissing = rawCreateTime == null
    || (typeof rawCreateTime === "string" && rawCreateTime.trim() === "");
  const createTime = createTimeMissing ? NaN : Number(rawCreateTime);
  const occurredAt = Number.isFinite(createTime) ? createTime : now;
  const isSend = Number(row.isSend) === 1;

  const accountUin = ctx.accountUin || "wechat-self";
  const selfId = `person-wechat-${accountUin}`;
  const peerWxid = row.talker;
  const peerId = peerWxid ? wxidToPersonId(peerWxid) : null;

  // Only group rows carry a separate sender; it lives in parsed.structured.
  const senderWxid = (isGroup && parsed.structured && parsed.structured.senderWxid) || null;
  const senderId = senderWxid ? wxidToPersonId(senderWxid) : null;
  const actorId = senderId || (isSend ? selfId : peerId);

  const participants = [];
  if (peerId) participants.push(peerId);
  participants.push(selfId);

  const eventId = newId();
  const source = {
    adapter: NAME,
    adapterVersion: VERSION,
    originalId: String(row.msgSvrId || row.msgId || `wechat-msg-${eventId}`),
    capturedAt: occurredAt,
    capturedBy: "sqlite",
  };

  // Subtype mapping per UnifiedSchema EVENT_SUBTYPES.
  let subtype;
  switch (parsed.kind) {
    case "voipcall":
      subtype = "call";
      break;
    case "system":
      subtype = "interaction";
      break;
    case "redpacket":
      subtype = "redenvelope";
      break;
    case "image":
    case "video":
    case "emoji":
    case "voice":
      subtype = "media";
      break;
    default:
      subtype = "message";
  }

  const event = {
    id: eventId,
    type: "event",
    subtype,
    occurredAt,
    actor: actorId || selfId,
    participants: dedup(participants).filter(Boolean),
    content: {
      title: parsed.text.slice(0, 80) || "(无内容)",
      text: parsed.text,
    },
    ingestedAt: now,
    source,
    extra: {
      wechatType: Number(row.type),
      isSend,
      talker: row.talker,
      ...(isGroup ? { isGroup: true, chatroom: row.talker } : {}),
      ...parsed.structured,
    },
  };

  // Persons: the talker first, then (group chats) the individual sender.
  const persons = [];
  if (peerId && peerId !== selfId) {
    persons.push({
      id: peerId,
      type: "person",
      subtype: isGroup ? "unknown" : "contact",
      names: [contactDisplayName(ctx.contactByUsername, row.talker)],
      identifiers: { wechatId: row.talker },
      ingestedAt: now,
      source,
      extra: { fromAdapter: NAME, wxid: row.talker },
    });
  }
  if (senderId && senderId !== selfId && !persons.some((p) => p.id === senderId)) {
    persons.push({
      id: senderId,
      type: "person",
      subtype: "contact",
      names: [contactDisplayName(ctx.contactByUsername, senderWxid)],
      identifiers: { wechatId: senderWxid },
      ingestedAt: now,
      source,
      extra: { fromAdapter: NAME, wxid: senderWxid },
    });
  }

  // Topic: every group chat is a Topic (per design doc OQ-4 = C).
  const topics = [];
  if (isGroup) {
    const chatroomName = (ctx.chatroomByName && ctx.chatroomByName[row.talker])
      || row.talker.replace("@chatroom", "");
    topics.push({
      id: `topic-wechat-group-${row.talker}`,
      type: "topic",
      name: chatroomName,
      derivedFromEvents: [event.id],
      ingestedAt: now,
      source,
      extra: { wxid: row.talker, fromAdapter: NAME },
    });
    // Do not overwrite a topicId the content parser already supplied.
    if (!event.extra.topicId) event.extra.topicId = topics[0].id;
  }

  return { events: [event], persons, places: [], items: [], topics };
}
|
|
143
|
+
|
|
144
|
+
/**
 * Turn one contact row into a batch holding a single Person entity.
 *
 * The person's `names` list is built from `conRemark`, `nickname`, `alias`
 * and `username` (in that order), keeping only non-empty strings and
 * dropping duplicates. A row without a `username` produces an empty batch.
 *
 * @param {object} row  raw contact row
 * @param {object} [ctx] accepted for signature parity with normalizeMessage;
 *   not read here
 * @returns {NormalizedBatch} `{ events, persons, places, items, topics }`
 */
function normalizeContact(row, ctx = {}) {
  const batch = { events: [], persons: [], places: [], items: [], topics: [] };
  if (!row || !row.username) return batch;

  const ingestedAt = Date.now();
  const wxid = row.username;

  const displayNames = [];
  for (const candidate of [row.conRemark, row.nickname, row.alias, wxid]) {
    if (typeof candidate === "string" && candidate.length > 0) {
      displayNames.push(candidate);
    }
  }

  batch.persons.push({
    id: wxidToPersonId(wxid),
    type: "person",
    subtype: guessContactSubtype(row),
    names: dedup(displayNames),
    identifiers: { wechatId: wxid },
    ingestedAt,
    source: {
      adapter: NAME,
      adapterVersion: VERSION,
      originalId: `wechat-contact-${wxid}`,
      capturedAt: ingestedAt,
      capturedBy: "sqlite",
    },
    extra: { fromAdapter: NAME, wxid, wechatType: row.type },
  });
  return batch;
}
|
|
174
|
+
|
|
175
|
+
// ─── helpers ────────────────────────────────────────────────────────────
|
|
176
|
+
|
|
177
|
+
/**
 * Build the stable Person id for a WeChat user id.
 *
 * @param {string} wxid WeChat user id
 * @returns {string|null} `person-wechat-<wxid>`, or null for a falsy wxid
 */
function wxidToPersonId(wxid) {
  // The id is keyed directly off the wxid so repeated ingests produce the same id.
  return wxid ? `person-wechat-${wxid}` : null;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Return the distinct non-null values of `arr`, keeping first-seen order.
 *
 * @param {Iterable<*>} arr values to filter
 * @returns {Array<*>} new array without null/undefined entries or repeats
 */
function dedup(arr) {
  const alreadySeen = new Set();
  return [...arr].filter((item) => {
    if (item == null || alreadySeen.has(item)) return false;
    alreadySeen.add(item);
    return true;
  });
}
|
|
194
|
+
|
|
195
|
+
/**
 * Pick the best human-readable name for a wxid.
 *
 * Preference order for a known contact: remark name, nickname, alias; the
 * wxid itself is the last resort and also the answer for unknown contacts.
 *
 * @param {object} byUsername map of wxid -> contact row (may be null/undefined)
 * @param {string} wxid WeChat user id to look up
 * @returns {string} display name, or `wxid` when nothing better is known
 */
function contactDisplayName(byUsername, wxid) {
  const contact = byUsername ? byUsername[wxid] : undefined;
  if (!contact) return wxid;
  return contact.conRemark || contact.nickname || contact.alias || wxid;
}
|
|
202
|
+
|
|
203
|
+
/**
 * Classify a contact row for the Person `subtype` field.
 *
 * Deliberately coarse: a username ending in `@chatroom` is a group chat
 * rather than a person and is reported as "unknown"; everything else is a
 * "contact". The row's `type` field is not consulted here.
 *
 * @param {object} row contact row; only `username` is read
 * @returns {"unknown"|"contact"}
 */
function guessContactSubtype(row) {
  const { username } = row;
  const isChatroom = typeof username === "string" && username.endsWith("@chatroom");
  return isChatroom ? "unknown" : "contact";
}
|
|
213
|
+
|
|
214
|
+
module.exports = {
|
|
215
|
+
normalizeMessage,
|
|
216
|
+
normalizeContact,
|
|
217
|
+
wxidToPersonId,
|
|
218
|
+
NAME,
|
|
219
|
+
VERSION,
|
|
220
|
+
};
|
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 12 v0.5 — WechatAdapter (frida-INDEPENDENT slice).
|
|
3
|
+
*
|
|
4
|
+
* Per `Adapter_WeChat_SQLCipher.md` §17.2 buildable-now scope. This is
|
|
5
|
+
* the 60% of Phase 12 that can land without rooted device + Frida:
|
|
6
|
+
* everything from "DB file is decrypted at this path on disk" forward.
|
|
7
|
+
*
|
|
8
|
+
* Flow:
|
|
9
|
+
* 1. UI / CLI workflow drives the on-device pull via AndroidExtractor
|
|
10
|
+
* (Phase 7.5) — copies EnMicroMsg.db to a local cache.
|
|
11
|
+
* 2. keyProvider returns the key (legacy: KeyExtractor MD5(IMEI+UIN)
|
|
12
|
+
* computes it; Phase 12.6 hot path: Frida hook fetches it).
|
|
13
|
+
* 3. WechatAdapter.sync() opens the DB via WeChatDBReader, iterates
|
|
14
|
+
* message + contact tables, yields raw events.
|
|
15
|
+
* 4. normalize() turns each row into UnifiedSchema entities.
|
|
16
|
+
*
|
|
17
|
+
* Watermark: max msgSvrId per scope. Adapter sync({sinceWatermark}) is
|
|
18
|
+
* a high-water filter rather than per-talker — Phase 12.6 adds the
|
|
19
|
+
* per-talker variant.
|
|
20
|
+
*/
|
|
21
|
+
|
|
22
|
+
"use strict";
|
|
23
|
+
|
|
24
|
+
const fs = require("node:fs");
|
|
25
|
+
|
|
26
|
+
const { CAPTURED_BY } = require("../../constants");
|
|
27
|
+
const { WeChatDBReader } = require("./db-reader");
|
|
28
|
+
const { normalizeMessage, normalizeContact, NAME, VERSION } = require("./normalize");
|
|
29
|
+
|
|
30
|
+
/**
 * Adapter that streams rows out of a locally available WeChat database and
 * normalizes them into UnifiedSchema batches.
 *
 * The adapter does not pull or decrypt anything itself: it is handed a
 * `dbPath`, a `keyProvider` exposing `getKey()`, and optionally a
 * `dbReaderFactory` that replaces the default `WeChatDBReader` (test seam).
 */
class WechatAdapter {
  /**
   * @param {object} opts
   * @param {object} opts.account              must carry a truthy `uin`
   * @param {string} [opts.dbPath]             local path of the pulled DB
   * @param {{getKey: Function}} [opts.keyProvider] supplies the DB key
   * @param {Function} [opts.dbReaderFactory]  `(readerOpts) => reader`
   * @throws {Error} when `opts`, `opts.account` or `opts.account.uin` is missing
   */
  constructor(opts = {}) {
    if (!opts || typeof opts !== "object") {
      throw new Error("WechatAdapter: opts required");
    }
    if (!opts.account || typeof opts.account !== "object") {
      throw new Error("WechatAdapter: opts.account required");
    }
    if (!opts.account.uin) {
      throw new Error("WechatAdapter: opts.account.uin required (WeChat user identifier)");
    }
    this.account = opts.account;
    this._dbPath = opts.dbPath || null;
    this._keyProvider = opts.keyProvider || null;
    // Anything that is not a function is ignored and the default reader is used.
    this._dbReaderFactory = typeof opts.dbReaderFactory === "function"
      ? opts.dbReaderFactory
      : null;

    this.name = NAME;
    this.version = VERSION;
    this.capabilities = [
      "sync:sqlite",
      "auth:keystore",
      "decrypt:sqlcipher-v1",
      "parse:wechat-message",
    ];
    this.extractMode = "device-pull"; // Phase 7.5 contract field
    this.rateLimits = {};
    this.dataDisclosure = {
      fields: [
        "wechat:messages (text + group + 1-on-1 chats from EnMicroMsg.db)",
        "wechat:contacts (rcontact: nickname / alias / 备注名)",
        "wechat:chatrooms (group display names + member lists)",
      ],
      sensitivity: "high",
      legalGate: true, // first-use legal gate per design doc OQ-7
    };
  }

  /**
   * Sanity-check local prerequisites; there is no server-side auth.
   *
   * @returns {Promise<object>} `{ ok: true, account }` or
   *   `{ ok: false, reason, error }` with reason one of `DB_NOT_PULLED`,
   *   `NO_KEY_PROVIDER`, `EMPTY_KEY`, `KEY_PROVIDER_THREW`
   */
  async authenticate() {
    if (!this._dbPath || !fs.existsSync(this._dbPath)) {
      return { ok: false, reason: "DB_NOT_PULLED", error: `DB path missing: ${this._dbPath}` };
    }
    if (!this._keyProvider || typeof this._keyProvider.getKey !== "function") {
      return { ok: false, reason: "NO_KEY_PROVIDER", error: "keyProvider required" };
    }
    try {
      const key = await this._keyProvider.getKey();
      if (!key) return { ok: false, reason: "EMPTY_KEY", error: "keyProvider returned empty key" };
      return { ok: true, account: this.account.uin };
    } catch (err) {
      return { ok: false, reason: "KEY_PROVIDER_THREW", error: err && err.message ? err.message : String(err) };
    }
  }

  /**
   * Health probe built on `authenticate()`.
   *
   * @returns {Promise<object>} `{ ok: true, lastChecked }` or
   *   `{ ok: false, reason, error }`
   */
  async healthCheck() {
    const r = await this.authenticate();
    if (r.ok) return { ok: true, lastChecked: Date.now() };
    return { ok: false, reason: r.reason, error: r.error };
  }

  /**
   * Iterate WeChat data as a RawEvent stream: all contacts first, then
   * messages. Each yielded event has `payload.kind` of `"contact"` or
   * `"message"`. Chatrooms are loaded only to build the name lookup that
   * travels with every message payload.
   *
   * Progress events are `{ ...details, phase, adapter }`. `phase` and
   * `adapter` are written last so a detail key can never overwrite them.
   * The "not an EnMicroMsg.db" failure is reported as `phase: "error"` with
   * `stage: "verify"`.
   *
   * If the DB file is absent the generator emits an `idle` progress event
   * and finishes without yielding.
   *
   * @param {object} opts
   * @param {string|number} [opts.sinceWatermark]  max msgSvrId watermark
   * @param {number} [opts.maxPerType=10_000]      cap applied to the message fetch
   * @param {Function} [opts.onProgress]           progress listener; errors
   *   thrown by it are ignored
   */
  async *sync(opts = {}) {
    const onProgress = typeof opts.onProgress === "function" ? opts.onProgress : null;
    const emit = (phase, payload = {}) => {
      if (!onProgress) return;
      try {
        onProgress({ ...payload, phase, adapter: NAME });
      } catch (_e) {
        // A misbehaving listener must not abort the sync.
      }
    };

    if (!this._dbPath || !fs.existsSync(this._dbPath)) {
      // No DB pulled yet: registry-safe idle no-op.
      emit("idle", { reason: "no DB at " + this._dbPath });
      return;
    }
    const maxPerType = Number.isFinite(opts.maxPerType) ? opts.maxPerType : 10_000;
    const sinceMsgSvrId = parseWatermark(opts.sinceWatermark);

    emit("opening", { dbPath: this._dbPath });
    const Reader = this._dbReaderFactory || ((readerOpts) => new WeChatDBReader(readerOpts));
    const reader = Reader({ dbPath: this._dbPath, keyProvider: this._keyProvider });

    try {
      const openInfo = await reader.open();
      emit("opened", { profile: openInfo.profile, tables: openInfo.tables });

      if (!reader.isEnMicroMsg()) {
        emit("error", { stage: "verify", message: "not an EnMicroMsg.db (missing message/rcontact)" });
        return;
      }

      // Contacts first: gives normalize() context for message senders.
      const contacts = reader.fetchContacts({ limit: 10_000 });
      emit("contacts-loaded", { count: contacts.length });
      const contactByUsername = {};
      for (const c of contacts) contactByUsername[c.username] = c;
      for (const c of contacts) {
        yield this._rowToRaw("contact", c, { contactByUsername });
      }

      // Chatrooms: name lookup used when messages are normalized into topics.
      const chatrooms = reader.fetchChatrooms({ limit: 5000 });
      const chatroomByName = {};
      for (const cr of chatrooms) chatroomByName[cr.chatroomname] = cr.displayname || cr.chatroomname;
      emit("chatrooms-loaded", { count: chatrooms.length });

      // Messages
      const messages = reader.fetchMessages({ sinceMsgSvrId, limit: maxPerType });
      emit("messages-loaded", { count: messages.length, since: sinceMsgSvrId });
      let count = 0;
      let maxSvr = sinceMsgSvrId;
      for (const m of messages) {
        count += 1;
        if (Number(m.msgSvrId) > maxSvr) maxSvr = Number(m.msgSvrId);
        emit("processing", { current: count, total: messages.length, msgSvrId: m.msgSvrId });
        yield this._rowToRaw("message", m, { contactByUsername, chatroomByName });
      }
      emit("done", { messagesYielded: count, newWatermark: maxSvr });
    } finally {
      // Runs on normal completion, on error, and when the consumer stops early.
      try { reader.close(); } catch (_e) { /* closing is best-effort */ }
    }
  }

  /**
   * Convert one RawEvent produced by `sync()` into a NormalizedBatch.
   *
   * @param {object} raw RawEvent; `raw.payload.kind === "contact"` routes to
   *   the contact mapper, anything else to the message mapper
   * @returns {NormalizedBatch}
   * @throws {Error} when `raw` or `raw.payload` is missing
   */
  normalize(raw) {
    if (!raw || !raw.payload) {
      throw new Error("WechatAdapter.normalize: raw.payload missing");
    }
    const ctx = {
      accountUin: this.account.uin,
      contactByUsername: raw.payload.contactByUsername || {},
      chatroomByName: raw.payload.chatroomByName || {},
    };
    if (raw.payload.kind === "contact") {
      return normalizeContact(raw.payload.row, ctx);
    }
    return normalizeMessage(raw.payload.row, ctx);
  }

  /**
   * Wrap a DB row in the RawEvent envelope.
   *
   * @param {"message"|"contact"} kind
   * @param {object} row        DB row
   * @param {object} ctxExtras  lookup maps copied (by reference) into the payload
   * @returns {object} `{ adapter, originalId, capturedAt, payload }`
   */
  _rowToRaw(kind, row, ctxExtras = {}) {
    const originalId = kind === "message"
      ? String(row.msgSvrId || row.msgId)
      : `contact-${row.username}`;
    return {
      adapter: NAME,
      originalId,
      capturedAt: Date.now(),
      payload: {
        kind,
        row,
        ...ctxExtras,
      },
    };
  }
}
|
|
198
|
+
|
|
199
|
+
/**
 * Coerce a stored watermark into a non-negative integer.
 *
 * @param {string|number|null|undefined} wm stored watermark value
 * @returns {number} the base-10 integer parsed from `wm` when it is positive;
 *   0 for null/undefined, unparsable, zero or negative input
 */
function parseWatermark(wm) {
  if (wm === null || wm === undefined) return 0;
  const parsed = parseInt(String(wm), 10);
  if (!Number.isFinite(parsed) || parsed <= 0) return 0;
  return parsed;
}
|
|
204
|
+
|
|
205
|
+
module.exports = { WechatAdapter, NAME, VERSION };
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Phase 11 — Analysis skills base.
|
|
3
|
+
*
|
|
4
|
+
* Each skill = a focused analysis function over the vault. Inputs are
|
|
5
|
+
* a small typed options bag (time range + dimension + filters); output
|
|
6
|
+
* is `{ summary, breakdown, llm_commentary?, citations }`.
|
|
7
|
+
*
|
|
8
|
+
* Skills are pure logic on vault data + optional LLM commentary. They
|
|
9
|
+
* compose with cross-source merge groups (Phase 8 EntityResolver) so
|
|
10
|
+
* "上个月给我妈花了多少" returns combined Email + Alipay + WeChat
|
|
11
|
+
* spending tied to the same merged Person.
|
|
12
|
+
*
|
|
13
|
+
* Skills share these conventions:
|
|
14
|
+
* - `vault` injected at construction
|
|
15
|
+
* - `llm` optional; when null, skill returns pure-data result (no
|
|
16
|
+
* commentary); when provided, llm.chat() generates a 1-2 sentence
|
|
17
|
+
* prose commentary on the breakdown.
|
|
18
|
+
* - `timeWindow` is `{ since, until }` ms epoch pair; absent = all-time
|
|
19
|
+
* - results always carry `citations` = list of event ids that
|
|
20
|
+
* contributed to the answer (lets UI deep-link back per Phase 5.6
|
|
21
|
+
* citation flow)
|
|
22
|
+
*
|
|
23
|
+
* Privacy invariant: every skill that calls llm passes
|
|
24
|
+
* `acceptNonLocal: false` to the wrapper; non-local LLMs need explicit
|
|
25
|
+
* opt-in from the caller (same gate as AnalysisEngine).
|
|
26
|
+
*/
|
|
27
|
+
|
|
28
|
+
"use strict";
|
|
29
|
+
|
|
30
|
+
/**
 * Base class for analysis skills. Holds the injected vault and optional LLM,
 * and provides the helpers concrete skills share. Subclasses override `run()`.
 */
class AnalysisSkill {
  /**
   * @param {object} opts
   * @param {object} opts.vault   data vault the skill reads from (required)
   * @param {object} [opts.llm]   optional LLM client exposing `chat()`
   * @param {string} [opts.name]  skill name used in error messages
   * @throws {Error} when `opts` or `opts.vault` is missing
   */
  constructor(opts) {
    if (!opts || typeof opts !== "object") {
      throw new Error("AnalysisSkill: opts required");
    }
    if (!opts.vault) {
      throw new Error("AnalysisSkill: opts.vault required");
    }
    this.vault = opts.vault;
    this.llm = opts.llm || null; // optional
    this.name = opts.name || "unnamed";
  }

  /**
   * Execute the skill. The base implementation always rejects; subclasses
   * must override it.
   *
   * @param {object} [_options]
   * @returns {Promise<never>}
   */
  async run(_options = {}) {
    throw new Error(`AnalysisSkill.run() not implemented for ${this.name}`);
  }

  // ─── helpers shared by skills ───────────────────────────────────────

  /**
   * Normalize a time window. Accepted forms, checked in this order:
   *   - { since, until }  ms epoch; `until` defaults to now
   *   - { sinceDays }     relative (now - N days)
   *   - { sinceMonths }   relative, using 30-day months
   * Each form applies only when its number is greater than zero.
   *
   * @param {object} [options]
   * @returns {{since: ?number, until: ?number}} ms epoch pair, or both null
   *   for all-time
   */
  resolveTimeWindow(options = {}) {
    const now = Date.now();
    const DAY_MS = 24 * 3600_000;
    if (typeof options.since === "number" && options.since > 0) {
      // NaN is typeof "number" but cannot bound a range; treat it as "not given".
      const untilGiven = typeof options.until === "number" && !Number.isNaN(options.until);
      return {
        since: options.since,
        until: untilGiven ? options.until : now,
      };
    }
    if (typeof options.sinceDays === "number" && options.sinceDays > 0) {
      return { since: now - options.sinceDays * DAY_MS, until: now };
    }
    if (typeof options.sinceMonths === "number" && options.sinceMonths > 0) {
      return { since: now - options.sinceMonths * 30 * DAY_MS, until: now };
    }
    return { since: null, until: null };
  }

  /**
   * Expand a personId to all Person ids in its merge group.
   *
   * Falls back to `[personId]` when the vault has no
   * `getMergeGroupMembers()` method, when the lookup throws, or when it
   * reports nothing (null/undefined or an empty array). Any other return
   * value is passed through untouched.
   *
   * @param {string} personId
   * @returns {Array<string>|*} member ids; `[]` for a falsy `personId`
   */
  expandToMergeGroup(personId) {
    if (!personId) return [];
    try {
      if (typeof this.vault.getMergeGroupMembers === "function") {
        const members = this.vault.getMergeGroupMembers(personId);
        const reportedNothing = members == null
          || (Array.isArray(members) && members.length === 0);
        if (!reportedNothing) return members;
      }
    } catch (_e) {
      // A failing lookup degrades to "no merge group" instead of failing the skill.
    }
    return [personId];
  }

  /**
   * Wrap llm.chat() with the privacy gate.
   *
   * The call is skipped when no usable LLM is configured, or when the LLM
   * declares `isLocal === false` and the caller did not pass
   * `opts.acceptNonLocal`. `opts` is forwarded to `chat()` on top of a
   * default `temperature` of 0.2.
   *
   * @param {Array} messages  chat messages passed to `llm.chat()`
   * @param {object} [opts]   chat options plus the `acceptNonLocal` opt-in
   * @returns {Promise<?string>} the response's `text`, or null when the call
   *   is skipped, throws, or yields no text
   */
  async callLlmCommentary(messages, opts = {}) {
    if (!this.llm || typeof this.llm.chat !== "function") return null;
    if (this.llm.isLocal === false && !opts.acceptNonLocal) {
      return null;
    }
    try {
      const r = await this.llm.chat(messages, { temperature: 0.2, ...opts });
      return (r && r.text) || null;
    } catch (_e) {
      return null;
    }
  }
}
|
|
112
|
+
|
|
113
|
+
module.exports = { AnalysisSkill };
|