@chainlesschain/personal-data-hub 0.4.34 → 0.4.36
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/forensics/qq-nt-collect.js +58 -11
- package/lib/forensics/qzone-collect.js +168 -0
- package/lib/forensics/wechat-collect.js +150 -12
- package/package.json +2 -1
|
@@ -137,16 +137,50 @@ function bodyText(blob) {
|
|
|
137
137
|
* @param self the user's own QQ number (attribution fallback)
|
|
138
138
|
* @returns {Array} event objects ready for vault.putEvent
|
|
139
139
|
*/
|
|
140
|
-
|
|
140
|
+
const SELF_QQ_ID = 'person-qq-self';
|
|
141
|
+
const SRC_QQ = (originalId, at) => ({
|
|
142
|
+
adapter: 'qq-pc', adapterVersion: '0.1.0',
|
|
143
|
+
originalId: originalId || `qq-${at || 0}`,
|
|
144
|
+
capturedAt: at || Date.now(), capturedBy: 'sqlite',
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
/**
|
|
148
|
+
* Parse a decrypted QQNT nt_msg.db into a vault batch `{events, persons, topics}`
|
|
149
|
+
* (mirrors wechat-collect): named contacts (sender nickname 40090), canonical
|
|
150
|
+
* self (sender uid 40020 === matched account uid → person-qq-self), group
|
|
151
|
+
* topics, clean titles, and a UNIQUE source.originalId per person/topic (a
|
|
152
|
+
* shared one collapses every row via the persons (adapter, originalId) index).
|
|
153
|
+
*
|
|
154
|
+
* @param opts {string|{selfUid?:string, self?:string}} — selfUid = the matched
|
|
155
|
+
* account uid (from deriveAndDecrypt) for reliable self attribution; a bare
|
|
156
|
+
* string is the legacy own-QQ-number fallback.
|
|
157
|
+
*/
|
|
158
|
+
function parseEvents(Database, dbPath, opts) {
|
|
159
|
+
const selfUid = opts && typeof opts === 'object' ? opts.selfUid || '' : '';
|
|
160
|
+
const selfQQ = opts && typeof opts === 'object' ? opts.self || '' : opts || '';
|
|
141
161
|
const src = new Database(dbPath, { readonly: true });
|
|
142
162
|
const events = [];
|
|
163
|
+
const persons = new Map();
|
|
164
|
+
const topics = new Map();
|
|
143
165
|
const num = (v) => (typeof v === 'bigint' ? Number(v) : v);
|
|
166
|
+
const addPerson = (qq, uid, nick) => {
|
|
167
|
+
if (!qq) return;
|
|
168
|
+
const id = `person-qq-${qq}`;
|
|
169
|
+
if (persons.has(id)) return;
|
|
170
|
+
const nm = nick && nick.trim() && nick.trim() !== qq ? nick.trim() : null;
|
|
171
|
+
persons.set(id, {
|
|
172
|
+
type: 'person', subtype: 'contact', id,
|
|
173
|
+
names: nm ? [nm, qq] : [qq],
|
|
174
|
+
identifiers: { qq, ...(uid ? { qqUid: uid } : {}) },
|
|
175
|
+
source: SRC_QQ(id), ingestedAt: Date.now(),
|
|
176
|
+
});
|
|
177
|
+
};
|
|
144
178
|
const ingestTable = (table, isGroup) => {
|
|
145
179
|
let rows;
|
|
146
180
|
try {
|
|
147
181
|
rows = src.prepare(
|
|
148
182
|
`SELECT [40001] msgId,[40020] uid,[40011] type,[40033] sender,[40021] peer,` +
|
|
149
|
-
`[40050] t,[40800] body FROM ${table}`,
|
|
183
|
+
`[40050] t,[40090] nick,[40800] body FROM ${table}`,
|
|
150
184
|
).safeIntegers().all();
|
|
151
185
|
} catch { return; }
|
|
152
186
|
for (const r of rows) {
|
|
@@ -160,20 +194,32 @@ function parseEvents(Database, dbPath, self) {
|
|
|
160
194
|
const msgId = typeof r.msgId === 'bigint' ? r.msgId.toString() : String(r.msgId);
|
|
161
195
|
const sender = String(num(r.sender) || '');
|
|
162
196
|
const peer = String(num(r.peer) || '');
|
|
197
|
+
const uid = r.uid ? String(r.uid) : '';
|
|
198
|
+
const nick = r.nick ? String(r.nick) : '';
|
|
163
199
|
const occurredAt = num(r.t) * 1000;
|
|
164
200
|
if (!occurredAt) continue;
|
|
165
|
-
|
|
201
|
+
// Self = the sender's uid is the matched account uid. Map to canonical
|
|
202
|
+
// person-qq-self so analysis excludes the owner from contact rankings.
|
|
203
|
+
const isSelf = !!(selfUid && uid && uid === selfUid);
|
|
204
|
+
const actor = isSelf ? SELF_QQ_ID : (sender ? `person-qq-${sender}` : `person-qq-${selfQQ || 'unknown'}`);
|
|
205
|
+
if (!isSelf && sender) addPerson(sender, uid, nick);
|
|
166
206
|
const participants = [actor];
|
|
167
|
-
|
|
207
|
+
let topicId;
|
|
208
|
+
if (isGroup) {
|
|
209
|
+
topicId = `group-qq-${peer}`;
|
|
210
|
+
participants.push(topicId);
|
|
211
|
+
if (!topics.has(topicId)) topics.set(topicId, { type: 'topic', id: topicId, name: peer, source: SRC_QQ(topicId), ingestedAt: Date.now() });
|
|
212
|
+
} else {
|
|
213
|
+
participants.push(`person-qq-${peer}`);
|
|
214
|
+
}
|
|
215
|
+
const title = text.replace(/\s+/g, ' ').trim().slice(0, 80);
|
|
168
216
|
events.push({
|
|
169
217
|
type: 'event', subtype: 'message', id: `qq:${table}:${msgId}`,
|
|
170
218
|
occurredAt, actor, participants,
|
|
171
|
-
content: { text: isGroup ? `[群${peer}] ${text}` : text },
|
|
172
|
-
topics:
|
|
173
|
-
source: {
|
|
174
|
-
|
|
175
|
-
capturedAt: occurredAt, capturedBy: 'sqlite',
|
|
176
|
-
},
|
|
219
|
+
content: { title: title || '(无内容)', text: isGroup ? `[群${peer}] ${text}` : text },
|
|
220
|
+
topics: topicId ? [topicId] : undefined,
|
|
221
|
+
source: SRC_QQ(`${table}:${msgId}`, occurredAt),
|
|
222
|
+
extra: { isSelf, peer },
|
|
177
223
|
ingestedAt: Date.now(),
|
|
178
224
|
});
|
|
179
225
|
}
|
|
@@ -181,10 +227,11 @@ function parseEvents(Database, dbPath, self) {
|
|
|
181
227
|
try {
|
|
182
228
|
ingestTable('c2c_msg_table', false);
|
|
183
229
|
ingestTable('group_msg_table', true);
|
|
230
|
+
persons.set(SELF_QQ_ID, { type: 'person', subtype: 'contact', id: SELF_QQ_ID, names: ['我(QQ)'], source: SRC_QQ(SELF_QQ_ID), ingestedAt: Date.now() });
|
|
184
231
|
} finally {
|
|
185
232
|
src.close();
|
|
186
233
|
}
|
|
187
|
-
return events;
|
|
234
|
+
return { events, persons: [...persons.values()], topics: [...topics.values()] };
|
|
188
235
|
}
|
|
189
236
|
|
|
190
237
|
module.exports = { extractRand, headerHmac, deriveAndDecrypt, bodyText, parseEvents };
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
/**
|
|
3
|
+
* qzone-collect — QQ空间 (Qzone) collector core: 说说 / 留言板 / 相册 → vault events.
|
|
4
|
+
*
|
|
5
|
+
* Qzone has NO local browsable DB (the QQNT databases only cache per-contact
|
|
6
|
+
* "latest feed" preview snippets), so this is the API path: Qzone CGI endpoints
|
|
7
|
+
* authed with the account's qzone-domain `p_skey` + `uin` + a `g_tk` token
|
|
8
|
+
* derived from p_skey (the bkn hash). Pure Node — the only side effect is the
|
|
9
|
+
* caller-supplied `fetchImpl` (defaults to global fetch), so the parsers are
|
|
10
|
+
* unit-testable and the same core runs on PC (`cc hub collect-qzone --cookie`)
|
|
11
|
+
* and in-APK (the Android app captures the cookie via a WebView and feeds it in).
|
|
12
|
+
*
|
|
13
|
+
* Cookie note: the base `.qq.com` skey is rejected by Qzone ("请先登录空间") —
|
|
14
|
+
* the qzone-domain `p_skey` is required (a browser login to user.qzone.qq.com,
|
|
15
|
+
* or the in-app WebView, yields it). Extracted from
|
|
16
|
+
* scripts/android/pdh-qzone-collect.mjs (behaviour identical).
|
|
17
|
+
*/
|
|
18
|
+
const SELF_ID = 'person-qq-self';
|
|
19
|
+
const UA = 'Mozilla/5.0 (Linux; Android 13) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Mobile Safari/537.36';
|
|
20
|
+
/**
 * Build the provenance (`source`) record attached to every Qzone entity.
 * @param {string} originalId upstream identifier recorded as-is
 * @param {number} [at] capture time in epoch ms; any falsy value means "now"
 * @returns {{adapter: string, adapterVersion: string, originalId: string, capturedAt: number, capturedBy: string}}
 */
const SRC = (originalId, at) => {
  const capturedAt = at ? at : Date.now();
  return {
    adapter: 'qzone',
    adapterVersion: '0.1.0',
    originalId,
    capturedAt,
    capturedBy: 'api',
  };
};
|
|
21
|
+
|
|
22
|
+
/** Qzone bkn/g_tk hash over p_skey (or skey). */
|
|
23
|
+
/**
 * Qzone bkn/g_tk token: starting from 5381, fold every UTF-16 code unit of
 * `String(s)` with `h += (h << 5) + code`, then mask to 31 bits.
 * @param {*} s p_skey (or skey); coerced with String()
 * @returns {number} non-negative 31-bit integer
 */
function gtk(s) {
  const key = String(s);
  let acc = 5381;
  let idx = 0;
  while (idx < key.length) {
    acc += (acc << 5) + key.charCodeAt(idx);
    idx += 1;
  }
  return acc & 0x7fffffff;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Parse a `Cookie:`-style string ("a=1; b=2") into a plain name → value object.
 * Segments without `=`, or with an empty name, are skipped; only the first `=`
 * separates name from value; a repeated name keeps the last value.
 * @param {*} s cookie string (coerced with String())
 * @returns {Object<string, string>}
 */
function parseCookieStr(s) {
  const jar = {};
  String(s).split(/;\s*/).forEach((segment) => {
    const eq = segment.indexOf('=');
    if (eq <= 0) return;
    const name = segment.slice(0, eq).trim();
    jar[name] = segment.slice(eq + 1).trim();
  });
  return jar;
}
|
|
26
|
+
/**
 * Serialise a cookie object back into a `Cookie` request-header value
 * ("name=value" pairs joined with "; ").
 * @param {Object<string, *>} ck cookie name → value map
 * @returns {string}
 */
function cookieHeader(ck) {
  const pairs = [];
  for (const name of Object.keys(ck)) {
    pairs.push(`${name}=${ck[name]}`);
  }
  return pairs.join('; ');
}
|
|
27
|
+
/**
 * Reduce an HTML fragment to plain text: drop <img> tags and all other tags,
 * decode the handful of named entities Qzone emits, collapse whitespace.
 *
 * `&amp;` is decoded LAST so that double-escaped input such as `&amp;lt;`
 * is unescaped exactly one level (to `&lt;`) instead of turning into `<`.
 *
 * @param {*} s HTML string; null/undefined/empty yields ''
 * @returns {string}
 */
function stripHtml(s) {
  return String(s || '')
    .replace(/<img[^>]*>/gi, '')
    .replace(/<[^>]+>/g, '')
    .replace(/&nbsp;/g, ' ')
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&amp;/g, '&')
    .replace(/\s+/g, ' ')
    .trim();
}
|
|
34
|
+
/**
 * Convert a "YYYY-MM-DD HH:MM:SS" (or "T"-separated) wall-clock string,
 * interpreted at UTC+08:00, into epoch milliseconds.
 * @param {*} s timestamp text
 * @returns {number} epoch ms, or 0 when the text does not match or cannot be parsed
 */
function beijingMs(s) {
  const parts = String(s || '').match(/^(\d{4})-(\d{2})-(\d{2})[ T](\d{2}):(\d{2}):(\d{2})/);
  if (parts === null) return 0;
  const [, year, month, day, hour, minute, second] = parts;
  const ms = Date.parse(`${year}-${month}-${day}T${hour}:${minute}:${second}+08:00`);
  return ms || 0;
}
|
|
35
|
+
/**
 * Strip a JSONP wrapper (`callbackName(` … `)` with optional `;`) from a
 * response body, leaving the inner payload text. Plain JSON passes through.
 * @param {*} text response body (coerced with String())
 * @returns {string}
 */
function unwrap(text) {
  const trimmed = String(text).trim();
  const withoutCallback = trimmed.replace(/^[\w$]+\(/, '');
  return withoutCallback.replace(/\);?\s*$/, '');
}
|
|
36
|
+
|
|
37
|
+
// ── 说说 (emotion_cgi_msglist_v6) → EVENT(post) ─────────────────────────────
|
|
38
|
+
/**
 * Parse an emotion_cgi_msglist_v6 response (JSON or JSONP) into post events.
 *
 * Robustness: a payload that is not a JSON object (e.g. `null`) is reported
 * as `code: -1` instead of throwing; a non-array `msglist`, null items and
 * non-string `content` are tolerated.
 *
 * @param {string} text raw response body
 * @returns {{code: number, message?: string, events: Array, total?: number}}
 *   `code` is 0 on success, -1 when the body is not a JSON object, otherwise
 *   the API's own error code (with `message`).
 */
function parseQzoneFeed(text) {
  let json;
  try {
    json = JSON.parse(unwrap(text));
  } catch {
    return { code: -1, events: [] };
  }
  if (json === null || typeof json !== 'object') return { code: -1, events: [] };
  // Unlike the other endpoints, a missing `code` is treated as success here.
  if (json.code !== undefined && json.code !== 0) {
    return { code: json.code, message: json.message, events: [] };
  }
  const rawList = json.msglist || (json.result && json.result.msglist);
  const list = Array.isArray(rawList) ? rawList : [];
  const events = [];
  for (const it of list) {
    if (!it) continue;
    const tid = it.tid || it.t1_tid || it.cellid;
    const occurredAt = (Number(it.created_time) || 0) * 1000; // created_time is multiplied from seconds to ms
    if (!tid || !occurredAt) continue;
    const txt = String(it.content || it.summary || '').replace(/\s+/g, ' ').trim();
    const pics = Array.isArray(it.pic) ? it.pic.length : 0;
    if (!txt && !pics) continue; // nothing to show for this item
    events.push({
      type: 'event', subtype: 'post', id: `qzone:${tid}`,
      occurredAt, actor: SELF_ID, participants: [SELF_ID],
      content: { title: (txt || '[图片] 我的说说').slice(0, 80), text: txt || undefined },
      source: SRC(`qzone-${tid}`, occurredAt),
      extra: { kind: 'qzone-shuoshuo', tid, mediaCount: pics, cmtnum: it.cmtnum || 0, secret: !!it.secret },
      ingestedAt: Date.now(),
    });
  }
  return { code: 0, events, total: json.total != null ? json.total : (json.result && json.result.total) };
}
|
|
61
|
+
|
|
62
|
+
// ── 留言板 (get_msgb) → EVENT(message) by the commenter ────────────────────
|
|
63
|
+
/**
 * Parse a get_msgb (guestbook) response into message events plus one person
 * record per distinct commenter.
 *
 * Robustness: a payload that is not a JSON object (e.g. `null`) is reported
 * as `code: -1` instead of throwing; a non-array `commentList`, null entries
 * and non-string nicknames are tolerated.
 *
 * @param {string} text raw response body (JSON or JSONP)
 * @returns {{code: number, message?: string, events: Array, persons: Array, total?: number}}
 */
function parseGuestbook(text) {
  let json;
  try {
    json = JSON.parse(unwrap(text));
  } catch {
    return { code: -1, events: [], persons: [] };
  }
  if (json === null || typeof json !== 'object') return { code: -1, events: [], persons: [] };
  if (json.code !== 0) return { code: json.code, message: json.message, events: [], persons: [] };
  const rawList = json.data && json.data.commentList;
  const list = Array.isArray(rawList) ? rawList : [];
  const events = [];
  const persons = new Map();
  for (const c of list) {
    if (!c) continue;
    const id = c.id;
    const occurredAt = beijingMs(c.pubtime);
    const txt = stripHtml(c.htmlContent || c.content || '');
    if (!id || !occurredAt || !txt) continue;
    const fromUin = String(c.uin || '');
    const fromNick = String(c.nickname || fromUin);
    // A comment without a uin is attributed to the owner.
    const actor = fromUin ? `person-qq-${fromUin}` : SELF_ID;
    if (fromUin && !persons.has(actor)) {
      persons.set(actor, {
        type: 'person', subtype: 'contact', id: actor,
        names: fromNick !== fromUin ? [fromNick, fromUin] : [fromUin],
        identifiers: { qqUin: fromUin },
        source: SRC(actor), ingestedAt: Date.now(),
      });
    }
    events.push({
      type: 'event', subtype: 'message', id: `qzone-msgb:${id}`,
      occurredAt, actor, participants: [actor, SELF_ID],
      content: { title: txt.slice(0, 80), text: txt },
      source: SRC(`qzone-msgb-${id}`, occurredAt),
      extra: { kind: 'qzone-guestbook', fromUin, fromNick },
      ingestedAt: Date.now(),
    });
  }
  return { code: 0, events, persons: [...persons.values()], total: json.data && json.data.total };
}
|
|
87
|
+
|
|
88
|
+
// ── 相册 (fcg_list_album_v3) → EVENT(media) per album ──────────────────────
|
|
89
|
+
/**
 * Parse a fcg_list_album_v3 response into one media event per album.
 *
 * Robustness: a payload that is not a JSON object (e.g. `null`) is reported
 * as `code: -1` instead of throwing, and a non-array `albumList` is ignored.
 * When an album has no usable `createtime`, a single "now" timestamp is used
 * for both `occurredAt` and `source.capturedAt` so the two always agree.
 *
 * @param {string} text raw response body (JSON or JSONP)
 * @returns {{code: number, message?: string, events: Array, total?: number}}
 */
function parseAlbums(text) {
  let json;
  try {
    json = JSON.parse(unwrap(text));
  } catch {
    return { code: -1, events: [] };
  }
  if (json === null || typeof json !== 'object') return { code: -1, events: [] };
  if (json.code !== 0) return { code: json.code, message: json.message, events: [] };
  const rawList = json.data && json.data.albumList;
  const list = Array.isArray(rawList) ? rawList : [];
  const events = [];
  for (const a of list) {
    if (!a || !a.id) continue;
    const at = (Number(a.createtime) || 0) * 1000 || Date.now();
    const name = a.name || '(相册)';
    events.push({
      type: 'event', subtype: 'media', id: `qzone-album:${a.id}`,
      occurredAt: at, actor: SELF_ID, participants: [SELF_ID],
      content: { title: `相册:${name}(${a.total || 0} 张)`, text: a.desc || undefined },
      source: SRC(`qzone-album-${a.id}`, at),
      extra: { kind: 'qzone-album', albumId: a.id, photoCount: a.total || 0, desc: a.desc || '', commentCount: a.comment || 0 },
      ingestedAt: Date.now(),
    });
  }
  const declared = json.data && json.data.albumsInUser;
  return { code: 0, events, total: declared != null ? declared : list.length };
}
|
|
109
|
+
|
|
110
|
+
/**
 * Build a user.qzone.qq.com proxy URL for a Qzone CGI endpoint.
 *
 * The fixed format/charset/source parameters come first and can be overridden
 * by `params`. Parameters whose value is `undefined` or `null` are omitted
 * (rather than being sent as the literal text "undefined"/"null"), and both
 * keys and values are percent-encoded.
 *
 * @param {string} domainPath host + path of the target CGI, e.g. `taotao.qq.com/cgi-bin/...`
 * @param {Object<string, *>} [params] extra query parameters
 * @returns {string} absolute https URL
 */
function qproxy(domainPath, params) {
  const merged = { format: 'json', inCharset: 'utf-8', outCharset: 'utf-8', source: 'qzone', plat: 'qzone', ...params };
  const qs = Object.entries(merged)
    .filter(([, v]) => v !== undefined && v !== null)
    .map(([k, v]) => `${encodeURIComponent(k)}=${encodeURIComponent(v)}`)
    .join('&');
  return `https://user.qzone.qq.com/proxy/domain/${domainPath}?${qs}`;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Collect Qzone data into a vault batch. `fetchImpl(url, opts)` is injectable
 * (defaults to global fetch) so this is testable offline and runs in-APK.
 *
 * @param {object} [options]
 * @param {string|number} [options.uin] account number; falls back to the
 *   `uin` / `p_uin` cookies (non-digits and leading zeros are stripped).
 * @param {string|Object<string,string>} [options.cookie] cookie header string
 *   or an already-parsed name → value map; must carry `p_skey` (or `skey`).
 * @param {string[]|string} [options.what=['shuoshuo']] kinds to collect —
 *   any of `shuoshuo`, `msgb`, `album`; an array or a comma-separated string.
 * @param {number} [options.max=500] upper bound on the events gathered for
 *   each paged kind (`shuoshuo`, `msgb`). Enforced exactly: a final page is
 *   trimmed so the count never exceeds `max`.
 * @param {Function} [options.fetchImpl] fetch-compatible function; it may
 *   resolve to a Response-like object with `.text()` or directly to the body.
 * @returns {Promise<{ok, uin, events, persons, counts, reason?}>}
 * @throws {Error} when no fetch implementation is available.
 */
async function collectQzone({ uin, cookie, what = ['shuoshuo'], max = 500, fetchImpl } = {}) {
  const PAGE_SIZE = 20;
  const ck = typeof cookie === 'string' ? parseCookieStr(cookie) : (cookie || {});
  // QQ uin cookies are `o0<uin>` — strip the o/0 prefix (uins never have leading zeros).
  const cleanUin = (s) => String(s || '').replace(/\D/g, '').replace(/^0+/, '');
  const account = cleanUin(uin) || cleanUin(ck.uin) || cleanUin(ck.p_uin);
  const pskey = ck.p_skey || ck.skey;
  if (!account || !pskey) return { ok: false, reason: 'missing uin or p_skey', events: [], persons: [], counts: {} };
  const _fetch = fetchImpl || (typeof fetch !== 'undefined' ? fetch : null);
  if (!_fetch) throw new Error('qzone collect: no fetch implementation available');

  // Empty entries (e.g. from "shuoshuo,,msgb" or a trailing comma) are dropped.
  const wantList = (Array.isArray(what) ? what : String(what).split(','))
    .map((s) => String(s).trim())
    .filter(Boolean);
  const wantSet = new Set(wantList);
  const limit = Number(max) || 0;
  const g = gtk(pskey);
  const headers = { Cookie: cookieHeader(ck), Referer: `https://user.qzone.qq.com/${account}`, 'User-Agent': UA };
  const get = async (url) => {
    const r = await _fetch(url, { headers });
    return typeof r.text === 'function' ? r.text() : r;
  };

  const events = [];
  const persons = new Map();
  const counts = {};

  if (wantSet.has('shuoshuo')) {
    let n = 0;
    for (let pos = 0; pos < limit && n < limit; pos += PAGE_SIZE) {
      const r = parseQzoneFeed(await get(qproxy('taotao.qq.com/cgi-bin/emotion_cgi_msglist_v6', { uin: account, hostUin: account, num: PAGE_SIZE, pos, g_tk: g, need_private_comment: 1 })));
      if (r.code !== 0 || !r.events.length) break;
      const page = r.events.slice(0, limit - n); // never exceed the cap
      events.push(...page);
      n += page.length;
      if (r.total != null && n >= r.total) break;
    }
    counts.shuoshuo = n;
  }
  if (wantSet.has('msgb')) {
    let n = 0;
    for (let start = 0; start < limit && n < limit; start += PAGE_SIZE) {
      const r = parseGuestbook(await get(qproxy('m.qzone.qq.com/cgi-bin/new/get_msgb', { uin: account, hostUin: account, num: PAGE_SIZE, start, g_tk: g })));
      if (r.code !== 0 || !r.events.length) break;
      const page = r.events.slice(0, limit - n); // never exceed the cap
      events.push(...page);
      for (const p of r.persons) persons.set(p.id, p);
      n += page.length;
      if (r.total != null && n >= r.total) break;
    }
    counts.msgb = n;
  }
  if (wantSet.has('album')) {
    // Albums are fetched in a single request and are not subject to `max`.
    const r = parseAlbums(await get(qproxy('photo.qzone.qq.com/fcgi-bin/fcg_list_album_v3', { g_tk: g, hostUin: account, uin: account, mode: 2, pageStart: 0, pageNum: 200 })));
    if (r.code === 0) {
      events.push(...r.events);
      counts.album = r.events.length;
    } else {
      counts.album = 0;
    }
  }

  return { ok: true, uin: account, events, persons: [...persons.values()], counts };
}
|
|
167
|
+
|
|
168
|
+
module.exports = { gtk, parseCookieStr, stripHtml, parseQzoneFeed, parseGuestbook, parseAlbums, collectQzone, SELF_ID };
|
|
@@ -84,9 +84,27 @@ function deriveAndDecrypt(raw, passphrases, rawKeys) {
|
|
|
84
84
|
* Parse a DECRYPTED EnMicroMsg.db → vault events (wechat adapter shape).
|
|
85
85
|
* @param Database better-sqlite3 ctor (injected). @param self the user's wxid.
|
|
86
86
|
*/
|
|
87
|
-
|
|
87
|
+
// Self is ALWAYS the stable canonical id (mirrors adapters/wechat/normalize.js)
|
|
88
|
+
// so analysis skills exclude it from contact rankings and it never fragments.
|
|
89
|
+
const SELF_ID = 'person-wechat-self';
|
|
90
|
+
const SRC = (originalId, at) => ({
|
|
91
|
+
adapter: 'wechat', adapterVersion: '0.1.0',
|
|
92
|
+
originalId: originalId || `wechat-${at || 0}`,
|
|
93
|
+
capturedAt: at || Date.now(), capturedBy: 'sqlite',
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
/**
|
|
97
|
+
* Parse a decrypted EnMicroMsg.db into a vault batch. Returns
|
|
98
|
+
* `{ events, persons, topics }` so the on-device analysis skills get the rich
|
|
99
|
+
* entity graph (named contacts → relations; group topics → interests; clean
|
|
100
|
+
* titles → timeline) instead of bare message events. `self` is ignored — the
|
|
101
|
+
* sender of an outbound message maps to the canonical SELF_ID.
|
|
102
|
+
*/
|
|
103
|
+
function parseEvents(Database, dbPath, _self) {
|
|
88
104
|
const src = new Database(dbPath, { readonly: true });
|
|
89
105
|
const events = [];
|
|
106
|
+
const persons = new Map(); // id -> person record
|
|
107
|
+
const topics = new Map(); // id -> topic record
|
|
90
108
|
try {
|
|
91
109
|
const nameOf = new Map();
|
|
92
110
|
try {
|
|
@@ -94,6 +112,17 @@ function parseEvents(Database, dbPath, self) {
|
|
|
94
112
|
nameOf.set(r.username, (r.conRemark && r.conRemark.trim()) || r.nickname || r.username);
|
|
95
113
|
}
|
|
96
114
|
} catch { /* contacts optional */ }
|
|
115
|
+
const addPerson = (wxid) => {
|
|
116
|
+
if (!wxid) return;
|
|
117
|
+
const id = `person-wechat-${wxid}`;
|
|
118
|
+
if (persons.has(id)) return;
|
|
119
|
+
const nm = nameOf.get(wxid);
|
|
120
|
+
// names[0] = display name (or wxid when unresolved); keep wxid as alias.
|
|
121
|
+
const names = nm && nm !== wxid ? [nm, wxid] : [wxid];
|
|
122
|
+
// Unique originalId per person — a shared originalId collapses every row
|
|
123
|
+
// into one via the persons (adapter, originalId) unique constraint.
|
|
124
|
+
persons.set(id, { type: 'person', subtype: 'contact', id, names, identifiers: { wechatId: wxid }, source: SRC(id), ingestedAt: Date.now() });
|
|
125
|
+
};
|
|
97
126
|
const rows = src.prepare(
|
|
98
127
|
'SELECT msgId,type,isSend,createTime,talker,content FROM message ' +
|
|
99
128
|
"WHERE type=1 ORDER BY createTime DESC LIMIT 5000",
|
|
@@ -101,7 +130,7 @@ function parseEvents(Database, dbPath, self) {
|
|
|
101
130
|
for (const r of rows) {
|
|
102
131
|
const isGroup = /@chatroom$/.test(r.talker || '');
|
|
103
132
|
let text = r.content || '';
|
|
104
|
-
let senderWxid = r.isSend ?
|
|
133
|
+
let senderWxid = r.isSend ? null : r.talker; // null = self (outbound)
|
|
105
134
|
if (isGroup && !r.isSend) {
|
|
106
135
|
const c = text.indexOf(':');
|
|
107
136
|
if (c > 0) { senderWxid = text.slice(0, c); text = text.slice(c + 1).replace(/^\n/, '').trim(); }
|
|
@@ -110,25 +139,134 @@ function parseEvents(Database, dbPath, self) {
|
|
|
110
139
|
const occurredAt = Number(r.createTime) || 0; // already ms in WeChat
|
|
111
140
|
if (!occurredAt) continue;
|
|
112
141
|
const peer = String(r.talker || '');
|
|
113
|
-
const actor = `person-wechat-${senderWxid ||
|
|
142
|
+
const actor = r.isSend ? SELF_ID : `person-wechat-${senderWxid || peer}`;
|
|
143
|
+
if (!r.isSend) addPerson(senderWxid || peer);
|
|
114
144
|
const participants = [actor];
|
|
115
|
-
|
|
145
|
+
let topicId;
|
|
146
|
+
if (isGroup) {
|
|
147
|
+
topicId = `group-wechat-${peer}`;
|
|
148
|
+
participants.push(topicId);
|
|
149
|
+
if (!topics.has(topicId)) {
|
|
150
|
+
topics.set(topicId, { type: 'topic', id: topicId, name: nameOf.get(peer) || peer.replace('@chatroom', ''), source: SRC(topicId), ingestedAt: Date.now() });
|
|
151
|
+
}
|
|
152
|
+
} else {
|
|
153
|
+
addPerson(peer);
|
|
154
|
+
participants.push(`person-wechat-${peer}`);
|
|
155
|
+
}
|
|
156
|
+
const title = text.replace(/\s+/g, ' ').trim().slice(0, 80);
|
|
116
157
|
events.push({
|
|
117
158
|
type: 'event', subtype: 'message', id: `wechat:${r.msgId}`,
|
|
118
159
|
occurredAt, actor, participants,
|
|
119
|
-
content: { text: isGroup ? `[群${nameOf.get(peer) || peer}] ${text}` : text },
|
|
120
|
-
topics:
|
|
121
|
-
source:
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
160
|
+
content: { title: title || '(无内容)', text: isGroup ? `[群${nameOf.get(peer) || peer}] ${text}` : text },
|
|
161
|
+
topics: topicId ? [topicId] : undefined,
|
|
162
|
+
source: SRC(String(r.msgId), occurredAt),
|
|
163
|
+
extra: { isSend: !!r.isSend, talker: r.talker },
|
|
164
|
+
ingestedAt: Date.now(),
|
|
165
|
+
});
|
|
166
|
+
}
|
|
167
|
+
persons.set(SELF_ID, { type: 'person', subtype: 'contact', id: SELF_ID, names: ['我(微信)'], source: SRC(SELF_ID), ingestedAt: Date.now() });
|
|
168
|
+
} finally {
|
|
169
|
+
src.close();
|
|
170
|
+
}
|
|
171
|
+
return { events, persons: [...persons.values()], topics: [...topics.values()] };
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
// ── 朋友圈 (SnsMicroMsg.db, PLAINTEXT — no SQLCipher) ───────────────────────
|
|
175
|
+
// Unlike EnMicroMsg.db, SnsMicroMsg.db is NOT encrypted (header = "SQLite
|
|
176
|
+
// format 3\0"), so it opens directly. SnsInfo.content is a protobuf
|
|
177
|
+
// TimelineObject: the post text is top-level field 5 (contentDesc), media are
|
|
178
|
+
// qpic.cn URLs embedded in the blob, and the poster nickname lives in attrBuf.
|
|
179
|
+
// SnsInfo.createTime is epoch SECONDS. Verified on chopin (WeChat 8.0.74):
|
|
180
|
+
// account 60e2c317… had 2931 posts (2623 with text) readable without any key.
|
|
181
|
+
/**
 * Read one protobuf base-128 varint starting at `pos`.
 *
 * At most 10 bytes are consumed (the longest encoding of a 64-bit value) and
 * the result is truncated to 64 bits, so a malformed run of continuation
 * bytes cannot grow the BigInt without bound. A varint cut short by the end
 * of the buffer yields the bits read so far.
 *
 * @param {Uint8Array} buf bytes to read from
 * @param {number} pos offset of the first varint byte
 * @returns {[bigint, number]} decoded value and the offset just past it
 */
function _pbReadVarint(buf, pos) {
  const MAX_VARINT_BYTES = 10;
  let result = 0n;
  let shift = 0n;
  let consumed = 0;
  while (pos < buf.length && consumed < MAX_VARINT_BYTES) {
    const b = buf[pos++];
    consumed += 1;
    result |= BigInt(b & 0x7f) << shift;
    if (!(b & 0x80)) break;
    shift += 7n;
  }
  return [BigInt.asUintN(64, result), pos];
}

/**
 * Walk top-level protobuf fields → { fieldNum: [Buffer|BigInt, …] }.
 * Best-effort: parsing stops at the first malformed or truncated field and
 * everything decoded before that point is returned.
 *
 * Varint fields (wire type 0) are returned as BigInt. Length-delimited
 * (wire type 2) and fixed-width (wire types 1 and 5) fields are returned as
 * subarray views of `buf`. A field whose payload would run past the end of
 * the buffer is dropped rather than returned short.
 *
 * @param {Buffer} buf serialized message
 * @returns {Object<number, Array<Buffer|bigint>>}
 */
function _pbFields(buf) {
  const out = {};
  let pos = 0;
  while (pos < buf.length) {
    let tag;
    [tag, pos] = _pbReadVarint(buf, pos);
    const field = Number(tag >> 3n);
    const wire = Number(tag & 7n);
    if (field === 0) break; // field number 0 is not valid
    let val;
    if (wire === 0) {
      if (pos >= buf.length) break; // tag with no value
      [val, pos] = _pbReadVarint(buf, pos);
    } else if (wire === 2) {
      let len;
      [len, pos] = _pbReadVarint(buf, pos);
      len = Number(len);
      if (len < 0 || pos + len > buf.length) break;
      val = buf.subarray(pos, pos + len);
      pos += len;
    } else if (wire === 1 || wire === 5) {
      const width = wire === 1 ? 8 : 4;
      if (pos + width > buf.length) break; // truncated fixed-width value
      val = buf.subarray(pos, pos + width);
      pos += width;
    } else {
      break; // any other wire type is not handled
    }
    (out[field] ||= []).push(val);
  }
  return out;
}
|
|
210
|
+
/**
 * Extract the post text from a SnsInfo.content blob: the first value of
 * top-level protobuf field 5, decoded as UTF-8 and trimmed.
 * @param {Buffer} contentBuf raw SnsInfo.content
 * @returns {string} the text, or '' when field 5 is absent, empty or the blob
 *   cannot be walked
 */
function snsPostText(contentBuf) {
  try {
    const f = _pbFields(contentBuf);
    if (f[5] && f[5].length) {
      const t = f[5][0].toString('utf8').trim();
      if (t) return t;
    }
  } catch { /* not a TimelineObject */ }
  return '';
}

/**
 * Collect the distinct qpic.cn URLs embedded anywhere in the blob, in order of
 * first appearance. The blob is scanned as latin1 text rather than parsed.
 * @param {Buffer} contentBuf raw SnsInfo.content
 * @returns {string[]}
 */
function snsMediaUrls(contentBuf) {
  const s = contentBuf.toString('latin1');
  const urls = new Set();
  const re = /https?:\/\/[A-Za-z0-9._-]*qpic\.cn[A-Za-z0-9._\-/?=&%]+/g;
  let m;
  while ((m = re.exec(s))) urls.add(m[0]);
  return [...urls];
}

/**
 * Heuristically pick the poster's nickname out of SnsInfo.attrBuf: the first
 * top-level Buffer value that decodes to a short (≤ 40 chars) string, contains
 * a CJK or Latin letter, is not the wxid itself and does not look like a wxid.
 *
 * Candidates containing ANY control character or U+FFFD (the marker produced
 * when bytes are not valid UTF-8) are rejected, so nested-message bytes such
 * as "\n\x10Name" are not mistaken for a display name.
 *
 * @param {Buffer} attrBuf raw SnsInfo.attrBuf
 * @param {string} wxid the poster's wxid (never returned as the nickname)
 * @returns {string} nickname, or '' when none is found
 */
function snsNickname(attrBuf, wxid) {
  try {
    const fields = _pbFields(attrBuf);
    for (const vals of Object.values(fields)) {
      for (const v of vals) {
        if (!Buffer.isBuffer(v)) continue;
        const s = v.toString('utf8');
        if (!s || s === wxid || s.length > 40) continue;
        if (/^wxid_/.test(s)) continue;
        if (/[\x00-\x1f\x7f\ufffd]/.test(s)) continue; // binary / mis-decoded bytes
        if (/[一-鿿A-Za-z]/.test(s)) return s;
      }
    }
  } catch { /* ignore */ }
  return '';
}
|
|
224
|
+
|
|
225
|
+
/**
 * Parse a PLAINTEXT SnsMicroMsg.db → 朋友圈 vault batch { events, persons, topics }.
 * Each SnsInfo row → EVENT(post) attributed to the poster. `selfWxid` (optional)
 * maps the user's own posts to SELF_ID; `nameMap` (wxid → displayName, e.g. from
 * the matching account's decrypted rcontact) overrides attrBuf nicknames.
 *
 * Integer columns are read in safe-integer (BigInt) mode whenever the
 * statement supports it: `snsId` is a 64-bit value, and reading it as a JS
 * number would round it, letting distinct posts collapse onto one event id.
 *
 * @param Database better-sqlite3 ctor (injected)
 * @param {string} dbPath path of the SnsMicroMsg.db file (opened read-only)
 * @param {{selfWxid?: string, nameMap?: Map<string,string>|Object<string,string>}} [opts]
 * @returns {{events: Array, persons: Array, topics: Array}} `topics` is always
 *   empty; an unreadable or missing SnsInfo table yields three empty arrays.
 */
function parseSnsEvents(Database, dbPath, { selfWxid, nameMap } = {}) {
  const src = new Database(dbPath, { readonly: true });
  const events = [];
  const persons = new Map();
  const names = nameMap instanceof Map ? nameMap : new Map(Object.entries(nameMap || {}));
  try {
    let rows = [];
    try {
      const stmt = src.prepare('SELECT snsId,userName,createTime,type,content,attrBuf FROM SnsInfo ORDER BY createTime DESC LIMIT 5000');
      // Fall back to plain numbers for injected drivers without safeIntegers().
      rows = (typeof stmt.safeIntegers === 'function' ? stmt.safeIntegers() : stmt).all();
    } catch { return { events: [], persons: [], topics: [] }; } // no SnsInfo table
    for (const r of rows) {
      const wxid = String(r.userName || '');
      if (!wxid) continue;
      const text = r.content ? snsPostText(r.content) : '';
      const media = r.content ? snsMediaUrls(r.content) : [];
      if (!text && !media.length) continue; // skip empty / pure-ad shells
      const occurredAt = (Number(r.createTime) || 0) * 1000; // SnsInfo.createTime is seconds
      if (!occurredAt) continue;
      const snsId = String(r.snsId); // exact decimal digits, BigInt or number
      const isSelf = !!(selfWxid && wxid === selfWxid);
      const nick = names.get(wxid) || (r.attrBuf ? snsNickname(r.attrBuf, wxid) : '') || wxid;
      const actor = isSelf ? SELF_ID : `person-wechat-${wxid}`;
      if (!isSelf && !persons.has(actor)) {
        const nm = nick && nick !== wxid ? [nick, wxid] : [wxid];
        persons.set(actor, { type: 'person', subtype: 'contact', id: actor, names: nm, identifiers: { wechatId: wxid }, source: SRC(actor), ingestedAt: Date.now() });
      }
      const title = (text || `[图片] ${nick}的朋友圈`).replace(/\s+/g, ' ').trim().slice(0, 80);
      events.push({
        type: 'event', subtype: 'post', id: `wechat-sns:${snsId}`,
        occurredAt, actor, participants: [actor],
        content: { title: title || '(朋友圈)', text: text || undefined },
        source: SRC(`sns-${snsId}`, occurredAt),
        extra: { kind: 'moment', isSelf, poster: nick, mediaCount: media.length, media: media.slice(0, 9) },
        ingestedAt: Date.now(),
      });
    }
    if (selfWxid) persons.set(SELF_ID, { type: 'person', subtype: 'contact', id: SELF_ID, names: ['我(微信)'], source: SRC(SELF_ID), ingestedAt: Date.now() });
  } finally {
    src.close();
  }
  return { events, persons: [...persons.values()], topics: [] };
}
|
|
133
271
|
|
|
134
|
-
module.exports = { computeKeyCandidates, deriveAndDecrypt, parseEvents };
|
|
272
|
+
module.exports = { computeKeyCandidates, deriveAndDecrypt, parseEvents, parseSnsEvents, snsPostText, snsMediaUrls, snsNickname };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chainlesschain/personal-data-hub",
|
|
3
|
-
"version": "0.4.34",
|
|
3
|
+
"version": "0.4.36",
|
|
4
4
|
"description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
|
|
5
5
|
"type": "commonjs",
|
|
6
6
|
"main": "lib/index.js",
|
|
@@ -76,6 +76,7 @@
|
|
|
76
76
|
"./forensics/salvage-ingest": "./lib/forensics/salvage-ingest.js",
|
|
77
77
|
"./forensics/qq-nt-collect": "./lib/forensics/qq-nt-collect.js",
|
|
78
78
|
"./forensics/wechat-collect": "./lib/forensics/wechat-collect.js",
|
|
79
|
+
"./forensics/qzone-collect": "./lib/forensics/qzone-collect.js",
|
|
79
80
|
"./forensics/plaintext-db-collect": "./lib/forensics/plaintext-db-collect.js"
|
|
80
81
|
},
|
|
81
82
|
"scripts": {
|