@chainlesschain/personal-data-hub 0.4.34 → 0.4.36

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -137,16 +137,50 @@ function bodyText(blob) {
137
137
  * @param self the user's own QQ number (attribution fallback)
138
138
  * @returns {Array} event objects ready for vault.putEvent
139
139
  */
140
- function parseEvents(Database, dbPath, self) {
140
+ const SELF_QQ_ID = 'person-qq-self';
141
+ const SRC_QQ = (originalId, at) => ({
142
+ adapter: 'qq-pc', adapterVersion: '0.1.0',
143
+ originalId: originalId || `qq-${at || 0}`,
144
+ capturedAt: at || Date.now(), capturedBy: 'sqlite',
145
+ });
146
+
147
+ /**
148
+ * Parse a decrypted QQNT nt_msg.db into a vault batch `{events, persons, topics}`
149
+ * (mirrors wechat-collect): named contacts (sender nickname 40090), canonical
150
+ * self (sender uid 40020 === matched account uid → person-qq-self), group
151
+ * topics, clean titles, and a UNIQUE source.originalId per person/topic (a
152
+ * shared one collapses every row via the persons (adapter, originalId) index).
153
+ *
154
+ * @param opts {string|{selfUid?:string, self?:string}} — selfUid = the matched
155
+ * account uid (from deriveAndDecrypt) for reliable self attribution; a bare
156
+ * string is the legacy own-QQ-number fallback.
157
+ */
158
+ function parseEvents(Database, dbPath, opts) {
159
+ const selfUid = opts && typeof opts === 'object' ? opts.selfUid || '' : '';
160
+ const selfQQ = opts && typeof opts === 'object' ? opts.self || '' : opts || '';
141
161
  const src = new Database(dbPath, { readonly: true });
142
162
  const events = [];
163
+ const persons = new Map();
164
+ const topics = new Map();
143
165
  const num = (v) => (typeof v === 'bigint' ? Number(v) : v);
166
+ const addPerson = (qq, uid, nick) => {
167
+ if (!qq) return;
168
+ const id = `person-qq-${qq}`;
169
+ if (persons.has(id)) return;
170
+ const nm = nick && nick.trim() && nick.trim() !== qq ? nick.trim() : null;
171
+ persons.set(id, {
172
+ type: 'person', subtype: 'contact', id,
173
+ names: nm ? [nm, qq] : [qq],
174
+ identifiers: { qq, ...(uid ? { qqUid: uid } : {}) },
175
+ source: SRC_QQ(id), ingestedAt: Date.now(),
176
+ });
177
+ };
144
178
  const ingestTable = (table, isGroup) => {
145
179
  let rows;
146
180
  try {
147
181
  rows = src.prepare(
148
182
  `SELECT [40001] msgId,[40020] uid,[40011] type,[40033] sender,[40021] peer,` +
149
- `[40050] t,[40800] body FROM ${table}`,
183
+ `[40050] t,[40090] nick,[40800] body FROM ${table}`,
150
184
  ).safeIntegers().all();
151
185
  } catch { return; }
152
186
  for (const r of rows) {
@@ -160,20 +194,32 @@ function parseEvents(Database, dbPath, self) {
160
194
  const msgId = typeof r.msgId === 'bigint' ? r.msgId.toString() : String(r.msgId);
161
195
  const sender = String(num(r.sender) || '');
162
196
  const peer = String(num(r.peer) || '');
197
+ const uid = r.uid ? String(r.uid) : '';
198
+ const nick = r.nick ? String(r.nick) : '';
163
199
  const occurredAt = num(r.t) * 1000;
164
200
  if (!occurredAt) continue;
165
- const actor = sender ? `person-qq-${sender}` : `person-qq-${self}`;
201
+ // Self = the sender's uid is the matched account uid. Map to canonical
202
+ // person-qq-self so analysis excludes the owner from contact rankings.
203
+ const isSelf = !!(selfUid && uid && uid === selfUid);
204
+ const actor = isSelf ? SELF_QQ_ID : (sender ? `person-qq-${sender}` : `person-qq-${selfQQ || 'unknown'}`);
205
+ if (!isSelf && sender) addPerson(sender, uid, nick);
166
206
  const participants = [actor];
167
- participants.push(isGroup ? `group-qq-${peer}` : `person-qq-${peer}`);
207
+ let topicId;
208
+ if (isGroup) {
209
+ topicId = `group-qq-${peer}`;
210
+ participants.push(topicId);
211
+ if (!topics.has(topicId)) topics.set(topicId, { type: 'topic', id: topicId, name: peer, source: SRC_QQ(topicId), ingestedAt: Date.now() });
212
+ } else {
213
+ participants.push(`person-qq-${peer}`);
214
+ }
215
+ const title = text.replace(/\s+/g, ' ').trim().slice(0, 80);
168
216
  events.push({
169
217
  type: 'event', subtype: 'message', id: `qq:${table}:${msgId}`,
170
218
  occurredAt, actor, participants,
171
- content: { text: isGroup ? `[群${peer}] ${text}` : text },
172
- topics: isGroup ? [`group-qq-${peer}`] : undefined,
173
- source: {
174
- adapter: 'qq-pc', adapterVersion: '0.1.0', originalId: `${table}:${msgId}`,
175
- capturedAt: occurredAt, capturedBy: 'sqlite',
176
- },
219
+ content: { title: title || '(无内容)', text: isGroup ? `[群${peer}] ${text}` : text },
220
+ topics: topicId ? [topicId] : undefined,
221
+ source: SRC_QQ(`${table}:${msgId}`, occurredAt),
222
+ extra: { isSelf, peer },
177
223
  ingestedAt: Date.now(),
178
224
  });
179
225
  }
@@ -181,10 +227,11 @@ function parseEvents(Database, dbPath, self) {
181
227
  try {
182
228
  ingestTable('c2c_msg_table', false);
183
229
  ingestTable('group_msg_table', true);
230
+ persons.set(SELF_QQ_ID, { type: 'person', subtype: 'contact', id: SELF_QQ_ID, names: ['我(QQ)'], source: SRC_QQ(SELF_QQ_ID), ingestedAt: Date.now() });
184
231
  } finally {
185
232
  src.close();
186
233
  }
187
- return events;
234
+ return { events, persons: [...persons.values()], topics: [...topics.values()] };
188
235
  }
189
236
 
190
237
  module.exports = { extractRand, headerHmac, deriveAndDecrypt, bodyText, parseEvents };
@@ -0,0 +1,168 @@
1
+ 'use strict';
2
+ /**
3
+ * qzone-collect — QQ空间 (Qzone) collector core: 说说 / 留言板 / 相册 → vault events.
4
+ *
5
+ * Qzone has NO local browsable DB (the QQNT databases only cache per-contact
6
+ * "latest feed" preview snippets), so this is the API path: Qzone CGI endpoints
7
+ * authed with the account's qzone-domain `p_skey` + `uin` + a `g_tk` token
8
+ * derived from p_skey (the bkn hash). Pure Node — the only side effect is the
9
+ * caller-supplied `fetchImpl` (defaults to global fetch), so the parsers are
10
+ * unit-testable and the same core runs on PC (`cc hub collect-qzone --cookie`)
11
+ * and in-APK (the Android app captures the cookie via a WebView and feeds it in).
12
+ *
13
+ * Cookie note: the base `.qq.com` skey is rejected by Qzone ("请先登录空间") —
14
+ * the qzone-domain `p_skey` is required (a browser login to user.qzone.qq.com,
15
+ * or the in-app WebView, yields it). Extracted from
16
+ * scripts/android/pdh-qzone-collect.mjs (behaviour identical).
17
+ */
18
+ const SELF_ID = 'person-qq-self';
19
+ const UA = 'Mozilla/5.0 (Linux; Android 13) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120 Mobile Safari/537.36';
20
+ const SRC = (originalId, at) => ({ adapter: 'qzone', adapterVersion: '0.1.0', originalId, capturedAt: at || Date.now(), capturedBy: 'api' });
21
+
22
+ /** Qzone bkn/g_tk hash over p_skey (or skey). */
23
+ function gtk(s) { let h = 5381; for (let i = 0; i < String(s).length; i++) h += (h << 5) + String(s).charCodeAt(i); return h & 0x7fffffff; }
24
+
25
+ function parseCookieStr(s) { const o = {}; for (const part of String(s).split(/;\s*/)) { const i = part.indexOf('='); if (i > 0) o[part.slice(0, i).trim()] = part.slice(i + 1).trim(); } return o; }
26
+ function cookieHeader(ck) { return Object.entries(ck).map(([k, v]) => `${k}=${v}`).join('; '); }
27
+ function stripHtml(s) {
28
+ return String(s || '')
29
+ .replace(/<img[^>]*>/gi, '')
30
+ .replace(/<[^>]+>/g, '')
31
+ .replace(/&nbsp;/g, ' ').replace(/&amp;/g, '&').replace(/&lt;/g, '<').replace(/&gt;/g, '>').replace(/&quot;/g, '"')
32
+ .replace(/\s+/g, ' ').trim();
33
+ }
34
+ function beijingMs(s) { const m = /^(\d{4})-(\d{2})-(\d{2})[ T](\d{2}):(\d{2}):(\d{2})/.exec(String(s || '')); if (!m) return 0; return Date.parse(`${m[1]}-${m[2]}-${m[3]}T${m[4]}:${m[5]}:${m[6]}+08:00`) || 0; }
35
+ function unwrap(text) { return String(text).trim().replace(/^[\w$]+\(/, '').replace(/\);?\s*$/, ''); }
36
+
37
+ // ── 说说 (emotion_cgi_msglist_v6) → EVENT(post) ─────────────────────────────
38
+ function parseQzoneFeed(text) {
39
+ let json; try { json = JSON.parse(unwrap(text)); } catch { return { code: -1, events: [] }; }
40
+ if (json.code !== undefined && json.code !== 0) return { code: json.code, message: json.message, events: [] };
41
+ const list = json.msglist || (json.result && json.result.msglist) || [];
42
+ const events = [];
43
+ for (const it of list) {
44
+ const tid = it.tid || it.t1_tid || it.cellid;
45
+ const occurredAt = (Number(it.created_time) || 0) * 1000;
46
+ if (!tid || !occurredAt) continue;
47
+ const txt = (it.content || it.summary || '').replace(/\s+/g, ' ').trim();
48
+ const pics = Array.isArray(it.pic) ? it.pic.length : 0;
49
+ if (!txt && !pics) continue;
50
+ events.push({
51
+ type: 'event', subtype: 'post', id: `qzone:${tid}`,
52
+ occurredAt, actor: SELF_ID, participants: [SELF_ID],
53
+ content: { title: (txt || '[图片] 我的说说').slice(0, 80), text: txt || undefined },
54
+ source: SRC(`qzone-${tid}`, occurredAt),
55
+ extra: { kind: 'qzone-shuoshuo', tid, mediaCount: pics, cmtnum: it.cmtnum || 0, secret: !!it.secret },
56
+ ingestedAt: Date.now(),
57
+ });
58
+ }
59
+ return { code: 0, events, total: json.total != null ? json.total : (json.result && json.result.total) };
60
+ }
61
+
62
+ // ── 留言板 (get_msgb) → EVENT(message) by the commenter ────────────────────
63
+ function parseGuestbook(text) {
64
+ let json; try { json = JSON.parse(unwrap(text)); } catch { return { code: -1, events: [], persons: [] }; }
65
+ if (json.code !== 0) return { code: json.code, message: json.message, events: [], persons: [] };
66
+ const list = (json.data && json.data.commentList) || [];
67
+ const events = [], persons = new Map();
68
+ for (const c of list) {
69
+ const id = c.id; const occurredAt = beijingMs(c.pubtime);
70
+ const txt = stripHtml(c.htmlContent || c.content || '');
71
+ if (!id || !occurredAt || !txt) continue;
72
+ const fromUin = String(c.uin || '');
73
+ const fromNick = c.nickname || fromUin;
74
+ const actor = fromUin ? `person-qq-${fromUin}` : SELF_ID;
75
+ if (fromUin && !persons.has(actor)) persons.set(actor, { type: 'person', subtype: 'contact', id: actor, names: fromNick !== fromUin ? [fromNick, fromUin] : [fromUin], identifiers: { qqUin: fromUin }, source: SRC(actor), ingestedAt: Date.now() });
76
+ events.push({
77
+ type: 'event', subtype: 'message', id: `qzone-msgb:${id}`,
78
+ occurredAt, actor, participants: [actor, SELF_ID],
79
+ content: { title: txt.slice(0, 80), text: txt },
80
+ source: SRC(`qzone-msgb-${id}`, occurredAt),
81
+ extra: { kind: 'qzone-guestbook', fromUin, fromNick },
82
+ ingestedAt: Date.now(),
83
+ });
84
+ }
85
+ return { code: 0, events, persons: [...persons.values()], total: json.data && json.data.total };
86
+ }
87
+
88
+ // ── 相册 (fcg_list_album_v3) → EVENT(media) per album ──────────────────────
89
+ function parseAlbums(text) {
90
+ let json; try { json = JSON.parse(unwrap(text)); } catch { return { code: -1, events: [] }; }
91
+ if (json.code !== 0) return { code: json.code, message: json.message, events: [] };
92
+ const list = (json.data && json.data.albumList) || [];
93
+ const events = [];
94
+ for (const a of list) {
95
+ if (!a.id) continue;
96
+ const occurredAt = (Number(a.createtime) || 0) * 1000;
97
+ const name = a.name || '(相册)';
98
+ events.push({
99
+ type: 'event', subtype: 'media', id: `qzone-album:${a.id}`,
100
+ occurredAt: occurredAt || Date.now(), actor: SELF_ID, participants: [SELF_ID],
101
+ content: { title: `相册:${name}(${a.total || 0} 张)`, text: a.desc || undefined },
102
+ source: SRC(`qzone-album-${a.id}`, occurredAt),
103
+ extra: { kind: 'qzone-album', albumId: a.id, photoCount: a.total || 0, desc: a.desc || '', commentCount: a.comment || 0 },
104
+ ingestedAt: Date.now(),
105
+ });
106
+ }
107
+ return { code: 0, events, total: (json.data && json.data.albumsInUser) != null ? json.data.albumsInUser : list.length };
108
+ }
109
+
110
+ function qproxy(domainPath, params) {
111
+ const qs = Object.entries({ format: 'json', inCharset: 'utf-8', outCharset: 'utf-8', source: 'qzone', plat: 'qzone', ...params }).map(([k, v]) => `${k}=${encodeURIComponent(v)}`).join('&');
112
+ return `https://user.qzone.qq.com/proxy/domain/${domainPath}?${qs}`;
113
+ }
114
+
115
+ /**
116
+ * Collect Qzone data into a vault batch. `fetchImpl(url, opts)` is injectable
117
+ * (defaults to global fetch) so this is testable offline and runs in-APK.
118
+ * @returns {Promise<{ok, uin, events, persons, counts, reason?}>}
119
+ */
120
+ async function collectQzone({ uin, cookie, what = ['shuoshuo'], max = 500, fetchImpl } = {}) {
121
+ const ck = typeof cookie === 'string' ? parseCookieStr(cookie) : (cookie || {});
122
+ // QQ uin cookies are `o0<uin>` — strip the o/0 prefix (uins never have leading zeros).
123
+ const cleanUin = (s) => String(s || '').replace(/\D/g, '').replace(/^0+/, '');
124
+ uin = cleanUin(uin) || cleanUin(ck.uin) || cleanUin(ck.p_uin);
125
+ const pskey = ck.p_skey || ck.skey;
126
+ if (!uin || !pskey) return { ok: false, reason: 'missing uin or p_skey', events: [], persons: [], counts: {} };
127
+ const _fetch = fetchImpl || (typeof fetch !== 'undefined' ? fetch : null);
128
+ if (!_fetch) throw new Error('qzone collect: no fetch implementation available');
129
+ const wantSet = new Set(Array.isArray(what) ? what : String(what).split(',').map((s) => s.trim()));
130
+ const g = gtk(pskey);
131
+ const headers = { Cookie: cookieHeader(ck), Referer: `https://user.qzone.qq.com/${uin}`, 'User-Agent': UA };
132
+ const get = async (url) => { const r = await _fetch(url, { headers }); return typeof r.text === 'function' ? r.text() : r; };
133
+
134
+ const events = [], persons = new Map();
135
+ const counts = {};
136
+
137
+ if (wantSet.has('shuoshuo')) {
138
+ let n = 0;
139
+ for (let pos = 0; pos < max; pos += 20) {
140
+ const r = parseQzoneFeed(await get(qproxy('taotao.qq.com/cgi-bin/emotion_cgi_msglist_v6', { uin, hostUin: uin, num: 20, pos, g_tk: g, need_private_comment: 1 })));
141
+ if (r.code !== 0 || !r.events.length) break;
142
+ events.push(...r.events); n += r.events.length;
143
+ if (r.total != null && n >= r.total) break;
144
+ }
145
+ counts.shuoshuo = n;
146
+ }
147
+ if (wantSet.has('msgb')) {
148
+ let n = 0, total = null;
149
+ for (let start = 0; start < max; start += 20) {
150
+ const r = parseGuestbook(await get(qproxy('m.qzone.qq.com/cgi-bin/new/get_msgb', { uin, hostUin: uin, num: 20, start, g_tk: g })));
151
+ if (r.code !== 0) break;
152
+ total = r.total;
153
+ if (!r.events.length) break;
154
+ events.push(...r.events); for (const p of r.persons) persons.set(p.id, p); n += r.events.length;
155
+ if (total != null && n >= total) break;
156
+ }
157
+ counts.msgb = n;
158
+ }
159
+ if (wantSet.has('album')) {
160
+ const r = parseAlbums(await get(qproxy('photo.qzone.qq.com/fcgi-bin/fcg_list_album_v3', { g_tk: g, hostUin: uin, uin, mode: 2, pageStart: 0, pageNum: 200 })));
161
+ if (r.code === 0) { events.push(...r.events); counts.album = r.events.length; }
162
+ else counts.album = 0;
163
+ }
164
+
165
+ return { ok: true, uin, events, persons: [...persons.values()], counts };
166
+ }
167
+
168
+ module.exports = { gtk, parseCookieStr, stripHtml, parseQzoneFeed, parseGuestbook, parseAlbums, collectQzone, SELF_ID };
@@ -84,9 +84,27 @@ function deriveAndDecrypt(raw, passphrases, rawKeys) {
84
84
  * Parse a DECRYPTED EnMicroMsg.db → vault events (wechat adapter shape).
85
85
  * @param Database better-sqlite3 ctor (injected). @param self the user's wxid.
86
86
  */
87
- function parseEvents(Database, dbPath, self) {
87
+ // Self is ALWAYS the stable canonical id (mirrors adapters/wechat/normalize.js)
88
+ // so analysis skills exclude it from contact rankings and it never fragments.
89
+ const SELF_ID = 'person-wechat-self';
90
+ const SRC = (originalId, at) => ({
91
+ adapter: 'wechat', adapterVersion: '0.1.0',
92
+ originalId: originalId || `wechat-${at || 0}`,
93
+ capturedAt: at || Date.now(), capturedBy: 'sqlite',
94
+ });
95
+
96
+ /**
97
+ * Parse a decrypted EnMicroMsg.db into a vault batch. Returns
98
+ * `{ events, persons, topics }` so the on-device analysis skills get the rich
99
+ * entity graph (named contacts → relations; group topics → interests; clean
100
+ * titles → timeline) instead of bare message events. `self` is ignored — the
101
+ * sender of an outbound message maps to the canonical SELF_ID.
102
+ */
103
+ function parseEvents(Database, dbPath, _self) {
88
104
  const src = new Database(dbPath, { readonly: true });
89
105
  const events = [];
106
+ const persons = new Map(); // id -> person record
107
+ const topics = new Map(); // id -> topic record
90
108
  try {
91
109
  const nameOf = new Map();
92
110
  try {
@@ -94,6 +112,17 @@ function parseEvents(Database, dbPath, self) {
94
112
  nameOf.set(r.username, (r.conRemark && r.conRemark.trim()) || r.nickname || r.username);
95
113
  }
96
114
  } catch { /* contacts optional */ }
115
+ const addPerson = (wxid) => {
116
+ if (!wxid) return;
117
+ const id = `person-wechat-${wxid}`;
118
+ if (persons.has(id)) return;
119
+ const nm = nameOf.get(wxid);
120
+ // names[0] = display name (or wxid when unresolved); keep wxid as alias.
121
+ const names = nm && nm !== wxid ? [nm, wxid] : [wxid];
122
+ // Unique originalId per person — a shared originalId collapses every row
123
+ // into one via the persons (adapter, originalId) unique constraint.
124
+ persons.set(id, { type: 'person', subtype: 'contact', id, names, identifiers: { wechatId: wxid }, source: SRC(id), ingestedAt: Date.now() });
125
+ };
97
126
  const rows = src.prepare(
98
127
  'SELECT msgId,type,isSend,createTime,talker,content FROM message ' +
99
128
  "WHERE type=1 ORDER BY createTime DESC LIMIT 5000",
@@ -101,7 +130,7 @@ function parseEvents(Database, dbPath, self) {
101
130
  for (const r of rows) {
102
131
  const isGroup = /@chatroom$/.test(r.talker || '');
103
132
  let text = r.content || '';
104
- let senderWxid = r.isSend ? self : r.talker;
133
+ let senderWxid = r.isSend ? null : r.talker; // null = self (outbound)
105
134
  if (isGroup && !r.isSend) {
106
135
  const c = text.indexOf(':');
107
136
  if (c > 0) { senderWxid = text.slice(0, c); text = text.slice(c + 1).replace(/^\n/, '').trim(); }
@@ -110,25 +139,134 @@ function parseEvents(Database, dbPath, self) {
110
139
  const occurredAt = Number(r.createTime) || 0; // already ms in WeChat
111
140
  if (!occurredAt) continue;
112
141
  const peer = String(r.talker || '');
113
- const actor = `person-wechat-${senderWxid || self}`;
142
+ const actor = r.isSend ? SELF_ID : `person-wechat-${senderWxid || peer}`;
143
+ if (!r.isSend) addPerson(senderWxid || peer);
114
144
  const participants = [actor];
115
- participants.push(isGroup ? `group-wechat-${peer}` : `person-wechat-${peer}`);
145
+ let topicId;
146
+ if (isGroup) {
147
+ topicId = `group-wechat-${peer}`;
148
+ participants.push(topicId);
149
+ if (!topics.has(topicId)) {
150
+ topics.set(topicId, { type: 'topic', id: topicId, name: nameOf.get(peer) || peer.replace('@chatroom', ''), source: SRC(topicId), ingestedAt: Date.now() });
151
+ }
152
+ } else {
153
+ addPerson(peer);
154
+ participants.push(`person-wechat-${peer}`);
155
+ }
156
+ const title = text.replace(/\s+/g, ' ').trim().slice(0, 80);
116
157
  events.push({
117
158
  type: 'event', subtype: 'message', id: `wechat:${r.msgId}`,
118
159
  occurredAt, actor, participants,
119
- content: { text: isGroup ? `[群${nameOf.get(peer) || peer}] ${text}` : text },
120
- topics: isGroup ? [`group-wechat-${peer}`] : undefined,
121
- source: {
122
- adapter: 'wechat', adapterVersion: '0.1.0', originalId: String(r.msgId),
123
- capturedAt: occurredAt, capturedBy: 'sqlite',
124
- },
160
+ content: { title: title || '(无内容)', text: isGroup ? `[群${nameOf.get(peer) || peer}] ${text}` : text },
161
+ topics: topicId ? [topicId] : undefined,
162
+ source: SRC(String(r.msgId), occurredAt),
163
+ extra: { isSend: !!r.isSend, talker: r.talker },
164
+ ingestedAt: Date.now(),
165
+ });
166
+ }
167
+ persons.set(SELF_ID, { type: 'person', subtype: 'contact', id: SELF_ID, names: ['我(微信)'], source: SRC(SELF_ID), ingestedAt: Date.now() });
168
+ } finally {
169
+ src.close();
170
+ }
171
+ return { events, persons: [...persons.values()], topics: [...topics.values()] };
172
+ }
173
+
174
+ // ── 朋友圈 (SnsMicroMsg.db, PLAINTEXT — no SQLCipher) ───────────────────────
175
+ // Unlike EnMicroMsg.db, SnsMicroMsg.db is NOT encrypted (header = "SQLite
176
+ // format 3\0"), so it opens directly. SnsInfo.content is a protobuf
177
+ // TimelineObject: the post text is top-level field 5 (contentDesc), media are
178
+ // qpic.cn URLs embedded in the blob, and the poster nickname lives in attrBuf.
179
+ // SnsInfo.createTime is epoch SECONDS. Verified on chopin (WeChat 8.0.74):
180
+ // account 60e2c317… had 2931 posts (2623 with text) readable without any key.
181
+ function _pbReadVarint(buf, pos) {
182
+ let shift = 0, result = 0n;
183
+ while (pos < buf.length) {
184
+ const b = buf[pos++];
185
+ result |= BigInt(b & 0x7f) << BigInt(shift);
186
+ if (!(b & 0x80)) break;
187
+ shift += 7;
188
+ }
189
+ return [result, pos];
190
+ }
191
+ // Walk top-level protobuf fields → { fieldNum: [Buffer|BigInt, …] }. Best-effort
192
+ // (stops on malformed input); length-delimited values are returned as slices.
193
+ function _pbFields(buf) {
194
+ const out = {};
195
+ let pos = 0;
196
+ while (pos < buf.length) {
197
+ let tag; [tag, pos] = _pbReadVarint(buf, pos);
198
+ const field = Number(tag >> 3n), wire = Number(tag & 7n);
199
+ if (field === 0) break;
200
+ let val;
201
+ if (wire === 0) { [val, pos] = _pbReadVarint(buf, pos); }
202
+ else if (wire === 2) { let len; [len, pos] = _pbReadVarint(buf, pos); len = Number(len); if (len < 0 || pos + len > buf.length) break; val = buf.subarray(pos, pos + len); pos += len; }
203
+ else if (wire === 1) { val = buf.subarray(pos, pos + 8); pos += 8; }
204
+ else if (wire === 5) { val = buf.subarray(pos, pos + 4); pos += 4; }
205
+ else break;
206
+ (out[field] ||= []).push(val);
207
+ }
208
+ return out;
209
+ }
210
+ function snsPostText(contentBuf) {
211
+ try { const f = _pbFields(contentBuf); if (f[5] && f[5].length) { const t = f[5][0].toString('utf8').trim(); if (t) return t; } } catch { /* not a TimelineObject */ }
212
+ return '';
213
+ }
214
+ function snsMediaUrls(contentBuf) {
215
+ const s = contentBuf.toString('latin1'); const urls = new Set();
216
+ const re = /https?:\/\/[A-Za-z0-9._-]*qpic\.cn[A-Za-z0-9._\-/?=&%]+/g; let m;
217
+ while ((m = re.exec(s))) urls.add(m[0]);
218
+ return [...urls];
219
+ }
220
+ function snsNickname(attrBuf, wxid) {
221
+ try { const f = _pbFields(attrBuf); for (const vals of Object.values(f)) for (const v of vals) { if (Buffer.isBuffer(v)) { const s = v.toString('utf8'); if (s && s !== wxid && !/^wxid_/.test(s) && /[一-鿿A-Za-z]/.test(s) && s.length <= 40 && !/[\x00-\x08]/.test(s)) return s; } } } catch { /* ignore */ }
222
+ return '';
223
+ }
224
+
225
+ /**
226
+ * Parse a PLAINTEXT SnsMicroMsg.db → 朋友圈 vault batch { events, persons, topics }.
227
+ * Each SnsInfo row → EVENT(post) attributed to the poster. `selfWxid` (optional)
228
+ * maps the user's own posts to SELF_ID; `nameMap` (wxid → displayName, e.g. from
229
+ * the matching account's decrypted rcontact) overrides attrBuf nicknames.
230
+ */
231
+ function parseSnsEvents(Database, dbPath, { selfWxid, nameMap } = {}) {
232
+ const src = new Database(dbPath, { readonly: true });
233
+ const events = [];
234
+ const persons = new Map();
235
+ const names = nameMap instanceof Map ? nameMap : new Map(Object.entries(nameMap || {}));
236
+ try {
237
+ let rows = [];
238
+ try { rows = src.prepare('SELECT snsId,userName,createTime,type,content,attrBuf FROM SnsInfo ORDER BY createTime DESC LIMIT 5000').all(); }
239
+ catch { return { events: [], persons: [], topics: [] }; } // no SnsInfo table
240
+ for (const r of rows) {
241
+ const wxid = String(r.userName || '');
242
+ if (!wxid) continue;
243
+ const text = r.content ? snsPostText(r.content) : '';
244
+ const media = r.content ? snsMediaUrls(r.content) : [];
245
+ if (!text && !media.length) continue; // skip empty / pure-ad shells
246
+ const occurredAt = (Number(r.createTime) || 0) * 1000; // SnsInfo.createTime is seconds
247
+ if (!occurredAt) continue;
248
+ const isSelf = !!(selfWxid && wxid === selfWxid);
249
+ const nick = names.get(wxid) || (r.attrBuf ? snsNickname(r.attrBuf, wxid) : '') || wxid;
250
+ const actor = isSelf ? SELF_ID : `person-wechat-${wxid}`;
251
+ if (!isSelf && !persons.has(actor)) {
252
+ const nm = nick && nick !== wxid ? [nick, wxid] : [wxid];
253
+ persons.set(actor, { type: 'person', subtype: 'contact', id: actor, names: nm, identifiers: { wechatId: wxid }, source: SRC(actor), ingestedAt: Date.now() });
254
+ }
255
+ const title = (text || `[图片] ${nick}的朋友圈`).replace(/\s+/g, ' ').trim().slice(0, 80);
256
+ events.push({
257
+ type: 'event', subtype: 'post', id: `wechat-sns:${r.snsId}`,
258
+ occurredAt, actor, participants: [actor],
259
+ content: { title: title || '(朋友圈)', text: text || undefined },
260
+ source: SRC(`sns-${r.snsId}`, occurredAt),
261
+ extra: { kind: 'moment', isSelf, poster: nick, mediaCount: media.length, media: media.slice(0, 9) },
125
262
  ingestedAt: Date.now(),
126
263
  });
127
264
  }
265
+ if (selfWxid) persons.set(SELF_ID, { type: 'person', subtype: 'contact', id: SELF_ID, names: ['我(微信)'], source: SRC(SELF_ID), ingestedAt: Date.now() });
128
266
  } finally {
129
267
  src.close();
130
268
  }
131
- return events;
269
+ return { events, persons: [...persons.values()], topics: [] };
132
270
  }
133
271
 
134
- module.exports = { computeKeyCandidates, deriveAndDecrypt, parseEvents };
272
+ module.exports = { computeKeyCandidates, deriveAndDecrypt, parseEvents, parseSnsEvents, snsPostText, snsMediaUrls, snsNickname };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chainlesschain/personal-data-hub",
3
- "version": "0.4.34",
3
+ "version": "0.4.36",
4
4
  "description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
5
5
  "type": "commonjs",
6
6
  "main": "lib/index.js",
@@ -76,6 +76,7 @@
76
76
  "./forensics/salvage-ingest": "./lib/forensics/salvage-ingest.js",
77
77
  "./forensics/qq-nt-collect": "./lib/forensics/qq-nt-collect.js",
78
78
  "./forensics/wechat-collect": "./lib/forensics/wechat-collect.js",
79
+ "./forensics/qzone-collect": "./lib/forensics/qzone-collect.js",
79
80
  "./forensics/plaintext-db-collect": "./lib/forensics/plaintext-db-collect.js"
80
81
  },
81
82
  "scripts": {