@chainlesschain/personal-data-hub 0.4.30 → 0.4.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,89 @@
1
+ 'use strict';
2
+ /**
3
+ * plaintext-db-collect — generic ingester for an app's PLAINTEXT SQLite dbs.
4
+ *
5
+ * Many app dbs aren't encrypted (browse/read/history/content/config). The Magisk
6
+ * daemon (root) can read them directly on MIUI; this turns any such db into vault
7
+ * events by pulling readable text records from each table — comprehensive coverage
8
+ * of the "明文库" personal data without per-db schema work. Encrypted IM dbs
9
+ * (QQNT/WeChat/WCDB2) have their own decrypt collectors; this is for the rest.
10
+ *
11
+ * Heuristics: skip system/Room-internal tables; per table, take TEXT-ish columns;
12
+ * keep a row only if it has a meaningful readable value (CJK, or ≥6 letters, not a
13
+ * uuid/hash/base64 blob); derive a time from any *time/*date/created/_at column.
14
+ * Pure Node + a caller-provided better-sqlite3 ctor.
15
+ */
16
+ const crypto = require('crypto');
17
+
18
// Tables skipped outright: Android/SQLite/Room bookkeeping, FTS shadow tables,
// and *_log / *_index helper tables.
const SKIP_TABLE = /^(android_metadata|sqlite_|room_master_table|_room|.*_fts(_.*)?$|.*_log$|.*_index$)/i;
// Declared column types that are treated as text-bearing.
const TEXT_TYPE = /text|char|clob|json|varchar|string/i;
// Column names that look like timestamps. Every keyword must sit between
// underscores or at either end of the name. The `at` keyword is spelled without
// a leading underscore because the `(^|_)` prefix already consumes it; written
// as `_at` it could only ever match `__at`, so `sent_at` was never recognised.
const TIME_COL = /(^|_)(time|date|created|updated|at|timestamp|ctime|mtime)($|_)/i;
// Whole values that are noise rather than readable text: long hex strings,
// base64/uuid-like tokens, long digit runs, bare URLs, near-empty JSON
// objects and empty arrays.
const NOISE_VAL = /^(([0-9a-f]{16,})|([A-Za-z0-9+/=_-]{24,})|(\d{6,})|(https?:\/\/\S+)|(\{.{0,3}\})|(\[\]))$/;
22
+
23
/**
 * Normalise a raw timestamp column value to epoch milliseconds.
 *
 * The unit is guessed purely from magnitude: values above 1e16 are divided by
 * 1e6 (presumably nanoseconds), above 1e14 by 1e3 (microseconds), above 1e12
 * are returned unchanged (milliseconds) and above 1e9 are multiplied by 1000
 * (seconds).
 *
 * @param {*} v raw column value; coerced with Number()
 * @returns {number} epoch milliseconds, or 0 when the value is not a positive
 *   finite number or is too small to be classified
 */
function normTime(v) {
  const value = Number(v);
  if (!(Number.isFinite(value) && value > 0)) {
    return 0;
  }
  // Ordered from the coarsest magnitude down; the first threshold exceeded wins.
  const scales = [
    [1e16, (x) => Math.floor(x / 1e6)],
    [1e14, (x) => Math.floor(x / 1e3)],
    [1e12, (x) => x],
    [1e9, (x) => x * 1000],
  ];
  const rule = scales.find(([threshold]) => value > threshold);
  return rule ? rule[1](value) : 0;
}
32
/**
 * Decide whether a column value is human-readable text worth keeping.
 *
 * @param {*} v candidate value; anything that is not a string is rejected
 * @returns {boolean} true when the trimmed string is 4..2000 characters long,
 *   is not matched by NOISE_VAL, and contains either a CJK character or at
 *   least six ASCII letters
 */
function readable(v) {
  if (typeof v !== 'string') return false;
  const trimmed = v.trim();
  const sizeOk = trimmed.length >= 4 && trimmed.length <= 2000;
  if (!sizeOk || NOISE_VAL.test(trimmed)) return false;
  if (/[一-鿿]/.test(trimmed)) return true; // a single CJK character is enough
  // Otherwise require enough ASCII letters to look like words.
  const lettersOnly = trimmed.replace(/[^A-Za-z]/g, '');
  return lettersOnly.length >= 6;
}
40
+
41
/**
 * Turn one plaintext SQLite database into vault "record" events.
 *
 * Every table not excluded by SKIP_TABLE is sampled (first 2000 rows). For each
 * row the values of its text-typed columns are filtered through `readable`,
 * de-duplicated, joined with " · " and truncated to 800 characters. The first
 * column whose name matches TIME_COL supplies the event time via `normTime`;
 * when that yields 0 the time of this ingest run is used instead.
 *
 * Tables whose schema or rows cannot be read are skipped (best effort).
 *
 * @param Database better-sqlite3 ctor (injected)
 * @param dbPath   path to a plaintext SQLite db
 * @param app      app key (→ source.adapter `local-<app>`)
 * @returns {Array} vault events (subtype:"record")
 */
function ingestPlaintextDb(Database, dbPath, app) {
  const db = new Database(dbPath, { readonly: true });
  const events = [];
  const dbName = String(dbPath).split(/[\\/]/).pop();
  // One clock reading per run, so occurredAt / capturedAt / ingestedAt
  // fallbacks inside an event can never disagree.
  const now = Date.now();
  // Table names come from the inspected database: double any embedded `"` so
  // the name always stays a single quoted identifier.
  const quoteIdent = (name) => `"${String(name).replace(/"/g, '""')}"`;
  // JSON.stringify throws on BigInt (returned when the injected driver runs
  // with safe integers); serialise those as decimal strings. Rows without
  // BigInt values hash exactly as before.
  const jsonSafe = (_key, value) => (typeof value === 'bigint' ? value.toString() : value);
  try {
    const tables = db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all()
      .map((r) => r.name).filter((t) => !SKIP_TABLE.test(t));
    for (const t of tables) {
      const ident = quoteIdent(t);
      let cols, rows;
      try {
        cols = db.prepare(`PRAGMA table_info(${ident})`).all();
        rows = db.prepare(`SELECT * FROM ${ident} LIMIT 2000`).all();
      } catch { continue; } // unreadable table: skip it, keep the rest
      const textCols = cols.filter((c) => TEXT_TYPE.test(c.type || '')).map((c) => c.name);
      if (!textCols.length || !rows.length) continue;
      const timeCol = (cols.find((c) => TIME_COL.test(c.name)) || {}).name;
      for (const row of rows) {
        const parts = textCols.map((c) => row[c]).filter((value) => readable(value));
        if (!parts.length) continue;
        const text = [...new Set(parts)].join(' · ').slice(0, 800);
        const stamp = (timeCol ? normTime(row[timeCol]) : 0) || now;
        // Content-derived id: identical rows in the same table map to one id.
        const id = `${app}:${dbName}:${t}:` +
          crypto.createHash('md5').update(JSON.stringify(row, jsonSafe)).digest('hex').slice(0, 12);
        events.push({
          type: 'event', subtype: 'record',
          id, occurredAt: stamp,
          content: { text },
          source: {
            adapter: `local-${app}`, adapterVersion: '0.1.0',
            originalId: `${dbName}:${t}`, capturedAt: stamp, capturedBy: 'sqlite',
          },
          topics: [`local-${app}`, `db-${dbName}`],
          ingestedAt: now,
        });
      }
    }
  } finally {
    db.close();
  }
  return events;
}
88
+
89
+ module.exports = { ingestPlaintextDb, readable, normTime };
@@ -0,0 +1,190 @@
1
+ 'use strict';
2
+ /**
3
+ * qq-nt-collect — on-device-ready QQNT (nt_msg.db) decrypt + protobuf-parse.
4
+ *
5
+ * Pure Node (crypto + a caller-provided better-sqlite3 ctor) — NO frida, NO adb.
6
+ * This is the bundle-shipped core behind `cc hub collect-qq`, which the Android
7
+ * `CollectQqNativeTool` invokes after su-staging the encrypted DB + uid list. The
8
+ * exact same logic runs on PC (USB pull) and on-device (app su-read).
9
+ *
10
+ * Method (key DERIVED, no frida): `key = MD5(MD5(uid) + rand)` → SQLCipher
11
+ * PBKDF2-HMAC-SHA512/1 (iter 4000) → AES-256-CBC. `rand` is read from the 1024-byte
12
+ * plaintext header; `uid` (QQNT `u_...`) is brute-forced from candidates — page 1
13
+ * decrypting to a valid SQLite header identifies the self uid. (See
14
+ * scripts/android/pdh-qq-android-decrypt.mjs for the standalone PC tool this was
15
+ * extracted from; behaviour is byte-identical.)
16
+ */
17
+ const crypto = require('crypto');
18
/** Hex-encoded MD5 digest of `s`. */
const md5 = (s) => {
  const hasher = crypto.createHash('md5');
  hasher.update(s);
  return hasher.digest('hex');
};
19
+
20
/**
 * Pull the `rand` token out of the plaintext file header.
 *
 * Looks for the marker "QQ_NT DB" within the first 256 bytes and returns the
 * first run of 6..12 ASCII alphanumerics found in the 32 characters after it.
 *
 * @param {Buffer} raw raw file bytes
 * @returns {string|null} the token, or null when the marker or token is absent
 */
function extractRand(raw) {
  const MARKER = 'QQ_NT DB';
  const headerText = raw.subarray(0, 256).toString('latin1');
  const markerAt = headerText.indexOf(MARKER);
  if (markerAt === -1) return null;
  const window = headerText.slice(markerAt + MARKER.length, markerAt + 40);
  const found = /[A-Za-z0-9]{6,12}/.exec(window);
  return found === null ? null : found[0];
}
28
+
29
/**
 * Report the HMAC algorithm label (e.g. "HMAC_SHA1") spelled out in the first
 * 256 bytes of the file. Informational only.
 *
 * @param {Buffer} raw raw file bytes
 * @returns {string|null} the label, or null when none is present
 */
function headerHmac(raw) {
  const headerText = raw.subarray(0, 256).toString('latin1');
  const found = headerText.match(/HMAC_SHA\d+/);
  if (!found) return null;
  return found[0] || null;
}
33
+
34
/**
 * AES-256-CBC decrypt a single page slice (no padding).
 *
 * The last `reserve` bytes of the page are the reserved area; the 16-byte IV
 * is read `ivOff` bytes into it. Everything before the reserved area is
 * ciphertext, except that page 1 starts with a 16-byte salt which is skipped.
 *
 * @param {Buffer}  page    one page of the encrypted body
 * @param {Buffer}  encKey  32-byte AES key
 * @param {number}  reserve size of the reserved area at the end of the page
 * @param {number}  ivOff   offset of the IV inside the reserved area
 * @param {boolean} p1      true for page 1 (skip the leading 16-byte salt)
 * @returns {Buffer|null} plaintext, or null when the IV is incomplete or the
 *   cipher rejects the input
 */
function decPage(page, encKey, reserve, ivOff, p1) {
  const reservedStart = page.length - reserve;
  const ivStart = reservedStart + ivOff;
  const iv = page.subarray(ivStart, ivStart + 16);
  if (iv.length < 16) return null;
  const cipherText = page.subarray(p1 ? 16 : 0, reservedStart);
  try {
    const decipher = crypto.createDecipheriv('aes-256-cbc', encKey, iv);
    decipher.setAutoPadding(false);
    const head = decipher.update(cipherText);
    const tail = decipher.final();
    return Buffer.concat([head, tail]);
  } catch {
    // Bad key/IV size or ciphertext not block-aligned: treat as "no result".
    return null;
  }
}
48
// A decrypted page 1 is accepted when bytes 5..7 are 64, 32, 32. A falsy
// input is returned unchanged; otherwise the result is a boolean.
const validHdr = (pt) => {
  if (!pt) return pt;
  return pt.length > 8 && pt[5] === 64 && pt[6] === 32 && pt[7] === 32;
};
// Layouts tried in order: [page size, reserved bytes, IV offset within reserve].
const CFG = [
  [4096, 48, 0],
  [4096, 80, 0],
  [4096, 48, 20],
];
50
+
51
/**
 * Find the working key by trying every uid candidate, then decrypt all pages.
 *
 * The first 1024 bytes of `raw` are skipped; the next 16 bytes are the KDF
 * salt. For each uid the passphrase is md5(md5(uid) + rand), stretched with
 * PBKDF2 (4000 iterations, sha512 then sha1) and tested against every layout
 * in CFG until page 1 passes `validHdr`.
 *
 * In the output, page 1 begins with the literal "SQLite format 3\0" header
 * followed by its plaintext; a page that fails to decrypt is left zero-filled.
 * A trailing partial page is dropped.
 *
 * @param {Buffer}   raw  encrypted file bytes
 * @param {string[]} uids candidate uids
 * @param {string}   rand token from the plaintext header
 * @returns {{decrypted: Buffer, uid: string, kdf: string}|null} null = no uid matched.
 */
function deriveAndDecrypt(raw, uids, rand) {
  const usable = raw && raw.length > 1024 && rand && uids && uids.length;
  if (!usable) return null;

  const body = raw.subarray(1024);
  const salt = body.subarray(0, 16);

  // First (uid, digest, layout) combination whose page 1 looks valid.
  const findHit = () => {
    for (const uid of uids) {
      const pass = md5(md5(uid) + rand);
      for (const algo of ['sha512', 'sha1']) {
        const ek = crypto.pbkdf2Sync(Buffer.from(pass, 'utf8'), salt, 4000, 32, algo);
        for (const [pg, rs, iv] of CFG) {
          const firstPage = decPage(body.subarray(0, pg), ek, rs, iv, true);
          if (validHdr(firstPage)) {
            return { uid, algo, page: pg, reserve: rs, ivOff: iv, ek };
          }
        }
      }
    }
    return null;
  };

  const hit = findHit();
  if (!hit) return null;

  const { ek, page, reserve, ivOff } = hit;
  const pageCount = Math.floor(body.length / page);
  const pages = [];
  for (let idx = 0; idx < pageCount; idx += 1) {
    const isFirst = idx === 0;
    const plain = decPage(body.subarray(idx * page, (idx + 1) * page), ek, reserve, ivOff, isFirst);
    const full = Buffer.alloc(page);
    // Page 1 lost its first 16 bytes to the salt; restore the SQLite magic there.
    if (isFirst) Buffer.from('SQLite format 3\0').copy(full, 0);
    if (plain) plain.copy(full, isFirst ? 16 : 0);
    pages.push(full);
  }
  return { decrypted: Buffer.concat(pages), uid: hit.uid, kdf: hit.algo };
}
91
+
92
// ── protobuf message-body → readable text (from pdh-qq-ingest.mjs) ──────────
/**
 * Decode one base-128 varint starting at offset `p`.
 *
 * @param {Buffer} buf source bytes
 * @param {number} p   offset of the first varint byte
 * @returns {[bigint, number]} the decoded value and the offset just past it;
 *   decoding simply stops at the end of the buffer if the varint is truncated
 */
function readVarint(buf, p) {
  let value = 0n;
  let pos = p;
  for (let shift = 0n; pos < buf.length; shift += 7n) {
    const byte = buf[pos];
    pos += 1;
    value |= BigInt(byte & 0x7f) << shift;
    if ((byte & 0x80) === 0) break; // high bit clear marks the last byte
  }
  return [value, pos];
}
103
/**
 * Walk `buf` as a protobuf message and yield the payload of every
 * length-delimited (wire type 2) field.
 *
 * Varint, 32-bit and 64-bit fields are skipped. Any other wire type ends the
 * walk. A declared length that runs past the buffer yields whatever bytes
 * remain.
 *
 * @param {Buffer} buf candidate protobuf bytes
 * @yields {Buffer} payload of each length-delimited field
 */
function* fields(buf) {
  let pos = 0;
  while (pos < buf.length) {
    const [rawTag, afterTag] = readVarint(buf, pos);
    pos = afterTag;
    switch (Number(rawTag) & 7) {
      case 0: // varint: decode only to advance
        pos = readVarint(buf, pos)[1];
        break;
      case 2: { // length-delimited
        const [rawLen, afterLen] = readVarint(buf, pos);
        const len = Number(rawLen);
        const data = buf.subarray(afterLen, afterLen + len);
        pos = afterLen + len;
        yield data;
        break;
      }
      case 5: // fixed 32-bit
        pos += 4;
        break;
      case 1: // fixed 64-bit
        pos += 8;
        break;
      default:
        return;
    }
  }
}
114
/**
 * True when `s` looks like displayable text.
 *
 * It must contain at least one CJK ideograph (U+4E00–U+9FFF), one character
 * from U+3000–U+30FF, or an ASCII letter or digit. It must contain no U+FFFD
 * replacement character and no control character other than a newline. A
 * falsy `s` is returned unchanged.
 *
 * The code points are written as `\u` escapes on purpose. The literal
 * characters are invisible or easily mangled by re-encoding, and a range that
 * silently starts at an ASCII space would accept almost any string.
 */
const readable = (s) => s && /[\u4e00-\u9fff\u3000-\u30ffA-Za-z0-9]/.test(s) && !/\ufffd/.test(s) &&
  [...s].every((c) => c.charCodeAt(0) >= 32 || c === '\n');
116
/**
 * Recursively collect text-looking payloads from nested protobuf bytes.
 *
 * Every length-delimited field is kept when it round-trips as UTF-8, passes
 * `readable`, is at most 1000 characters long and is not a bare URL. Each
 * field of two or more bytes is also descended into as a possible nested
 * message, whether or not it was kept.
 *
 * @param {Buffer}   buf   bytes to scan
 * @param {number}   depth current nesting level; scanning stops beyond 6
 * @param {string[]} out   accumulator, appended to in place
 */
function extractTexts(buf, depth, out) {
  const nothingToScan = depth > 6 || !buf || buf.length === 0;
  if (nothingToScan) return;

  const isKeepableText = (chunk) => {
    let decoded = null;
    try {
      decoded = chunk.toString('utf8');
    } catch {
      // leave decoded as null: the chunk is simply not kept as text
    }
    return Boolean(
      decoded &&
      Buffer.from(decoded, 'utf8').equals(chunk) && // lossless UTF-8 round trip
      readable(decoded) &&
      decoded.length <= 1000 &&
      !/^https?:\/\/\S+$/.test(decoded),
    ) ? decoded : null;
  };

  for (const chunk of fields(buf)) {
    if (!chunk || chunk.length === 0) continue;
    const kept = isKeepableText(chunk);
    if (kept !== null) out.push(kept);
    if (chunk.length >= 2) extractTexts(chunk, depth + 1, out);
  }
}
126
/**
 * Pick the best text candidate out of a protobuf message body.
 *
 * Prefers the longest candidate that contains a CJK ideograph and otherwise
 * falls back to the longest candidate overall; ties go to the earliest one.
 *
 * @param {*} blob message body; anything that is not a Buffer yields null
 * @returns {string|null} chosen text, or null when nothing usable was found
 */
function bodyText(blob) {
  if (!Buffer.isBuffer(blob)) return null;
  const found = [];
  extractTexts(blob, 0, found);
  // Strict ">" keeps the first of several equally long candidates.
  const longest = (list) => list.reduce(
    (best, t) => (best === null || t.length > best.length ? t : best),
    null,
  );
  const withCjk = found.filter((t) => /[一-鿿]/.test(t));
  return longest(withCjk) || longest(found) || null;
}
132
+
133
/**
 * Convert a DECRYPTED nt_msg.db into vault message events (qq-pc adapter shape).
 *
 * Reads `c2c_msg_table` (direct chats) and `group_msg_table` (group chats).
 * A table that cannot be queried is skipped. Rows are dropped when their type
 * is 5 or 9, when no readable text can be extracted from the body, when the
 * text looks like a service/config push, or when they carry no timestamp.
 *
 * @param Database a better-sqlite3 constructor (injected by the caller/bundle)
 * @param dbPath   path to the decrypted nt_msg.db
 * @param self     the user's own QQ number (used as actor when a row has no sender)
 * @returns {Array} event objects ready for vault.putEvent
 */
function parseEvents(Database, dbPath, self) {
  const src = new Database(dbPath, { readonly: true });
  const events = [];
  // Rows are read with safeIntegers(), so integer columns arrive as BigInt.
  const toNumber = (v) => (typeof v === 'bigint' ? Number(v) : v);
  const SKIPPED_TYPES = [5, 9]; // grey-bar / system

  // Build the event for one row, or return null when the row is filtered out.
  function toEvent(row, table, isGroup) {
    if (SKIPPED_TYPES.includes(toNumber(row.type))) return null;
    const text = bodyText(row.body);
    if (!text || !/[一-鿿A-Za-z0-9]/.test(text)) return null;
    // QQ service/gray-tip config pushes (e.g. com.tencent.*) are not real
    // chat; drop them so the vault holds conversation rather than noise.
    if (text.includes('com.tencent.') || text.includes('public desc')) return null;
    // msgId may exceed 2^53, so it is stringified without going through Number.
    const msgId = typeof row.msgId === 'bigint' ? row.msgId.toString() : String(row.msgId);
    const sender = String(toNumber(row.sender) || '');
    const peer = String(toNumber(row.peer) || '');
    const occurredAt = toNumber(row.t) * 1000;
    if (!occurredAt) return null;
    const actor = sender ? `person-qq-${sender}` : `person-qq-${self}`;
    const counterpart = isGroup ? `group-qq-${peer}` : `person-qq-${peer}`;
    return {
      type: 'event',
      subtype: 'message',
      id: `qq:${table}:${msgId}`,
      occurredAt,
      actor,
      participants: [actor, counterpart],
      content: { text: isGroup ? `[群${peer}] ${text}` : text },
      topics: isGroup ? [`group-qq-${peer}`] : undefined,
      source: {
        adapter: 'qq-pc',
        adapterVersion: '0.1.0',
        originalId: `${table}:${msgId}`,
        capturedAt: occurredAt,
        capturedBy: 'sqlite',
      },
      ingestedAt: Date.now(),
    };
  }

  function ingestTable(table, isGroup) {
    const sql = [
      `SELECT [40001] msgId,[40020] uid,[40011] type,[40033] sender,[40021] peer,`,
      `[40050] t,[40800] body FROM ${table}`,
    ].join('');
    let rows;
    try {
      rows = src.prepare(sql).safeIntegers().all();
    } catch {
      return; // table missing or unreadable
    }
    for (const row of rows) {
      const event = toEvent(row, table, isGroup);
      if (event !== null) events.push(event);
    }
  }

  try {
    ingestTable('c2c_msg_table', false);
    ingestTable('group_msg_table', true);
  } finally {
    src.close();
  }
  return events;
}
189
+
190
+ module.exports = { extractRand, headerHmac, deriveAndDecrypt, bodyText, parseEvents };
@@ -0,0 +1,134 @@
1
+ 'use strict';
2
+ /**
3
+ * wechat-collect — on-device-ready WeChat (EnMicroMsg.db) decrypt + parse.
4
+ *
5
+ * Pure Node (crypto + a caller-provided better-sqlite3 ctor) — NO frida. Same
6
+ * model as qq-nt-collect.js: derived key (`MD5(IMEI+uin)[:7]`) → SQLCipher
7
+ * (page-level AES-256-CBC) → parse message/rcontact/chatroom → vault events.
8
+ * The bundle-shipped core behind `cc hub collect-wechat`; the Magisk daemon
9
+ * stages EnMicroMsg.db + uins (+ IMEI candidates) and this decrypts on-device.
10
+ * Extracted from scripts/android/pdh-wechat-decrypt.mjs (byte-identical crypto;
11
+ * verified WeChat 8.0.74 on chopin).
12
+ *
13
+ * IMEI on Android 13 is often unreadable → pass a SAVED 7-char key (once found
14
+ * it never changes for an account) or a frida raw key as a fallback candidate.
15
+ */
16
+ const crypto = require('crypto');
17
/** Hex-encoded MD5 digest of `s`. */
const md5 = (s) => {
  const hasher = crypto.createHash('md5');
  hasher.update(s);
  return hasher.digest('hex');
};
18
+
19
// Cipher layouts tried in order. Columns: name, page size, reserved bytes at
// the end of each page, IV offset inside the reserved area (0 = IV first),
// PBKDF2 iteration count, PBKDF2 digest.
const CONFIGS = [
  ['sc1', 1024, 16, 0, 4000, 'sha1'],
  ['sc2', 1024, 48, 20, 4000, 'sha1'],
  ['sc3', 1024, 48, 20, 64000, 'sha1'],
  ['sc4', 4096, 80, 64, 256000, 'sha512'],
].map(([name, pageSize, reserve, hmac, kdf, algo]) => ({ name, pageSize, reserve, hmac, kdf, algo }));
25
+
26
/**
 * AES-256-CBC decrypt one page according to `cfg` (no padding).
 *
 * The reserved area is the last `cfg.reserve` bytes of a `cfg.pageSize` page.
 * The 16-byte IV is read from offset `cfg.hmac` inside it, or from its start
 * when `cfg.hmac` is 0. On page 1 the leading 16 bytes are skipped.
 *
 * NOTE(review): SQLCipher's reference layout is believed to store the IV at
 * the start of the reserved area, ahead of the HMAC — confirm that reading it
 * at offset `cfg.hmac` is intended for the layouts with a non-zero `hmac`.
 *
 * @param {Buffer}  page one page of the encrypted file
 * @param {Buffer}  key  32-byte AES key
 * @param {{pageSize: number, reserve: number, hmac: number}} cfg layout
 * @param {boolean} isP1 true for page 1
 * @returns {Buffer|null} plaintext, or null when the IV is incomplete or the
 *   cipher rejects the input
 */
function decPage(page, key, cfg, isP1) {
  const { pageSize, reserve, hmac } = cfg;
  const tailStart = pageSize - reserve;
  const tail = page.subarray(tailStart, pageSize);
  const ivStart = hmac || 0;
  const iv = tail.subarray(ivStart, ivStart + 16);
  if (iv.length < 16) return null;
  const cipherText = page.subarray(isP1 ? 16 : 0, tailStart);
  try {
    const decipher = crypto.createDecipheriv('aes-256-cbc', key, iv);
    decipher.setAutoPadding(false);
    const head = decipher.update(cipherText);
    const rest = decipher.final();
    return Buffer.concat([head, rest]);
  } catch {
    // Bad key size or ciphertext not block-aligned: treat as "no result".
    return null;
  }
}
40
// A decrypted page 1 is accepted when bytes 5..7 are 64, 32, 32. A falsy
// input is returned unchanged; otherwise the result is a boolean.
const validP1 = (pt) => {
  if (!pt) return pt;
  return pt.length > 8 && pt[5] === 64 && pt[6] === 32 && pt[7] === 32;
};
41
+
42
/**
 * Build the de-duplicated list of passphrase candidates.
 *
 * Saved keys come first (falsy entries removed), followed by
 * md5(imei + uin) truncated to 7 hex characters for every imei × uin pair.
 *
 * @param {Array} [imeis]     candidate device ids
 * @param {Array} [uins]      candidate account uins
 * @param {Array} [savedKeys] previously found or externally supplied keys
 * @returns {Array} unique candidates in insertion order
 */
function computeKeyCandidates(imeis, uins, savedKeys) {
  const candidates = new Set();
  (savedKeys || []).filter(Boolean).forEach((saved) => candidates.add(saved));
  for (const imei of imeis || []) {
    for (const uin of uins || []) {
      const digest = md5(String(imei) + String(uin));
      candidates.add(digest.slice(0, 7));
    }
  }
  return Array.from(candidates);
}
48
+
49
/**
 * Find a working key, then decrypt every page of the database.
 *
 * For each layout in CONFIGS, every passphrase is stretched with PBKDF2 (salt
 * is the first 16 bytes of the file) and tried first, then every raw 32-byte
 * key (Buffer or hex string). The first combination whose page 1 passes
 * `validP1` wins.
 *
 * In the output, page 1 begins with the literal "SQLite format 3\0" header
 * followed by its plaintext; a page that fails to decrypt is left zero-filled.
 * A trailing partial page is dropped.
 *
 * @param {Buffer} raw           encrypted file bytes
 * @param {string[]} [passphrases] passphrase candidates
 * @param {Array<Buffer|string>} [rawKeys] raw key candidates
 * @returns {{decrypted: Buffer, cfg: string, pass?: string}|null}
 */
function deriveAndDecrypt(raw, passphrases, rawKeys) {
  if (!raw || raw.length < 1024) return null;
  const salt = raw.subarray(0, 16);

  const opensPageOne = (key, cfg) => validP1(decPage(raw.subarray(0, cfg.pageSize), key, cfg, true));

  // First (layout, key) pair that yields a valid page 1, or null.
  const search = () => {
    for (const cfg of CONFIGS) {
      const digest = cfg.algo === 'sha512' ? 'sha512' : 'sha1';
      for (const pass of passphrases || []) {
        const key = crypto.pbkdf2Sync(Buffer.from(pass, 'utf8'), salt, cfg.kdf, 32, digest);
        if (opensPageOne(key, cfg)) return { key, cfg, pass };
      }
      for (const candidate of rawKeys || []) {
        const key = Buffer.isBuffer(candidate) ? candidate : Buffer.from(candidate, 'hex');
        if (key.length === 32 && opensPageOne(key, cfg)) return { key, cfg };
      }
    }
    return null;
  };

  const hit = search();
  if (!hit) return null;

  const { key, cfg } = hit;
  const size = cfg.pageSize;
  const pageCount = Math.floor(raw.length / size);
  const pages = [];
  for (let idx = 0; idx < pageCount; idx += 1) {
    const isFirst = idx === 0;
    const plain = decPage(raw.subarray(idx * size, (idx + 1) * size), key, cfg, isFirst);
    const full = Buffer.alloc(size);
    // Page 1 lost its first 16 bytes to the salt; restore the SQLite magic there.
    if (isFirst) Buffer.from('SQLite format 3\0').copy(full, 0);
    if (plain) plain.copy(full, isFirst ? 16 : 0);
    pages.push(full);
  }
  return { decrypted: Buffer.concat(pages), cfg: cfg.name, pass: hit.pass };
}
82
+
83
/**
 * Convert a DECRYPTED EnMicroMsg.db into vault message events (wechat adapter shape).
 *
 * Reads up to 5000 of the newest type=1 rows from `message`. Display names are
 * loaded from `rcontact` when that table is readable (remark, then nickname,
 * then username). For incoming group messages the "sender:" prefix is split
 * off the content and used as the sender id. Rows without usable text or
 * without a creation time are dropped.
 *
 * @param Database better-sqlite3 ctor (injected)
 * @param dbPath   path to the decrypted EnMicroMsg.db
 * @param self     the user's wxid (actor for sent messages)
 * @returns {Array} event objects
 */
function parseEvents(Database, dbPath, self) {
  const src = new Database(dbPath, { readonly: true });
  const events = [];

  // username → display name; stays empty when rcontact is missing.
  const loadNames = () => {
    const names = new Map();
    try {
      const contacts = src.prepare('SELECT username,nickname,conRemark FROM rcontact').all();
      for (const c of contacts) {
        const remark = c.conRemark && c.conRemark.trim();
        names.set(c.username, remark || c.nickname || c.username);
      }
    } catch { /* contacts optional */ }
    return names;
  };

  // Build the event for one message row, or return null when it is filtered out.
  const toEvent = (row, nameOf) => {
    const isGroup = /@chatroom$/.test(row.talker || '');
    let text = row.content || '';
    let senderWxid = row.isSend ? self : row.talker;
    if (isGroup && !row.isSend) {
      // Incoming group content is prefixed with "<sender>:".
      const colon = text.indexOf(':');
      if (colon > 0) {
        senderWxid = text.slice(0, colon);
        text = text.slice(colon + 1).replace(/^\n/, '').trim();
      }
    }
    if (!text || !/[一-鿿A-Za-z0-9]/.test(text)) return null;
    const occurredAt = Number(row.createTime) || 0; // already ms in WeChat
    if (!occurredAt) return null;
    const peer = String(row.talker || '');
    const actor = `person-wechat-${senderWxid || self}`;
    const counterpart = isGroup ? `group-wechat-${peer}` : `person-wechat-${peer}`;
    return {
      type: 'event',
      subtype: 'message',
      id: `wechat:${row.msgId}`,
      occurredAt,
      actor,
      participants: [actor, counterpart],
      content: { text: isGroup ? `[群${nameOf.get(peer) || peer}] ${text}` : text },
      topics: isGroup ? [`group-wechat-${peer}`] : undefined,
      source: {
        adapter: 'wechat',
        adapterVersion: '0.1.0',
        originalId: String(row.msgId),
        capturedAt: occurredAt,
        capturedBy: 'sqlite',
      },
      ingestedAt: Date.now(),
    };
  };

  try {
    const nameOf = loadNames();
    const rows = src.prepare(
      'SELECT msgId,type,isSend,createTime,talker,content FROM message ' +
      "WHERE type=1 ORDER BY createTime DESC LIMIT 5000",
    ).all();
    for (const row of rows) {
      const event = toEvent(row, nameOf);
      if (event !== null) events.push(event);
    }
  } finally {
    src.close();
  }
  return events;
}
133
+
134
+ module.exports = { computeKeyCandidates, deriveAndDecrypt, parseEvents };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chainlesschain/personal-data-hub",
3
- "version": "0.4.30",
3
+ "version": "0.4.32",
4
4
  "description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
5
5
  "type": "commonjs",
6
6
  "main": "lib/index.js",
@@ -73,7 +73,10 @@
73
73
  "./adapters/messaging-whatsapp": "./lib/adapters/messaging-whatsapp/index.js",
74
74
  "./sidecar": "./lib/sidecar/index.js",
75
75
  "./forensics/leaf-salvage": "./lib/forensics/leaf-salvage.js",
76
- "./forensics/salvage-ingest": "./lib/forensics/salvage-ingest.js"
76
+ "./forensics/salvage-ingest": "./lib/forensics/salvage-ingest.js",
77
+ "./forensics/qq-nt-collect": "./lib/forensics/qq-nt-collect.js",
78
+ "./forensics/wechat-collect": "./lib/forensics/wechat-collect.js",
79
+ "./forensics/plaintext-db-collect": "./lib/forensics/plaintext-db-collect.js"
77
80
  },
78
81
  "scripts": {
79
82
  "test": "vitest run",
@@ -104,7 +107,8 @@
104
107
  "optionalDependencies": {
105
108
  "imapflow": "^1.0.183",
106
109
  "adm-zip": "^0.5.16",
107
- "iconv-lite": "^0.6.3"
110
+ "iconv-lite": "^0.6.3",
111
+ "pdf-parse": "^1.1.1"
108
112
  },
109
113
  "devDependencies": {
110
114
  "vitest": "^4.1.5"