@chainlesschain/personal-data-hub 0.4.31 → 0.4.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
/**
|
|
3
|
+
* plaintext-db-collect — generic ingester for an app's PLAINTEXT SQLite dbs.
|
|
4
|
+
*
|
|
5
|
+
* Many app dbs aren't encrypted (browse/read/history/content/config). The Magisk
|
|
6
|
+
* daemon (root) can read them directly on MIUI; this turns any such db into vault
|
|
7
|
+
* events by pulling readable text records from each table — comprehensive coverage
|
|
8
|
+
* of the "明文库" personal data without per-db schema work. Encrypted IM dbs
|
|
9
|
+
* (QQNT/WeChat/WCDB2) have their own decrypt collectors; this is for the rest.
|
|
10
|
+
*
|
|
11
|
+
* Heuristics: skip system/Room-internal tables; per table, take TEXT-ish columns;
|
|
12
|
+
* keep a row only if it has a meaningful readable value (CJK, or ≥6 letters, not a
|
|
13
|
+
* uuid/hash/base64 blob); derive a time from any *time/*date/created/_at column.
|
|
14
|
+
* Pure Node + a caller-provided better-sqlite3 ctor.
|
|
15
|
+
*/
|
|
16
|
+
const crypto = require('crypto');
|
|
17
|
+
|
|
18
|
+
// Tables that never carry user-facing records: Android/SQLite/Room bookkeeping,
// FTS shadow tables, and *_log / *_index helpers.
const SKIP_TABLE = /^(android_metadata|sqlite_|room_master_table|_room|.*_fts(_.*)?$|.*_log$|.*_index$)/i;
// Declared column types that are treated as text.
const TEXT_TYPE = /text|char|clob|json|varchar|string/i;
// Column names that look like timestamps. Each keyword must be a whole
// underscore-delimited token; `at` (not `_at`) is the token so that single
// underscore suffixes such as `deleted_at` / `sent_at` match — the previous
// `_at` alternative sat behind `(^|_)` and therefore needed a double underscore.
const TIME_COL = /(^|_)(time|date|created|updated|at|timestamp|ctime|mtime)($|_)/i;
// Whole-value noise: long hex, base64-ish blobs, long digit runs, bare URLs,
// near-empty JSON objects and empty arrays.
const NOISE_VAL = /^(([0-9a-f]{16,})|([A-Za-z0-9+/=_-]{24,})|(\d{6,})|(https?:\/\/\S+)|(\{.{0,3}\})|(\[\]))$/;
|
|
22
|
+
|
|
23
|
+
/**
 * Parse an ISO-8601 / SQLite-style date string ("YYYY-MM-DD", optionally
 * followed by "T" or a space and "HH:MM[:SS[.fff]]", optionally a zone) to
 * epoch milliseconds. Returns 0 when the text is not in that shape.
 * A value without a zone designator is read as UTC — SQLite's own
 * CURRENT_TIMESTAMP is UTC; apps that store local time will be off by their
 * offset (NOTE(review): confirm this is acceptable for the vault).
 */
function isoTextToMs(text) {
  const m = /^(\d{4}-\d{2}-\d{2})(?:[T ](\d{2}:\d{2})(?::(\d{2})(?:\.(\d+))?)?)?\s*(Z|[+-]\d{2}:?\d{2})?$/i.exec(text.trim());
  if (!m) return 0;
  const [, day, hm = '00:00', sec = '00', frac = '', zone = 'Z'] = m;
  // Rebuild a strict ISO string so parsing never depends on engine-specific
  // fallbacks (space separator, "+0800", odd fraction lengths).
  const millis = frac.slice(0, 3).padEnd(3, '0');
  const offset = /^z$/i.test(zone) ? 'Z' : `${zone.slice(0, 3)}:${zone.slice(-2)}`;
  const ms = Date.parse(`${day}T${hm}:${sec}.${millis}${offset}`);
  return Number.isFinite(ms) && ms > 0 ? ms : 0;
}

/**
 * Normalise a raw time-column value to integer epoch milliseconds.
 *
 * Numeric input is classified by magnitude (ns / µs / ms / s); anything at or
 * below 1e9 is treated as "not a timestamp". Non-numeric strings are tried as
 * ISO/SQLite date text. Always returns an integer; 0 means "no usable time".
 *
 * @param v raw column value (number, numeric string, date string, null, …)
 * @returns {number} epoch milliseconds, or 0
 */
function normTime(v) {
  const n = Number(v);
  if (!Number.isFinite(n)) return typeof v === 'string' ? isoTextToMs(v) : 0;
  if (n <= 0) return 0;
  if (n > 1e16) return Math.floor(n / 1e6); // ns? → ms
  if (n > 1e14) return Math.floor(n / 1e3); // µs → ms
  if (n > 1e12) return Math.round(n); // ms (REAL columns may carry a fraction)
  if (n > 1e9) return Math.round(n * 1000); // s → ms, kept integral
  return 0;
}
|
|
32
|
+
/**
 * Decide whether a cell value is human-readable text worth keeping.
 * Only strings qualify; the trimmed value must be 4–2000 chars long, must not
 * be pure noise (NOISE_VAL), and must contain either a CJK ideograph or at
 * least six ASCII letters.
 *
 * @param v candidate cell value
 * @returns {boolean}
 */
function readable(v) {
  if (typeof v !== 'string') return false;

  const candidate = v.trim();
  const sizeOk = candidate.length >= 4 && candidate.length <= 2000;
  if (!sizeOk || NOISE_VAL.test(candidate)) return false;

  // Any CJK ideograph is enough on its own.
  const hasCjk = /[一-鿿]/.test(candidate);
  if (hasCjk) return true;

  // Otherwise require enough Latin letters to look like words.
  const letters = candidate.match(/[A-Za-z]/g);
  return letters !== null && letters.length >= 6;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Ingest one plaintext SQLite db into vault events.
 *
 * Every table not matched by SKIP_TABLE is read (first 2000 rows). A row
 * becomes an event when at least one of its TEXT-ish columns holds a readable
 * value; the distinct readable values are joined into the event text (capped at
 * 800 chars). The event time comes from the first column whose name matches
 * TIME_COL, falling back to the time of this ingest run.
 *
 * @param Database  better-sqlite3 ctor (injected)
 * @param dbPath    path to a plaintext SQLite db
 * @param app       app key (→ source.adapter `local-<app>`)
 * @returns {Array} vault events (type:"event", subtype:"other")
 */
function ingestPlaintextDb(Database, dbPath, app) {
  const db = new Database(dbPath, { readonly: true });
  const events = [];
  const dbName = String(dbPath).split(/[\\/]/).pop();
  // Table names come from the scanned db, so double any embedded quote to keep
  // the name a single SQL identifier (otherwise such tables fail to prepare and
  // are silently skipped).
  const quoteIdent = (name) => `"${String(name).replace(/"/g, '""')}"`;
  // One timestamp per run so the fallback occurredAt, capturedAt and ingestedAt
  // of an event can never disagree by a millisecond.
  const now = Date.now();
  try {
    const tables = db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all()
      .map((r) => r.name).filter((t) => !SKIP_TABLE.test(t));
    for (const t of tables) {
      let cols, rows;
      try {
        cols = db.prepare(`PRAGMA table_info(${quoteIdent(t)})`).all();
        rows = db.prepare(`SELECT * FROM ${quoteIdent(t)} LIMIT 2000`).all();
      } catch { continue; } // best-effort: an unreadable table must not abort the db
      const textCols = cols.filter((c) => TEXT_TYPE.test(c.type || '')).map((c) => c.name);
      if (!textCols.length || !rows.length) continue;
      const timeCol = (cols.find((c) => TIME_COL.test(c.name)) || {}).name;
      for (const row of rows) {
        const parts = textCols.map((c) => row[c]).filter(readable);
        if (!parts.length) continue;
        const text = [...new Set(parts)].join(' · ').slice(0, 800);
        const occurredAt = (timeCol ? normTime(row[timeCol]) : 0) || now;
        // Per-row hash → unique id AND unique source.originalId. A per-table
        // originalId would collide on the vault's UNIQUE(source_adapter,
        // source_original_id) and collapse every row of a table into one event.
        const rowHash = crypto
          .createHash('md5')
          .update(JSON.stringify(row))
          .digest('hex')
          .slice(0, 12);
        const id = `${app}:${dbName}:${t}:${rowHash}`;
        events.push({
          // subtype must be a schema enum value; generic plaintext records →
          // 'other' (the vault schema has no 'record' subtype).
          type: 'event', subtype: 'other',
          id, occurredAt,
          content: { text },
          source: {
            adapter: `local-${app}`, adapterVersion: '0.1.0',
            originalId: `${dbName}:${t}:${rowHash}`,
            capturedAt: occurredAt, capturedBy: 'sqlite',
          },
          topics: [`local-${app}`, `db-${dbName}`],
          ingestedAt: now,
        });
      }
    }
  } finally {
    db.close();
  }
  return events;
}
|
|
98
|
+
|
|
99
|
+
// Public API: the db ingester plus the two value heuristics it relies on.
module.exports = { ingestPlaintextDb, readable, normTime };
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
/**
|
|
3
|
+
* wechat-collect — on-device-ready WeChat (EnMicroMsg.db) decrypt + parse.
|
|
4
|
+
*
|
|
5
|
+
* Pure Node (crypto + a caller-provided better-sqlite3 ctor) — NO frida. Same
|
|
6
|
+
* model as qq-nt-collect.js: derived key (`MD5(IMEI+uin)[:7]`) → SQLCipher
|
|
7
|
+
* (page-level AES-256-CBC) → parse message/rcontact/chatroom → vault events.
|
|
8
|
+
* The bundle-shipped core behind `cc hub collect-wechat`; the Magisk daemon
|
|
9
|
+
* stages EnMicroMsg.db + uins (+ IMEI candidates) and this decrypts on-device.
|
|
10
|
+
* Extracted from scripts/android/pdh-wechat-decrypt.mjs (byte-identical crypto;
|
|
11
|
+
* verified WeChat 8.0.74 on chopin).
|
|
12
|
+
*
|
|
13
|
+
* IMEI on Android 13 is often unreadable → pass a SAVED 7-char key (once found
|
|
14
|
+
* it never changes for an account) or a frida raw key as a fallback candidate.
|
|
15
|
+
*/
|
|
16
|
+
const crypto = require('crypto');
|
|
17
|
+
// Hex-encoded MD5 digest of the given string/Buffer.
const md5 = (s) => {
  const hasher = crypto.createHash('md5');
  hasher.update(s);
  return hasher.digest('hex');
};

// Page-layout / KDF profiles tried in order when searching for the right key.
// (The names suggest SQLCipher v1–v4 defaults — TODO confirm.)
//   pageSize: bytes per page      reserve: trailing bytes not holding ciphertext
//   hmac:     HMAC length (0 = no HMAC)
//   kdf:      PBKDF2 iteration count      algo: PBKDF2 digest
const CONFIGS = [
  {
    name: 'sc1',
    pageSize: 1024,
    reserve: 16,
    hmac: 0,
    kdf: 4000,
    algo: 'sha1',
  },
  {
    name: 'sc2',
    pageSize: 1024,
    reserve: 48,
    hmac: 20,
    kdf: 4000,
    algo: 'sha1',
  },
  {
    name: 'sc3',
    pageSize: 1024,
    reserve: 48,
    hmac: 20,
    kdf: 64000,
    algo: 'sha1',
  },
  {
    name: 'sc4',
    pageSize: 4096,
    reserve: 80,
    hmac: 64,
    kdf: 256000,
    algo: 'sha512',
  },
];
|
|
25
|
+
|
|
26
|
+
/**
 * Decrypt one SQLCipher page (AES-256-CBC, no padding).
 *
 * The ciphertext runs from the start of the page (after the 16-byte salt on
 * page 1) up to the reserve area. The per-page IV is the FIRST 16 bytes of the
 * reserve area for every profile; when an HMAC is present it follows the IV.
 * (The previous code read the IV at offset `cfg.hmac`, i.e. from inside the
 * HMAC/padding bytes, so HMAC-enabled profiles could never decrypt correctly.)
 *
 * @param page  Buffer holding one raw page
 * @param key   32-byte AES key
 * @param cfg   profile with `pageSize` and `reserve`
 * @param isP1  true for the first page (skips the 16-byte salt)
 * @returns {Buffer|null} plaintext, or null when the page is too short or the
 *          cipher rejects the key/IV/ciphertext length
 */
function decPage(page, key, cfg, isP1) {
  const reserveOff = cfg.pageSize - cfg.reserve;
  const ct = page.subarray(isP1 ? 16 : 0, reserveOff);
  const reserved = page.subarray(reserveOff, cfg.pageSize);
  const iv = reserved.subarray(0, 16);
  if (iv.length < 16) return null;
  try {
    const d = crypto.createDecipheriv('aes-256-cbc', key, iv);
    d.setAutoPadding(false);
    return Buffer.concat([d.update(ct), d.final()]);
  } catch {
    // Wrong key size or a ciphertext that is not block-aligned: not decryptable.
    return null;
  }
}
|
|
40
|
+
// Sanity check on a decrypted page-1 payload: truthy only when the buffer is
// longer than 8 bytes and bytes 5..7 are 64, 32, 32. Because decPage drops the
// 16-byte salt on page 1, these sit at file offsets 21..23 — presumably the
// fixed payload-fraction bytes of the SQLite header (verify against the SQLite
// file-format spec). Returns the falsy input itself for null/undefined.
const validP1 = (pt) => pt && pt.length > 8 && pt[5] === 64 && pt[6] === 32 && pt[7] === 32;
|
|
41
|
+
|
|
42
|
+
/**
 * Build the list of candidate passphrases: any saved/raw keys first (falsy
 * entries dropped), then MD5(IMEI+uin)[:7] for every imei × uin pair.
 * Duplicates are removed while insertion order is kept.
 *
 * @param imeis     candidate IMEIs (may be null/undefined)
 * @param uins      candidate uins (may be null/undefined)
 * @param savedKeys previously found keys (may be null/undefined)
 * @returns {Array} unique candidate keys
 */
function computeKeyCandidates(imeis, uins, savedKeys) {
  const keys = new Set((savedKeys || []).filter(Boolean));
  for (const imei of imeis || []) {
    for (const uin of uins || []) {
      const digest = md5(String(imei) + String(uin));
      keys.add(digest.slice(0, 7));
    }
  }
  return Array.from(keys);
}
|
|
48
|
+
|
|
49
|
+
/**
 * Find a working key for an encrypted db image, then decrypt every page.
 *
 * For each profile in CONFIGS (in order) all passphrases are tried via PBKDF2
 * (salt = first 16 bytes of the file), then all raw 32-byte keys (Buffer or
 * hex string). The first key whose page-1 plaintext passes validP1 wins. Pages
 * that fail to decrypt are emitted as zero-filled pages; a trailing partial
 * page is dropped.
 *
 * @param raw         Buffer with the encrypted db file
 * @param passphrases candidate passphrases (may be null/undefined)
 * @param rawKeys     candidate raw keys (may be null/undefined)
 * @returns {{decrypted: Buffer, cfg: string, pass?: string}|null}
 */
function deriveAndDecrypt(raw, passphrases, rawKeys) {
  if (!raw || raw.length < 1024) return null;
  const salt = raw.subarray(0, 16);

  // First { key, cfg, pass? } that yields a plausible page 1, or null.
  const search = () => {
    for (const cfg of CONFIGS) {
      const opensFirstPage = (candidate) =>
        validP1(decPage(raw.subarray(0, cfg.pageSize), candidate, cfg, true));

      for (const pass of passphrases || []) {
        const digest = cfg.algo === 'sha512' ? 'sha512' : 'sha1';
        const derived = crypto.pbkdf2Sync(Buffer.from(pass, 'utf8'), salt, cfg.kdf, 32, digest);
        if (opensFirstPage(derived)) return { key: derived, cfg, pass };
      }

      for (const candidate of rawKeys || []) {
        const keyBytes = Buffer.isBuffer(candidate) ? candidate : Buffer.from(candidate, 'hex');
        if (keyBytes.length === 32 && opensFirstPage(keyBytes)) return { key: keyBytes, cfg };
      }
    }
    return null;
  };

  const hit = search();
  if (!hit) return null;

  const { key, cfg } = hit;
  const pageCount = Math.floor(raw.length / cfg.pageSize);
  const pages = Array.from({ length: pageCount }, (_, idx) => {
    const start = idx * cfg.pageSize;
    const plain = decPage(raw.subarray(start, start + cfg.pageSize), key, cfg, idx === 0);
    const rebuilt = Buffer.alloc(cfg.pageSize);
    let writeAt = 0;
    if (idx === 0) {
      // Page 1: the salt is replaced by the standard SQLite magic string.
      Buffer.from('SQLite format 3\0').copy(rebuilt, 0);
      writeAt = 16;
    }
    if (plain) plain.copy(rebuilt, writeAt);
    return rebuilt;
  });
  return { decrypted: Buffer.concat(pages), cfg: cfg.name, pass: hit.pass };
}
|
|
82
|
+
|
|
83
|
+
/**
 * Parse a DECRYPTED EnMicroMsg.db → vault events (wechat adapter shape).
 *
 * Reads up to the 5000 most recent text messages (type=1). Group messages
 * (talker ends in `@chatroom`) received from others carry a `sender:` prefix
 * that is split off into the actor. Messages with no letters/digits/CJK or
 * without a createTime are skipped.
 *
 * Participants: `[actor, group]` for group chats; for 1:1 chats both ends of
 * the conversation (actor first), so a received message lists the sender AND
 * the account owner instead of the sender twice.
 *
 * @param Database better-sqlite3 ctor (injected). @param self the user's wxid.
 * @param dbPath   path to the decrypted db
 * @returns {Array} vault events (type:"event", subtype:"message")
 */
function parseEvents(Database, dbPath, self) {
  const src = new Database(dbPath, { readonly: true });
  const events = [];
  try {
    // username → display name (remark wins over nickname)
    const nameOf = new Map();
    try {
      for (const r of src.prepare('SELECT username,nickname,conRemark FROM rcontact').all()) {
        nameOf.set(r.username, (r.conRemark && r.conRemark.trim()) || r.nickname || r.username);
      }
    } catch { /* contacts optional */ }
    const rows = src.prepare(
      'SELECT msgId,type,isSend,createTime,talker,content FROM message ' +
        "WHERE type=1 ORDER BY createTime DESC LIMIT 5000",
    ).all();
    for (const r of rows) {
      const isGroup = /@chatroom$/.test(r.talker || '');
      let text = r.content || '';
      let senderWxid = r.isSend ? self : r.talker;
      if (isGroup && !r.isSend) {
        // Incoming group text is stored as "<sender wxid>:\n<body>".
        const c = text.indexOf(':');
        if (c > 0) { senderWxid = text.slice(0, c); text = text.slice(c + 1).replace(/^\n/, '').trim(); }
      }
      if (!text || !/[一-鿿A-Za-z0-9]/.test(text)) continue;
      const occurredAt = Number(r.createTime) || 0; // already ms in WeChat
      if (!occurredAt) continue;
      const peer = String(r.talker || '');
      const actor = `person-wechat-${senderWxid || self}`;
      const participants = [actor];
      if (isGroup) {
        participants.push(`group-wechat-${peer}`);
      } else {
        // 1:1 chat: add whichever of (peer, self) is not already the actor.
        for (const wxid of [peer, self]) {
          const tag = `person-wechat-${wxid}`;
          if (wxid && !participants.includes(tag)) participants.push(tag);
        }
      }
      events.push({
        type: 'event', subtype: 'message', id: `wechat:${r.msgId}`,
        occurredAt, actor, participants,
        content: { text: isGroup ? `[群${nameOf.get(peer) || peer}] ${text}` : text },
        topics: isGroup ? [`group-wechat-${peer}`] : undefined,
        source: {
          adapter: 'wechat', adapterVersion: '0.1.0', originalId: String(r.msgId),
          capturedAt: occurredAt, capturedBy: 'sqlite',
        },
        ingestedAt: Date.now(),
      });
    }
  } finally {
    src.close();
  }
  return events;
}
|
|
133
|
+
|
|
134
|
+
// Public API: key-candidate generation, db decryption, and message parsing.
module.exports = { computeKeyCandidates, deriveAndDecrypt, parseEvents };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chainlesschain/personal-data-hub",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.33",
|
|
4
4
|
"description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
|
|
5
5
|
"type": "commonjs",
|
|
6
6
|
"main": "lib/index.js",
|
|
@@ -74,7 +74,9 @@
|
|
|
74
74
|
"./sidecar": "./lib/sidecar/index.js",
|
|
75
75
|
"./forensics/leaf-salvage": "./lib/forensics/leaf-salvage.js",
|
|
76
76
|
"./forensics/salvage-ingest": "./lib/forensics/salvage-ingest.js",
|
|
77
|
-
"./forensics/qq-nt-collect": "./lib/forensics/qq-nt-collect.js"
|
|
77
|
+
"./forensics/qq-nt-collect": "./lib/forensics/qq-nt-collect.js",
|
|
78
|
+
"./forensics/wechat-collect": "./lib/forensics/wechat-collect.js",
|
|
79
|
+
"./forensics/plaintext-db-collect": "./lib/forensics/plaintext-db-collect.js"
|
|
78
80
|
},
|
|
79
81
|
"scripts": {
|
|
80
82
|
"test": "vitest run",
|