@chainlesschain/personal-data-hub 0.3.0 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/email-adapter-snapshot.test.js +237 -0
- package/__tests__/adapters/email-adapter.test.js +1 -1
- package/__tests__/adapters/email-pdf-extractor.test.js +1 -1
- package/__tests__/adapters/email-retry-progress.test.js +1 -1
- package/__tests__/adapters/email-templates.test.js +1 -1
- package/__tests__/adapters/social-bilibili-adb-api-client.test.js +721 -0
- package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +346 -0
- package/__tests__/adapters/social-bilibili-adb-collector.test.js +284 -0
- package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +343 -0
- package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +296 -0
- package/__tests__/adapters/social-douyin-adb-collector.test.js +254 -0
- package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +304 -0
- package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +216 -0
- package/__tests__/adapters/social-weibo-adb-api-client.test.js +362 -0
- package/__tests__/adapters/social-weibo-adb-collector.test.js +201 -0
- package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +189 -0
- package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +207 -0
- package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +130 -0
- package/__tests__/adapters/system-data-android.test.js +32 -1
- package/__tests__/longtail-adapters.test.js +15 -2
- package/__tests__/shopping-adapters.test.js +96 -0
- package/__tests__/sign-providers.test.js +62 -0
- package/__tests__/travel-adapters.test.js +163 -5
- package/__tests__/whatsapp-adapter.test.js +5 -2
- package/lib/adapters/browser-history-chrome/chrome-db-reader.js +11 -1
- package/lib/adapters/email-imap/email-adapter.js +224 -17
- package/lib/adapters/messaging-telegram/index.js +15 -12
- package/lib/adapters/messaging-whatsapp/index.js +15 -12
- package/lib/adapters/shopping-taobao/index.js +161 -21
- package/lib/adapters/social-bilibili-adb/api-client.js +555 -0
- package/lib/adapters/social-bilibili-adb/chromium-cookies-reader.js +296 -0
- package/lib/adapters/social-bilibili-adb/collector.js +190 -0
- package/lib/adapters/social-bilibili-adb/cookies-extension.js +250 -0
- package/lib/adapters/social-bilibili-adb/index.js +51 -0
- package/lib/adapters/social-bilibili-adb/snapshot-builder.js +197 -0
- package/lib/adapters/social-douyin/index.js +4 -0
- package/lib/adapters/social-douyin-adb/collector.js +165 -0
- package/lib/adapters/social-douyin-adb/db-extension.js +281 -0
- package/lib/adapters/social-douyin-adb/im-db-parser.js +287 -0
- package/lib/adapters/social-douyin-adb/index.js +57 -0
- package/lib/adapters/social-douyin-adb/snapshot-builder.js +174 -0
- package/lib/adapters/social-weibo-adb/api-client.js +281 -0
- package/lib/adapters/social-weibo-adb/collector.js +169 -0
- package/lib/adapters/social-weibo-adb/cookies-extension.js +251 -0
- package/lib/adapters/social-weibo-adb/index.js +55 -0
- package/lib/adapters/social-weibo-adb/snapshot-builder.js +145 -0
- package/lib/adapters/social-xiaohongshu-adb/api-client.js +278 -0
- package/lib/adapters/social-xiaohongshu-adb/collector.js +158 -0
- package/lib/adapters/social-xiaohongshu-adb/cookies-extension.js +211 -0
- package/lib/adapters/social-xiaohongshu-adb/index.js +50 -0
- package/lib/adapters/social-xiaohongshu-adb/sign.js +90 -0
- package/lib/adapters/social-xiaohongshu-adb/snapshot-builder.js +126 -0
- package/lib/adapters/system-data-android/adapter.js +77 -3
- package/lib/adapters/travel-12306/index.js +215 -29
- package/lib/adapters/travel-amap/index.js +16 -10
- package/lib/adapters/travel-ctrip/index.js +25 -9
- package/lib/adapters/vscode/vscode-reader.js +7 -1
- package/lib/sign-providers/index.js +20 -0
- package/lib/sign-providers/interface.js +82 -0
- package/lib/sign-providers/null-sign-provider.js +30 -0
- package/package.json +6 -1
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Phase 2a (Douyin C 路径 — 2026-05-25): douyin.pull-im-db ADB extension.
|
|
5
|
+
*
|
|
6
|
+
* Plugs into the `opts.extensions` slot of `createHostAdbBridge` /
|
|
7
|
+
* `createDesktopAdbBridge`. Pipeline:
|
|
8
|
+
*
|
|
9
|
+
* 1. ADB-ls `/data/data/com.ss.android.ugc.aweme/databases/` to find
|
|
10
|
+
* `<uid>_im.db` (19-digit numeric uid prefix) — abrignoni DFIR pattern
|
|
11
|
+
* 2. ADB pull the .db cohort (main + -wal + -shm) via base64 streaming
|
|
12
|
+
* (mirrors Bilibili Phase 1a — `su -c "base64 ..."` avoids MIUI FUSE
|
|
13
|
+
* SELinux trap)
|
|
14
|
+
* 3. Verify each file's SQLite magic header before returning
|
|
15
|
+
* 4. Return `{tempPath, uid, walPath?, shmPath?, extractedAt}` for the
|
|
16
|
+
* collector to feed into im-db-parser
|
|
17
|
+
*
|
|
18
|
+
* Bilibili Phase 1a uses base64 of a single file; Douyin needs the WAL/SHM
|
|
19
|
+
* cohort because the IM db is actively written by the chat thread —
|
|
20
|
+
* skipping WAL would lose the most-recent messages. We pull all 3 files
|
|
21
|
+
* and let the sqlite reader checkpoint them on open.
|
|
22
|
+
*
|
|
23
|
+
* Failure modes (throws on each; UI maps the typed error code to a banner):
|
|
24
|
+
* - DOUYIN_NOT_INSTALLED — databases/ dir doesn't exist
|
|
25
|
+
* - DOUYIN_NO_IM_DB — no `<uid>_im.db` matching the 19-digit pattern
|
|
26
|
+
* - DOUYIN_MULTIPLE_USERS — >1 IM dbs (multi-account; need explicit uid)
|
|
27
|
+
* - DOUYIN_NO_ROOT — su not available
|
|
28
|
+
* - DOUYIN_PULL_FAILED — base64 stream error
|
|
29
|
+
* - DOUYIN_NOT_SQLITE — pulled file lacks SQLite magic header
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
const fs = require("node:fs");
|
|
33
|
+
const path = require("node:path");
|
|
34
|
+
const os = require("node:os");
|
|
35
|
+
const crypto = require("node:crypto");
|
|
36
|
+
|
|
37
|
+
const DOUYIN_DB_REMOTE_DIR =
|
|
38
|
+
"/data/data/com.ss.android.ugc.aweme/databases";
|
|
39
|
+
|
|
40
|
+
const IM_DB_PATTERN = /^(\d{19})_im\.db$/;
|
|
41
|
+
|
|
42
|
+
/**
 * Enumerate Douyin IM databases on the device by running `ls` on the app's
 * databases directory through `adb shell su -c`.
 *
 * The shell command discards `ls` stderr and, when `ls` exits non-zero,
 * prints the sentinel `__MISSING_DIR__` instead. Output consisting of exactly
 * that sentinel is reported as a missing directory; anything else is treated
 * as a listing and filtered with `IM_DB_PATTERN`.
 *
 * @param {(args: string[], opts: {serial: any, timeoutMs: number}) => Promise<string>} adb
 *   runner that executes an adb command and resolves with its stdout
 * @param {any} serial device serial, forwarded untouched to `adb`
 * @param {{timeoutMs?: number}} [opts] falsy `timeoutMs` falls back to 30 000 ms
 * @returns {Promise<{candidates: Array<{uid: string, fileName: string}>, dirMissing: boolean}>}
 *   `dirMissing: true` only for the sentinel case; an existing directory with
 *   no matching file yields `{candidates: [], dirMissing: false}`
 */
async function listImDbs(adb, serial, opts) {
  const timeoutMs = opts?.timeoutMs || 30_000;
  const shellCommand = `ls ${DOUYIN_DB_REMOTE_DIR} 2>/dev/null || echo __MISSING_DIR__`;
  const rawListing = await adb(["shell", "su", "-c", shellCommand], {
    serial,
    timeoutMs,
  });

  // adb may hand back CRLF line endings; normalise before splitting.
  const entries = rawListing.replace(/\r/g, "").trim().split(/\n/);

  const sentinelOnly = entries.length === 1 && entries[0] === "__MISSING_DIR__";
  if (sentinelOnly) {
    return { candidates: [], dirMissing: true };
  }

  const candidates = entries
    .map((entry) => entry.trim())
    .filter((fileName) => fileName.length > 0)
    .flatMap((fileName) => {
      const hit = fileName.match(IM_DB_PATTERN);
      // Capture group 1 is the numeric uid prefix of `<uid>_im.db`.
      return hit ? [{ uid: hit[1], fileName }] : [];
    });

  return { candidates, dirMissing: false };
}
|
|
77
|
+
|
|
78
|
+
/**
 * Report whether [text] is canonical, padded base64: length a multiple of 4,
 * only alphabet characters, and `=` appearing solely as the final one or two
 * characters. Written with linear scans (no backtracking regex) because the
 * input can be many megabytes.
 *
 * @param {string} text whitespace-free candidate string
 * @returns {boolean}
 */
function isWellFormedBase64(text) {
  if (text.length % 4 !== 0) return false;
  if (/[^A-Za-z0-9+/=]/.test(text)) return false;
  const firstPad = text.indexOf("=");
  if (firstPad === -1) return true;
  const padLength = text.length - firstPad;
  return padLength <= 2 && text.slice(firstPad) === "=".repeat(padLength);
}

/**
 * Pull a single remote file by running `base64 <file> | tr -d '\n\r'` under
 * `su -c` over adb and decoding the captured stdout.
 *
 * `remotePath` is interpolated into the shell command unquoted, so callers
 * must only pass shell-safe paths.
 *
 * This function validates the transport only. Content checks (SQLite magic
 * header, minimum size) are the caller's responsibility.
 *
 * @param {(args: string[], opts: {serial: any, timeoutMs: number}) => Promise<string>} adb
 *   runner that executes an adb command and resolves with its stdout
 * @param {any} serial device serial, forwarded untouched to `adb`
 * @param {string} remotePath absolute path of the file on the device
 * @param {{timeoutMs?: number}} [opts] falsy `timeoutMs` falls back to 60 000 ms
 * @returns {Promise<Buffer>} the decoded file bytes
 * @throws {Error} `DOUYIN_PULL_FAILED` when the stream is empty (stderr is
 *   discarded, so a missing file looks like this) or when the stream is not
 *   well-formed base64 (stray shell output, or a transfer cut short)
 */
async function pullFileViaSu(adb, serial, remotePath, opts) {
  const adbOpts = { serial, timeoutMs: opts?.timeoutMs || 60_000 };
  const b64 = await adb(
    [
      "shell",
      "su",
      "-c",
      `base64 ${remotePath} 2>/dev/null | tr -d '\\n\\r'`,
    ],
    adbOpts,
  );
  // Strip any line wrapping that survived `tr` (e.g. CRLF added by adb).
  const b64Clean = b64.replace(/[\r\n\t ]+/g, "");
  if (b64Clean.length === 0) {
    throw new Error(
      `DOUYIN_PULL_FAILED: base64 stream of ${remotePath} returned 0 bytes (su exec may have silently failed)`,
    );
  }
  // Buffer.from(..., "base64") never throws: it silently skips characters
  // outside the alphabet and accepts truncated input. Validate explicitly so
  // that non-base64 output or a cut-off stream cannot turn into garbage bytes.
  if (!isWellFormedBase64(b64Clean)) {
    throw new Error(
      `DOUYIN_PULL_FAILED: base64 decode failed for ${remotePath}: output is not well-formed base64 (${b64Clean.length} chars, starts with \`${b64Clean.slice(0, 40)}\`)`,
    );
  }
  return Buffer.from(b64Clean, "base64");
}
|
|
116
|
+
|
|
117
|
+
/**
 * Factory: returns an extension handler suitable for the `opts.extensions`
 * map of `createHostAdbBridge` / `createDesktopAdbBridge`.
 *
 *   const ext = createDouyinDbExtension();
 *   const bridge = createHostAdbBridge({ extensions: { "douyin.pull-im-db": ext } });
 *   const { tempPath, uid } = await bridge.invoke("douyin.pull-im-db");
 *
 * Handler params (all optional):
 *   - uid (string): pick this uid when several `<uid>_im.db` files exist.
 *     Non-string values are ignored. Without it, more than one candidate
 *     throws DOUYIN_MULTIPLE_USERS so the user picks one explicitly.
 *
 * Handler steps: probe root with `su -c "id -u"`, list candidate IM dbs,
 * pull the main db, verify its size and SQLite magic header, write it to the
 * OS temp dir, then best-effort pull the `-wal` and `-shm` siblings.
 *
 * All local files are created with mode 0o600 because they contain private
 * chat data and the temp dir may be shared with other users.
 *
 * The resolved object also carries a synchronous `cleanup()` that unlinks
 * every file the handler wrote. The caller owns cleanup and should invoke it
 * in a `finally` block. `factoryOpts.onCleanupFailed(path)` is called for
 * each path that could not be unlinked.
 *
 * Errors thrown by the handler (message prefix is the error code):
 *   - TypeError              — ctx lacks `adb` / `pickDevice` functions
 *   - DOUYIN_NO_ROOT         — `su -c "id -u"` did not report uid 0
 *   - DOUYIN_NOT_INSTALLED   — databases directory missing
 *   - DOUYIN_NO_IM_DB        — no file matching the IM db pattern
 *   - DOUYIN_UID_NOT_FOUND   — `params.uid` given but not among candidates
 *   - DOUYIN_MULTIPLE_USERS  — several candidates and no `params.uid`
 *   - DOUYIN_PULL_FAILED     — main db pull failed or decoded to < 1024 bytes
 *   - DOUYIN_NOT_SQLITE      — main db lacks the SQLite magic header
 *
 * @param {{timeoutMs?: number, onCleanupFailed?: (path: string) => void}} [factoryOpts]
 * @returns {(params: object, ctx: object) => Promise<{tempPath: string, uid: string, walPath: ?string, shmPath: ?string, extractedAt: number, cleanup: () => void}>}
 */
function createDouyinDbExtension(factoryOpts = {}) {
  const timeoutMs = factoryOpts.timeoutMs || 60_000;
  const onCleanupFailed = factoryOpts.onCleanupFailed || (() => {});

  // Owner-only permissions: the payload is the user's private chat history.
  const writePrivateFile = (filePath, bytes) => {
    fs.writeFileSync(filePath, bytes, { mode: 0o600 });
  };

  // Remove a possibly half-written file without masking the original error.
  const removeQuietly = (filePath) => {
    try {
      fs.rmSync(filePath, { force: true });
    } catch (_e) {
      // Nothing more we can do here.
    }
  };

  // Best-effort pull of a -wal / -shm sibling. Resolves with the local path
  // only when the file was fetched AND fully written; otherwise null, so the
  // caller never receives a path to a missing or partial file.
  const pullSibling = async (adb, serial, remotePath, localPath) => {
    try {
      const bytes = await pullFileViaSu(adb, serial, remotePath, { timeoutMs });
      if (bytes.length === 0) return null;
      writePrivateFile(localPath, bytes);
      return localPath;
    } catch (_e) {
      // Sibling absent or unreadable: the main db is still usable on its
      // own, only the not-yet-checkpointed messages are missing.
      removeQuietly(localPath);
      return null;
    }
  };

  return async function douyinPullImDbHandler(params, ctx) {
    if (
      !ctx ||
      typeof ctx.adb !== "function" ||
      typeof ctx.pickDevice !== "function"
    ) {
      throw new TypeError(
        "douyin.pull-im-db: ctx must provide {adb, pickDevice}",
      );
    }
    const serial = await ctx.pickDevice();

    // Step 0: probe su availability — clearer error than "ls failed".
    const idOut = await ctx.adb(["shell", "su", "-c", "id -u"], {
      serial,
      timeoutMs,
    });
    const idLine = idOut.replace(/\r+$/gm, "").trim();
    if (idLine !== "0" && !idLine.includes("uid=0")) {
      throw new Error(
        `DOUYIN_NO_ROOT: phone isn't rooted (su -c id -u returned \`${idLine.substring(0, 60)}\`). Douyin release APK isn't debuggable, so root is required to read /data/data/com.ss.android.ugc.aweme/databases/.`,
      );
    }

    // Step 1: discover candidate IM dbs.
    const { candidates, dirMissing } = await listImDbs(ctx.adb, serial, {
      timeoutMs,
    });
    if (dirMissing) {
      throw new Error(
        "DOUYIN_NOT_INSTALLED: " +
          DOUYIN_DB_REMOTE_DIR +
          " does not exist. Install Douyin App on the phone, then retry.",
      );
    }
    if (candidates.length === 0) {
      throw new Error(
        "DOUYIN_NO_IM_DB: no `<19-digit-uid>_im.db` found in databases/. Open the Douyin App + log in once + open any chat thread to materialize the IM database, then retry.",
      );
    }
    let chosen;
    const requestedUid =
      params && typeof params.uid === "string" ? params.uid : null;
    if (requestedUid) {
      chosen = candidates.find((c) => c.uid === requestedUid);
      if (!chosen) {
        throw new Error(
          `DOUYIN_UID_NOT_FOUND: requested uid=${requestedUid} not in ${JSON.stringify(candidates.map((c) => c.uid))}`,
        );
      }
    } else if (candidates.length === 1) {
      chosen = candidates[0];
    } else {
      throw new Error(
        `DOUYIN_MULTIPLE_USERS: multiple IM dbs found (${candidates.map((c) => c.uid).join(", ")}). Pass {uid: "<19-digit>"} to disambiguate.`,
      );
    }

    // Step 2: pull the cohort (main + -wal + -shm). The WAL sibling holds
    // writes not yet checkpointed into the main file, so skipping it would
    // drop the most recent messages. WAL/SHM may legitimately be absent.
    const remoteDb = `${DOUYIN_DB_REMOTE_DIR}/${chosen.fileName}`;
    const tmpDir = os.tmpdir();
    const tmpId = crypto.randomUUID();
    const tempPath = path.join(tmpDir, `cc-douyin-im-${tmpId}.db`);

    const dbBuf = await pullFileViaSu(ctx.adb, serial, remoteDb, { timeoutMs });
    // Size + magic check on the main file before anything touches disk.
    // NOTE(review): the threshold is 1024 bytes while the message says
    // "≥4KB"; the message text is left unchanged for compatibility.
    if (dbBuf.length < 1024) {
      throw new Error(
        `DOUYIN_PULL_FAILED: decoded ${remoteDb} is only ${dbBuf.length} bytes — expected ≥4KB sqlite. Possible MIUI silent su fail.`,
      );
    }
    const magic = dbBuf.subarray(0, 16).toString("latin1");
    if (!magic.startsWith("SQLite format 3")) {
      throw new Error(
        `DOUYIN_NOT_SQLITE: ${remoteDb} decoded but lacks 'SQLite format 3' magic header. Got: ${dbBuf.subarray(0, 16).toString("hex")}`,
      );
    }
    try {
      writePrivateFile(tempPath, dbBuf);
    } catch (err) {
      // Do not leave a partial copy of the chat db behind.
      removeQuietly(tempPath);
      throw err;
    }

    const walPath = await pullSibling(
      ctx.adb,
      serial,
      remoteDb + "-wal",
      path.join(tmpDir, `cc-douyin-im-${tmpId}.db-wal`),
    );
    const shmPath = await pullSibling(
      ctx.adb,
      serial,
      remoteDb + "-shm",
      path.join(tmpDir, `cc-douyin-im-${tmpId}.db-shm`),
    );

    return {
      tempPath,
      uid: chosen.uid,
      walPath,
      shmPath,
      extractedAt: Date.now(),
      // Caller is responsible for cleanup; intended for a finally block.
      cleanup() {
        for (const p of [tempPath, walPath, shmPath]) {
          if (!p) continue;
          try {
            fs.unlinkSync(p);
          } catch (_e) {
            onCleanupFailed(p);
          }
        }
      },
    };
  };
}
|
|
271
|
+
|
|
272
|
+
module.exports = {
|
|
273
|
+
createDouyinDbExtension,
|
|
274
|
+
DOUYIN_DB_REMOTE_DIR,
|
|
275
|
+
IM_DB_PATTERN,
|
|
276
|
+
// Exposed for tests
|
|
277
|
+
_internals: {
|
|
278
|
+
listImDbs,
|
|
279
|
+
pullFileViaSu,
|
|
280
|
+
},
|
|
281
|
+
};
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Phase 2a (Douyin C 路径 — 2026-05-25): Douyin IM sqlite parser.
|
|
5
|
+
*
|
|
6
|
+
* Parses the Douyin Android App's per-user IM sqlite:
|
|
7
|
+
* /data/data/com.ss.android.ugc.aweme/databases/<uid>_im.db
|
|
8
|
+
*
|
|
9
|
+
* Where `<uid>` is the 19-digit numeric Douyin UID (matches what the app
|
|
10
|
+
* shows in passport/account/info/v2 as `user_id`, not the secUid).
|
|
11
|
+
*
|
|
12
|
+
* Schema reference: Alexis Brignoni's TIKTOK DFIR SQL repo
|
|
13
|
+
* https://github.com/abrignoni/DFIR-SQL-Query-Repo/blob/master/Android/TIKTOK/TikTokMessages.sql
|
|
14
|
+
*
|
|
15
|
+
* Two tables we parse:
|
|
16
|
+
*
|
|
17
|
+
* msg
|
|
18
|
+
* sender INTEGER — sender UID (numeric, matches DB filename uid for self-sent)
|
|
19
|
+
* created_time INTEGER — Unix epoch milliseconds
|
|
20
|
+
* content TEXT — JSON: {text, display_name, url:{url_list:[...]}}
|
|
21
|
+
* read_status INTEGER
|
|
22
|
+
* local_info TEXT
|
|
23
|
+
* conversation_id TEXT — peer thread identifier
|
|
24
|
+
*
|
|
25
|
+
* SIMPLE_USER (contacts cache; mutual-follow visible)
|
|
26
|
+
* UID INTEGER
|
|
27
|
+
* short_id INTEGER
|
|
28
|
+
* name TEXT
|
|
29
|
+
* avatar_url TEXT
|
|
30
|
+
* follow_status INTEGER — 0/1/2 (none/following/mutual)
|
|
31
|
+
*
|
|
32
|
+
* Both tables are **unencrypted SQLite**. No SQLCipher. Douyin (and global
|
|
33
|
+
* TikTok) stores its IM db in plaintext on Android per multiple academic
|
|
34
|
+
* forensic studies (Brignoni 2018, ACM ARES 2020). This is the key
|
|
35
|
+
* differentiator from WeChat/QQ which need frida hooks for the key.
|
|
36
|
+
*
|
|
37
|
+
* What this parser DOES NOT do:
|
|
38
|
+
* - Decrypt encrypted message attachments (separate `attachment_<id>` files
|
|
39
|
+
* in the same dir; not in scope for v0.1)
|
|
40
|
+
* - Resolve sender UID → nickname (would need a JOIN to SIMPLE_USER; we
|
|
41
|
+
* emit both tables separately so the consumer can correlate)
|
|
42
|
+
* - Sticker / voice / video message content (the content JSON has type
|
|
43
|
+
* discriminators we ignore — only `text` is extracted; other types
|
|
44
|
+
* yield empty `text` field with the raw payload preserved)
|
|
45
|
+
*
|
|
46
|
+
* Test seam: callers can inject a synthetic `_databaseClass` to bypass the
|
|
47
|
+
* dual-load probe (Phase 1a chromium-cookies-reader pattern).
|
|
48
|
+
*/
|
|
49
|
+
|
|
50
|
+
/**
 * Dual-load the SQLite driver: try `better-sqlite3-multiple-ciphers` first,
 * then plain `better-sqlite3`. A module only counts as usable if it can
 * actually open and close an in-memory database, which catches native
 * bindings that load but do not work in the current runtime.
 *
 * NOTE(review): the original comment attributes the first module to the
 * Electron runtime and the second to the plain-Node test path; that mapping
 * is not verifiable from this file.
 *
 * @returns {Function} the first driver class that passes the probe
 * @throws {Error} when neither module is usable. The message is unchanged
 *   from earlier versions; the per-module reasons (module not installed,
 *   binding mismatch, ...) are available on `error.cause.errors`.
 */
function loadDatabaseClass() {
  const failures = [];
  for (const mod of ["better-sqlite3-multiple-ciphers", "better-sqlite3"]) {
    try {
      // eslint-disable-next-line global-require
      const cls = require(mod);
      const probe = new cls(":memory:");
      probe.close();
      return cls;
    } catch (err) {
      // Keep the reason instead of discarding it: "not installed" and
      // "wrong ABI" need different remedies.
      failures.push(
        new Error(`${mod}: ${err?.message ?? String(err)}`, { cause: err }),
      );
    }
  }
  throw new Error(
    "douyin-im-db-parser: neither better-sqlite3-multiple-ciphers nor better-sqlite3 loaded — both ABI-mismatched",
    {
      cause: new AggregateError(
        failures,
        "no usable better-sqlite3 build could be loaded",
      ),
    },
  );
}
|
|
76
|
+
|
|
77
|
+
/**
 * Extract the user-visible text from a `content` column value.
 *
 * Resolution order:
 *   1. Not a string, or empty            → null
 *   2. Not parseable as JSON             → the raw string (plain-text row)
 *   3. JSON number / boolean / string    → the raw string. A plain-text
 *      message such as `666` or `true` happens to be valid JSON, but it is
 *      still just text and must not be dropped.
 *   4. JSON object with string `text`    → that text
 *   5. JSON object with `content.text`   → that nested text
 *   6. Anything else (JSON `null`, arrays, objects without text, i.e.
 *      non-text message types)           → null
 *
 * @param {string} blob raw content column value
 * @returns {string|null} extracted text, or null when there is none
 */
function extractTextFromContent(blob) {
  if (typeof blob !== "string" || blob.length === 0) return null;

  let parsed;
  try {
    parsed = JSON.parse(blob);
  } catch (_e) {
    // Not JSON — treat the value itself as the message text.
    return blob;
  }

  if (parsed === null) return null;
  if (typeof parsed !== "object") {
    // Primitive JSON value: the row was plain text that merely looks like
    // JSON. Return it verbatim rather than losing the message.
    return blob;
  }

  // Flat shape: {text: "...", ...}
  if (typeof parsed.text === "string") return parsed.text;
  // Nested shape: {content: {text: "..."}}
  if (parsed.content && typeof parsed.content.text === "string") {
    return parsed.content.text;
  }
  return null;
}
|
|
101
|
+
|
|
102
|
+
/**
 * Read the `msg` table and map each row to a message record.
 *
 * @param {object} db open database handle
 * @param {number} limit maximum number of rows (newest first)
 * @returns {{present: boolean, records: Array<object>}} `present` is false
 *   when `PRAGMA table_info(msg)` reports no columns
 */
function readImDbMessages(db, limit) {
  const tableInfo = trySelect(db, "PRAGMA table_info(msg)");
  if (!Array.isArray(tableInfo) || tableInfo.length === 0) {
    return { present: false, records: [] };
  }
  const columns = new Set(tableInfo.map((r) => r.name));
  // Column names drift between app versions; probe known aliases.
  const senderCol = pickCol(columns, ["sender", "from_user_id", "uid"]);
  const timeCol = pickCol(columns, ["created_time", "create_time", "created_at"]);
  const contentCol = pickCol(columns, ["content", "message_content"]);
  const convCol = pickCol(columns, ["conversation_id", "conv_id", "session_id"]);
  const readCol = pickCol(columns, ["read_status", "read", "is_read"]);
  // sender + time + content are mandatory; the rest is optional.
  if (!senderCol || !timeCol || !contentCol) {
    return { present: true, records: [] };
  }

  // CAST the sender to TEXT inside SQLite: UIDs are 19-digit integers, which
  // exceed Number.MAX_SAFE_INTEGER and would be rounded if the driver handed
  // them over as JS numbers.
  const sql =
    `SELECT CAST(${senderCol} AS TEXT) AS sender, ${timeCol} AS createdTime, ${contentCol} AS content` +
    (convCol ? `, ${convCol} AS conversationId` : "") +
    (readCol ? `, ${readCol} AS readStatus` : "") +
    ` FROM msg ORDER BY ${timeCol} DESC LIMIT ${limit}`;
  const rows = trySelect(db, sql) || [];
  const records = rows.map((r) => ({
    senderUid: r.sender != null ? String(r.sender) : null,
    conversationId: r.conversationId ? String(r.conversationId) : null,
    createdTimeMs: normalizeEpochMs(r.createdTime),
    text: extractTextFromContent(r.content),
    readStatus: typeof r.readStatus === "number" ? r.readStatus : null,
    contentBlob: typeof r.content === "string" ? r.content : null,
  }));
  return { present: true, records };
}

/**
 * Read the `SIMPLE_USER` table and map each row to a contact record.
 *
 * @param {object} db open database handle
 * @param {number} limit maximum number of rows
 * @returns {{present: boolean, records: Array<object>}} `present` is false
 *   when `PRAGMA table_info(SIMPLE_USER)` reports no columns
 */
function readImDbContacts(db, limit) {
  const tableInfo = trySelect(db, "PRAGMA table_info(SIMPLE_USER)");
  if (!Array.isArray(tableInfo) || tableInfo.length === 0) {
    return { present: false, records: [] };
  }
  const columns = new Set(tableInfo.map((r) => r.name));
  const uidCol = pickCol(columns, ["UID", "uid", "user_id"]);
  const shortIdCol = pickCol(columns, ["short_id", "shortId", "ShortId"]);
  const nameCol = pickCol(columns, ["name", "nick_name", "nickname"]);
  const avatarCol = pickCol(columns, ["avatar_url", "avatarUrl", "avatar"]);
  const followCol = pickCol(columns, ["follow_status", "followStatus", "follow_state"]);
  if (!uidCol) {
    return { present: true, records: [] };
  }

  // Same 64-bit precision concern as the message sender: fetch ids as TEXT.
  const fields = [`CAST(${uidCol} AS TEXT) AS uid`];
  if (shortIdCol) fields.push(`CAST(${shortIdCol} AS TEXT) AS shortId`);
  if (nameCol) fields.push(`${nameCol} AS name`);
  if (avatarCol) fields.push(`${avatarCol} AS avatarUrl`);
  if (followCol) fields.push(`${followCol} AS followStatus`);
  const sql = `SELECT ${fields.join(", ")} FROM SIMPLE_USER LIMIT ${limit}`;
  const rows = trySelect(db, sql) || [];
  const records = rows.map((r) => ({
    uid: r.uid != null ? String(r.uid) : null,
    shortId: r.shortId != null ? String(r.shortId) : null,
    name: r.name || null,
    avatarUrl: r.avatarUrl || null,
    followStatus: typeof r.followStatus === "number" ? r.followStatus : null,
  }));
  return { present: true, records };
}

/**
 * Parse the msg + SIMPLE_USER tables from a Douyin IM sqlite at [dbPath].
 *
 * Returns `{ messages, contacts, diagnostic }` where:
 *   - messages: Array<{senderUid, conversationId, createdTimeMs, text, readStatus, contentBlob}>,
 *     newest first, at most `limitMessages` rows
 *   - contacts: Array<{uid, shortId, name, avatarUrl, followStatus}>,
 *     at most `limitContacts` rows
 *   - diagnostic: {messageCount, contactCount, hadMsgTable, hadSimpleUserTable}
 *
 * `senderUid`, `uid` and `shortId` are always strings (or null). They are
 * cast to TEXT inside SQLite so 19-digit ids keep every digit.
 *
 * A missing table, a table lacking its mandatory columns, or a failing
 * SELECT yields an empty array rather than an error; `hadXxxTable` tells the
 * caller whether the table existed at all. The database is opened read-only
 * and is always closed before returning.
 *
 * @param {string} dbPath absolute path to the IM sqlite db
 * @param {{_databaseClass?: any, limitMessages?: number, limitContacts?: number}} [opts]
 *   `_databaseClass` is a test seam that replaces the driver lookup; limits
 *   that are not positive integers fall back to 10 000 / 5 000
 * @returns {{messages: Array, contacts: Array, diagnostic: object}}
 * @throws {TypeError} when `dbPath` is not a non-empty string
 * @throws whatever the driver constructor throws when the file cannot be opened
 */
function parseImDb(dbPath, opts = {}) {
  if (typeof dbPath !== "string" || dbPath.length === 0) {
    throw new TypeError("parseImDb: dbPath must be a non-empty string");
  }
  const limitMessages =
    Number.isInteger(opts.limitMessages) && opts.limitMessages > 0
      ? opts.limitMessages
      : 10_000;
  const limitContacts =
    Number.isInteger(opts.limitContacts) && opts.limitContacts > 0
      ? opts.limitContacts
      : 5_000;
  const Database = opts._databaseClass || loadDatabaseClass();
  const db = new Database(dbPath, { readonly: true });
  const out = {
    messages: [],
    contacts: [],
    diagnostic: {
      messageCount: 0,
      contactCount: 0,
      hadMsgTable: false,
      hadSimpleUserTable: false,
    },
  };
  try {
    const msg = readImDbMessages(db, limitMessages);
    out.diagnostic.hadMsgTable = msg.present;
    out.messages = msg.records;
    out.diagnostic.messageCount = msg.records.length;

    const users = readImDbContacts(db, limitContacts);
    out.diagnostic.hadSimpleUserTable = users.present;
    out.contacts = users.records;
    out.diagnostic.contactCount = users.records.length;
  } finally {
    db.close();
  }
  return out;
}
|
|
238
|
+
|
|
239
|
+
/**
 * Convert an epoch timestamp of unknown unit to whole milliseconds, guessing
 * the unit from the magnitude:
 *   - above 1e15          → treated as microseconds, divided by 1000
 *   - above 1e12 to 1e15  → treated as milliseconds, kept
 *   - 1e12 and below      → treated as seconds, multiplied by 1000
 *
 * @param {*} v raw column value
 * @returns {number|null} integer milliseconds (floored), or null when `v` is
 *   not a finite number greater than zero
 */
function normalizeEpochMs(v) {
  const usable = typeof v === "number" && Number.isFinite(v) && v > 0;
  if (!usable) {
    return null;
  }

  let millis;
  if (v > 1e15) {
    millis = v / 1000; // microseconds
  } else if (v > 1e12) {
    millis = v; // already milliseconds
  } else {
    millis = v * 1000; // seconds
  }
  return Math.floor(millis);
}
|
|
253
|
+
|
|
254
|
+
/**
 * Run a query and return all of its rows, converting any failure into
 * `null`. Both the prepare step and the fetch step are covered, so a missing
 * table, a syntax error, or any other driver exception all yield `null`.
 *
 * @param {{prepare: (sql: string) => {all: () => Array}}} db database handle
 * @param {string} sql statement text
 * @returns {Array|null} whatever `all()` returned, or null on error
 */
function trySelect(db, sql) {
  let rows = null;
  try {
    const statement = db.prepare(sql);
    rows = statement.all();
  } catch (_e) {
    rows = null;
  }
  return rows;
}
|
|
266
|
+
|
|
267
|
+
/**
 * Choose a column name from a list of aliases: the earliest entry of
 * [candidates] that is a member of [columns] wins.
 *
 * @param {{has: (name: string) => boolean}} columns set of existing column names
 * @param {Iterable<string>} candidates aliases in priority order
 * @returns {string|null} the matching alias, or null when none exists
 */
function pickCol(columns, candidates) {
  const aliases = [...candidates];
  const position = aliases.findIndex((alias) => columns.has(alias));
  return position === -1 ? null : aliases[position];
}
|
|
277
|
+
|
|
278
|
+
module.exports = {
|
|
279
|
+
parseImDb,
|
|
280
|
+
// Exposed for tests
|
|
281
|
+
_internals: {
|
|
282
|
+
loadDatabaseClass,
|
|
283
|
+
extractTextFromContent,
|
|
284
|
+
normalizeEpochMs,
|
|
285
|
+
pickCol,
|
|
286
|
+
},
|
|
287
|
+
};
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* social-douyin-adb — Phase 2 (Douyin C 路径) entry.
|
|
5
|
+
*
|
|
6
|
+
* Phase 2a (this commit) — desktop ADB-based IM db extraction:
|
|
7
|
+
* - douyin.pull-im-db extension pulls <uid>_im.db cohort to host
|
|
8
|
+
* - parseImDb parse msg + SIMPLE_USER tables
|
|
9
|
+
* - buildSnapshot → schemaVersion=1 events JSON
|
|
10
|
+
* - collect / collectAndSync orchestrator
|
|
11
|
+
*
|
|
12
|
+
* Phase 2b (next) — Android Kotlin B path (in-APK root):
|
|
13
|
+
* - reuse Phase B0 LocalRootCollector / BaseRootCredentialsStore /
|
|
14
|
+
* RootShellRunner / DbCohortCopier scaffold
|
|
15
|
+
* - libmsaoaidsec.so frida bypass for the anti-debug TracerPid check
|
|
16
|
+
*
|
|
17
|
+
* Pipeline (C path):
|
|
18
|
+
* bridge.invoke("douyin.pull-im-db")
|
|
19
|
+
* → parseImDb(tempPath)
|
|
20
|
+
* → buildSnapshot + writeSnapshotJson
|
|
21
|
+
* → registry.syncAdapter("social-douyin", { inputPath })
|
|
22
|
+
*
|
|
23
|
+
* Reuses the existing `social-douyin` adapter's snapshot mode — no 2nd
|
|
24
|
+
* adapter, same vault schema / dedup / event types. Phase 2a extended
|
|
25
|
+
* VALID_SNAPSHOT_KINDS in social-douyin/index.js to include `message` +
|
|
26
|
+
* `contact` for the abrignoni-DFIR-parsed IM tables.
|
|
27
|
+
*/
|
|
28
|
+
|
|
29
|
+
const {
|
|
30
|
+
createDouyinDbExtension,
|
|
31
|
+
DOUYIN_DB_REMOTE_DIR,
|
|
32
|
+
IM_DB_PATTERN,
|
|
33
|
+
} = require("./db-extension");
|
|
34
|
+
const { parseImDb } = require("./im-db-parser");
|
|
35
|
+
const {
|
|
36
|
+
buildSnapshot,
|
|
37
|
+
writeSnapshotJson,
|
|
38
|
+
cleanupSnapshotJson,
|
|
39
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
40
|
+
} = require("./snapshot-builder");
|
|
41
|
+
const { collect, collectAndSync } = require("./collector");
|
|
42
|
+
|
|
43
|
+
module.exports = {
|
|
44
|
+
// Extension factory (wiring registers this on the bridge)
|
|
45
|
+
createDouyinDbExtension,
|
|
46
|
+
DOUYIN_DB_REMOTE_DIR,
|
|
47
|
+
IM_DB_PATTERN,
|
|
48
|
+
// Parser + builder (also exposed for advanced callers / tests)
|
|
49
|
+
parseImDb,
|
|
50
|
+
buildSnapshot,
|
|
51
|
+
writeSnapshotJson,
|
|
52
|
+
cleanupSnapshotJson,
|
|
53
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
54
|
+
// Collector orchestrator
|
|
55
|
+
collect,
|
|
56
|
+
collectAndSync,
|
|
57
|
+
};
|