@chainlesschain/personal-data-hub 0.3.0 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/__tests__/adapters/email-adapter-snapshot.test.js +237 -0
  2. package/__tests__/adapters/email-adapter.test.js +1 -1
  3. package/__tests__/adapters/email-pdf-extractor.test.js +1 -1
  4. package/__tests__/adapters/email-retry-progress.test.js +1 -1
  5. package/__tests__/adapters/email-templates.test.js +1 -1
  6. package/__tests__/adapters/social-bilibili-adb-api-client.test.js +721 -0
  7. package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +346 -0
  8. package/__tests__/adapters/social-bilibili-adb-collector.test.js +284 -0
  9. package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +343 -0
  10. package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +296 -0
  11. package/__tests__/adapters/social-douyin-adb-collector.test.js +254 -0
  12. package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +304 -0
  13. package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +216 -0
  14. package/__tests__/adapters/social-weibo-adb-api-client.test.js +362 -0
  15. package/__tests__/adapters/social-weibo-adb-collector.test.js +201 -0
  16. package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +189 -0
  17. package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +207 -0
  18. package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +130 -0
  19. package/__tests__/adapters/system-data-android.test.js +32 -1
  20. package/__tests__/longtail-adapters.test.js +15 -2
  21. package/__tests__/shopping-adapters.test.js +96 -0
  22. package/__tests__/sign-providers.test.js +62 -0
  23. package/__tests__/travel-adapters.test.js +163 -5
  24. package/__tests__/whatsapp-adapter.test.js +5 -2
  25. package/lib/adapters/browser-history-chrome/chrome-db-reader.js +11 -1
  26. package/lib/adapters/email-imap/email-adapter.js +224 -17
  27. package/lib/adapters/messaging-telegram/index.js +15 -12
  28. package/lib/adapters/messaging-whatsapp/index.js +15 -12
  29. package/lib/adapters/shopping-taobao/index.js +161 -21
  30. package/lib/adapters/social-bilibili-adb/api-client.js +555 -0
  31. package/lib/adapters/social-bilibili-adb/chromium-cookies-reader.js +296 -0
  32. package/lib/adapters/social-bilibili-adb/collector.js +190 -0
  33. package/lib/adapters/social-bilibili-adb/cookies-extension.js +250 -0
  34. package/lib/adapters/social-bilibili-adb/index.js +51 -0
  35. package/lib/adapters/social-bilibili-adb/snapshot-builder.js +197 -0
  36. package/lib/adapters/social-douyin/index.js +4 -0
  37. package/lib/adapters/social-douyin-adb/collector.js +165 -0
  38. package/lib/adapters/social-douyin-adb/db-extension.js +281 -0
  39. package/lib/adapters/social-douyin-adb/im-db-parser.js +287 -0
  40. package/lib/adapters/social-douyin-adb/index.js +57 -0
  41. package/lib/adapters/social-douyin-adb/snapshot-builder.js +174 -0
  42. package/lib/adapters/social-weibo-adb/api-client.js +281 -0
  43. package/lib/adapters/social-weibo-adb/collector.js +169 -0
  44. package/lib/adapters/social-weibo-adb/cookies-extension.js +251 -0
  45. package/lib/adapters/social-weibo-adb/index.js +55 -0
  46. package/lib/adapters/social-weibo-adb/snapshot-builder.js +145 -0
  47. package/lib/adapters/social-xiaohongshu-adb/api-client.js +278 -0
  48. package/lib/adapters/social-xiaohongshu-adb/collector.js +158 -0
  49. package/lib/adapters/social-xiaohongshu-adb/cookies-extension.js +211 -0
  50. package/lib/adapters/social-xiaohongshu-adb/index.js +50 -0
  51. package/lib/adapters/social-xiaohongshu-adb/sign.js +90 -0
  52. package/lib/adapters/social-xiaohongshu-adb/snapshot-builder.js +126 -0
  53. package/lib/adapters/system-data-android/adapter.js +77 -3
  54. package/lib/adapters/travel-12306/index.js +215 -29
  55. package/lib/adapters/travel-amap/index.js +16 -10
  56. package/lib/adapters/travel-ctrip/index.js +25 -9
  57. package/lib/adapters/vscode/vscode-reader.js +7 -1
  58. package/lib/sign-providers/index.js +20 -0
  59. package/lib/sign-providers/interface.js +82 -0
  60. package/lib/sign-providers/null-sign-provider.js +30 -0
  61. package/package.json +6 -1
@@ -0,0 +1,281 @@
1
+ "use strict";
2
+
3
+ /**
4
+ * Phase 2a (Douyin C 路径 — 2026-05-25): douyin.pull-im-db ADB extension.
5
+ *
6
+ * Plugs into the `opts.extensions` slot of `createHostAdbBridge` /
7
+ * `createDesktopAdbBridge`. Pipeline:
8
+ *
9
+ * 1. ADB-ls `/data/data/com.ss.android.ugc.aweme/databases/` to find
10
+ * `<uid>_im.db` (19-digit numeric uid prefix) — abrignoni DFIR pattern
11
+ * 2. ADB pull the .db cohort (main + -wal + -shm) via base64 streaming
12
+ * (mirrors Bilibili Phase 1a — `su -c "base64 ..."` avoids MIUI FUSE
13
+ * SELinux trap)
14
+ * 3. Verify each file's SQLite magic header before returning
15
+ * 4. Return `{tempPath, uid, walPath?, shmPath?, extractedAt}` for the
16
+ * collector to feed into im-db-parser
17
+ *
18
+ * Bilibili Phase 1a uses base64 of a single file; Douyin needs the WAL/SHM
19
+ * cohort because the IM db is actively written by the chat thread —
20
+ * skipping WAL would lose the most-recent messages. We pull all 3 files
21
+ * and let the sqlite reader checkpoint them on open.
22
+ *
23
+ * Failure modes (throws on each; UI maps the typed error code to a banner):
24
+ * - DOUYIN_NOT_INSTALLED — databases/ dir doesn't exist
25
+ * - DOUYIN_NO_IM_DB — no `<uid>_im.db` matching the 19-digit pattern
26
+ * - DOUYIN_MULTIPLE_USERS — >1 IM dbs (multi-account; need explicit uid)
27
+ * - DOUYIN_NO_ROOT — su not available
28
+ * - DOUYIN_PULL_FAILED — base64 stream error
29
+ * - DOUYIN_NOT_SQLITE — pulled file lacks SQLite magic header
30
+ */
31
+
32
+ const fs = require("node:fs");
33
+ const path = require("node:path");
34
+ const os = require("node:os");
35
+ const crypto = require("node:crypto");
36
+
37
/** Directory on the device that this module lists and pulls Douyin databases from. */
const DOUYIN_DB_REMOTE_DIR = "/data/data/com.ss.android.ugc.aweme/databases";

/** Matches `<uid>_im.db` exactly; capture group 1 is the 19-digit numeric uid. */
const IM_DB_PATTERN = /^(\d{19})_im\.db$/;
41
+
42
/**
 * Enumerate `<uid>_im.db` files in the Douyin databases directory by running
 * `ls` through `adb shell su -c`.
 *
 * The shell command echoes a sentinel when `ls` fails, which lets the caller
 * tell "directory missing" apart from "directory present but no IM db".
 *
 * @param {(args: string[], opts: object) => Promise<string>} adb adb runner resolving to stdout
 * @param {string} serial device serial forwarded to the adb runner
 * @param {{timeoutMs?: number}} [opts] timeout override (defaults to 30 s)
 * @returns {Promise<{candidates: Array<{uid: string, fileName: string}>, dirMissing: boolean}>}
 */
async function listImDbs(adb, serial, opts) {
  const MISSING_SENTINEL = "__MISSING_DIR__";
  const shellCommand = `ls ${DOUYIN_DB_REMOTE_DIR} 2>/dev/null || echo ${MISSING_SENTINEL}`;

  const stdout = await adb(["shell", "su", "-c", shellCommand], {
    serial,
    timeoutMs: opts?.timeoutMs || 30_000,
  });

  const entries = stdout.replace(/\r/g, "").trim().split(/\n/);

  // Only a lone sentinel line means `ls` itself failed.
  const onlySentinel = entries.length === 1 && entries[0] === MISSING_SENTINEL;
  if (onlySentinel) {
    return { candidates: [], dirMissing: true };
  }

  // Blank lines and unrelated files simply fail the pattern and are dropped.
  const candidates = entries.flatMap((entry) => {
    const fileName = entry.trim();
    const hit = fileName.match(IM_DB_PATTERN);
    return hit ? [{ uid: hit[1], fileName }] : [];
  });

  return { candidates, dirMissing: false };
}
77
+
78
/**
 * Pull a single remote file by running `su -c "base64 <path> | tr -d '\n\r'"`
 * through adb and decoding the resulting text on the host.
 *
 * `remotePath` is interpolated into the shell command unquoted, so callers
 * must only pass trusted paths.
 *
 * This function validates the transport only. SQLite magic-header and
 * minimum-size checks are the caller's responsibility.
 *
 * @param {(args: string[], opts: object) => Promise<string>} adb adb runner resolving to stdout
 * @param {string} serial device serial forwarded to the adb runner
 * @param {string} remotePath absolute path of the file on the device
 * @param {{timeoutMs?: number}} [opts] timeout override (defaults to 60 s)
 * @returns {Promise<Buffer>} decoded file bytes
 * @throws {Error} `DOUYIN_PULL_FAILED` when the stream is empty or is not
 *   well-formed base64 (for example an error message printed on stdout)
 */
async function pullFileViaSu(adb, serial, remotePath, opts) {
  const adbOpts = { serial, timeoutMs: opts?.timeoutMs || 60_000 };
  const b64 = await adb(
    [
      "shell",
      "su",
      "-c",
      `base64 ${remotePath} 2>/dev/null | tr -d '\\n\\r'`,
    ],
    adbOpts,
  );
  // Treat a null/undefined stdout like an empty stream instead of crashing.
  const b64Clean = String(b64 ?? "").replace(/[\r\n\t ]+/g, "");
  if (b64Clean.length === 0) {
    throw new Error(
      `DOUYIN_PULL_FAILED: base64 stream of ${remotePath} returned 0 bytes (su exec may have silently failed)`,
    );
  }
  // Buffer.from(..., "base64") never throws: it silently skips characters
  // outside the alphabet. Validate first so stray shell output is reported
  // as a failed pull rather than decoded into garbage bytes.
  if (!/^[A-Za-z0-9+/]+={0,2}$/.test(b64Clean)) {
    throw new Error(
      `DOUYIN_PULL_FAILED: base64 decode failed for ${remotePath}: stream contains non-base64 characters`,
    );
  }
  return Buffer.from(b64Clean, "base64");
}
116
+
117
/**
 * Factory: returns an extension handler suitable for the `opts.extensions`
 * map of `createHostAdbBridge` / `createDesktopAdbBridge`.
 *
 *   const ext = createDouyinDbExtension();
 *   const bridge = createHostAdbBridge({ extensions: { "douyin.pull-im-db": ext } });
 *   const { tempPath, uid, cleanup } = await bridge.invoke("douyin.pull-im-db");
 *
 * Handler params (all optional):
 *   - uid: string — pick this uid when several `<uid>_im.db` files exist.
 *     Without it, multiple candidates raise DOUYIN_MULTIPLE_USERS.
 *
 * The handler resolves to `{tempPath, uid, walPath, shmPath, extractedAt,
 * cleanup}`. `walPath` / `shmPath` are `null` when the sibling could not be
 * pulled. `cleanup()` deletes every file that was written; the caller owns
 * calling it (typically in a `finally`).
 *
 * Pulled files are created with mode 0o600 because they hold private chat
 * data and live in the shared OS temp directory.
 *
 * Errors (the code is the message prefix): DOUYIN_NO_ROOT,
 * DOUYIN_NOT_INSTALLED, DOUYIN_NO_IM_DB, DOUYIN_UID_NOT_FOUND,
 * DOUYIN_MULTIPLE_USERS, DOUYIN_PULL_FAILED, DOUYIN_NOT_SQLITE. A `TypeError`
 * is thrown when `ctx` lacks `adb` / `pickDevice`.
 *
 * @param {{timeoutMs?: number, onCleanupFailed?: (path: string) => void}} [factoryOpts]
 * @returns {(params: object, ctx: object) => Promise<{tempPath: string, uid: string, walPath: (string|null), shmPath: (string|null), extractedAt: number, cleanup: () => void}>}
 */
function createDouyinDbExtension(factoryOpts = {}) {
  const timeoutMs = factoryOpts.timeoutMs || 60_000;
  const onCleanupFailed = factoryOpts.onCleanupFailed || (() => {});

  // Write `bytes` owner-only; on failure remove whatever was partially written.
  function writePrivateFile(filePath, bytes) {
    try {
      fs.writeFileSync(filePath, bytes, { mode: 0o600 });
    } catch (err) {
      try {
        fs.unlinkSync(filePath);
      } catch (unlinkErr) {
        // ENOENT just means nothing was created; anything else is a leak.
        if (!unlinkErr || unlinkErr.code !== "ENOENT") onCleanupFailed(filePath);
      }
      throw err;
    }
  }

  // Best-effort pull of a -wal / -shm sibling. Resolves to the local path, or
  // null when the sibling is absent, empty, or could not be written.
  async function pullSidecar(adb, serial, remotePath, localPath) {
    try {
      const bytes = await pullFileViaSu(adb, serial, remotePath, { timeoutMs });
      if (bytes.length === 0) return null;
      writePrivateFile(localPath, bytes);
      return localPath;
    } catch (_e) {
      // Missing sidecar is normal (e.g. the app just checkpointed).
      return null;
    }
  }

  return async function douyinPullImDbHandler(params, ctx) {
    if (
      !ctx ||
      typeof ctx.adb !== "function" ||
      typeof ctx.pickDevice !== "function"
    ) {
      throw new TypeError(
        "douyin.pull-im-db: ctx must provide {adb, pickDevice}",
      );
    }
    const serial = await ctx.pickDevice();

    // Step 0: probe su availability — clearer error than "ls failed".
    const idOut = await ctx.adb(["shell", "su", "-c", "id -u"], {
      serial,
      timeoutMs,
    });
    const idLine = idOut.replace(/\r+$/gm, "").trim();
    if (idLine !== "0" && !idLine.includes("uid=0")) {
      throw new Error(
        `DOUYIN_NO_ROOT: phone isn't rooted (su -c id -u returned \`${idLine.substring(0, 60)}\`). Douyin release APK isn't debuggable, so root is required to read /data/data/com.ss.android.ugc.aweme/databases/.`,
      );
    }

    // Step 1: discover candidate IM dbs.
    const { candidates, dirMissing } = await listImDbs(ctx.adb, serial, {
      timeoutMs,
    });
    if (dirMissing) {
      throw new Error(
        "DOUYIN_NOT_INSTALLED: " +
          DOUYIN_DB_REMOTE_DIR +
          " does not exist. Install Douyin App on the phone, then retry.",
      );
    }
    if (candidates.length === 0) {
      throw new Error(
        "DOUYIN_NO_IM_DB: no `<19-digit-uid>_im.db` found in databases/. Open the Douyin App + log in once + open any chat thread to materialize the IM database, then retry.",
      );
    }
    let chosen;
    const requestedUid =
      params && typeof params.uid === "string" ? params.uid : null;
    if (requestedUid) {
      chosen = candidates.find((c) => c.uid === requestedUid);
      if (!chosen) {
        throw new Error(
          `DOUYIN_UID_NOT_FOUND: requested uid=${requestedUid} not in ${JSON.stringify(candidates.map((c) => c.uid))}`,
        );
      }
    } else if (candidates.length === 1) {
      chosen = candidates[0];
    } else {
      throw new Error(
        `DOUYIN_MULTIPLE_USERS: multiple IM dbs found (${candidates.map((c) => c.uid).join(", ")}). Pass {uid: "<19-digit>"} to disambiguate.`,
      );
    }

    // Step 2: pull the cohort (main + -wal + -shm). The WAL sibling carries
    // writes that have not been checkpointed into the main file yet, so
    // skipping it can lose the most recent messages.
    const remoteDb = `${DOUYIN_DB_REMOTE_DIR}/${chosen.fileName}`;
    const tmpDir = os.tmpdir();
    const tmpId = crypto.randomUUID();
    const tempPath = path.join(tmpDir, `cc-douyin-im-${tmpId}.db`);

    const dbBuf = await pullFileViaSu(ctx.adb, serial, remoteDb, { timeoutMs });
    // Validate the main file before anything touches the disk.
    if (dbBuf.length < 1024) {
      throw new Error(
        `DOUYIN_PULL_FAILED: decoded ${remoteDb} is only ${dbBuf.length} bytes — expected ≥4KB sqlite. Possible MIUI silent su fail.`,
      );
    }
    const magic = dbBuf.subarray(0, 16).toString("latin1");
    if (!magic.startsWith("SQLite format 3")) {
      throw new Error(
        `DOUYIN_NOT_SQLITE: ${remoteDb} decoded but lacks 'SQLite format 3' magic header. Got: ${dbBuf.subarray(0, 16).toString("hex")}`,
      );
    }
    writePrivateFile(tempPath, dbBuf);

    // Sidecars share the main file's random id so they sit next to it under
    // the names a SQLite reader expects. Pulled sequentially, as before.
    const walPath = await pullSidecar(
      ctx.adb,
      serial,
      `${remoteDb}-wal`,
      path.join(tmpDir, `cc-douyin-im-${tmpId}.db-wal`),
    );
    const shmPath = await pullSidecar(
      ctx.adb,
      serial,
      `${remoteDb}-shm`,
      path.join(tmpDir, `cc-douyin-im-${tmpId}.db-shm`),
    );

    return {
      tempPath,
      uid: chosen.uid,
      walPath,
      shmPath,
      extractedAt: Date.now(),
      // Caller is responsible for cleanup; intended for a finally block.
      cleanup() {
        for (const p of [tempPath, walPath, shmPath]) {
          if (!p) continue;
          try {
            fs.unlinkSync(p);
          } catch (_e) {
            onCleanupFailed(p);
          }
        }
      },
    };
  };
}
271
+
272
+ module.exports = {
273
+ createDouyinDbExtension,
274
+ DOUYIN_DB_REMOTE_DIR,
275
+ IM_DB_PATTERN,
276
+ // Exposed for tests
277
+ _internals: {
278
+ listImDbs,
279
+ pullFileViaSu,
280
+ },
281
+ };
@@ -0,0 +1,287 @@
1
+ "use strict";
2
+
3
+ /**
4
+ * Phase 2a (Douyin C 路径 — 2026-05-25): Douyin IM sqlite parser.
5
+ *
6
+ * Parses the Douyin Android App's per-user IM sqlite:
7
+ * /data/data/com.ss.android.ugc.aweme/databases/<uid>_im.db
8
+ *
9
+ * Where `<uid>` is the 19-digit numeric Douyin UID (matches what the app
10
+ * shows in passport/account/info/v2 as `user_id`, not the secUid).
11
+ *
12
+ * Schema reference: Alexis Brignoni's TIKTOK DFIR SQL repo
13
+ * https://github.com/abrignoni/DFIR-SQL-Query-Repo/blob/master/Android/TIKTOK/TikTokMessages.sql
14
+ *
15
+ * Two tables we parse:
16
+ *
17
+ * msg
18
+ * sender INTEGER — sender UID (numeric, matches DB filename uid for self-sent)
19
+ * created_time INTEGER — Unix epoch milliseconds
20
+ * content TEXT — JSON: {text, display_name, url:{url_list:[...]}}
21
+ * read_status INTEGER
22
+ * local_info TEXT
23
+ * conversation_id TEXT — peer thread identifier
24
+ *
25
+ * SIMPLE_USER (contacts cache; mutual-follow visible)
26
+ * UID INTEGER
27
+ * short_id INTEGER
28
+ * name TEXT
29
+ * avatar_url TEXT
30
+ * follow_status INTEGER — 0/1/2 (none/following/mutual)
31
+ *
32
+ * Both tables are **unencrypted SQLite**. No SQLCipher. Douyin (and global
33
+ * TikTok) stores its IM db in plaintext on Android per multiple academic
34
+ * forensic studies (Brignoni 2018, ACM ARES 2020). This is the key
35
+ * differentiator from WeChat/QQ which need frida hooks for the key.
36
+ *
37
+ * What this parser DOES NOT do:
38
+ * - Decrypt encrypted message attachments (separate `attachment_<id>` files
39
+ * in the same dir; not in scope for v0.1)
40
+ * - Resolve sender UID → nickname (would need a JOIN to SIMPLE_USER; we
41
+ * emit both tables separately so the consumer can correlate)
42
+ * - Sticker / voice / video message content (the content JSON has type
43
+ * discriminators we ignore — only `text` is extracted; other types
44
+ * yield empty `text` field with the raw payload preserved)
45
+ *
46
+ * Test seam: callers can inject a synthetic `_databaseClass` to bypass the
47
+ * dual-load probe (Phase 1a chromium-cookies-reader pattern).
48
+ */
49
+
50
/**
 * Dual-load the SQLite driver: try `better-sqlite3-multiple-ciphers` first,
 * then plain `better-sqlite3`. A module only counts as loaded if an
 * in-memory database can actually be opened and closed with it, which
 * catches native-binding (ABI) mismatches that surface at construction time.
 *
 * @returns {Function} the first database class that loads and passes the probe
 * @throws {Error} when neither module is usable. The last underlying failure
 *   is attached as `cause`, and every attempt is listed on `error.failures`
 *   as `{module, error}` so callers can tell "not installed" from an ABI
 *   mismatch.
 */
function loadDatabaseClass() {
  const failures = [];
  for (const mod of ["better-sqlite3-multiple-ciphers", "better-sqlite3"]) {
    try {
      // eslint-disable-next-line global-require
      const cls = require(mod);
      // Probe: constructing a throwaway in-memory db exercises the native binding.
      const probe = new cls(":memory:");
      probe.close();
      return cls;
    } catch (err) {
      // Remember why this candidate failed, then fall through to the next one.
      failures.push({ module: mod, error: err });
    }
  }
  const lastFailure = failures[failures.length - 1];
  const error = new Error(
    "douyin-im-db-parser: neither better-sqlite3-multiple-ciphers nor better-sqlite3 loaded — both ABI-mismatched",
    { cause: lastFailure ? lastFailure.error : undefined },
  );
  error.failures = failures;
  throw error;
}
76
+
77
/**
 * Extract the user-visible text from a `content` column value.
 *
 * Resolution order:
 *   1. Not valid JSON                      → the raw string (plaintext row)
 *   2. JSON primitive (number/bool/null/string) → the raw string. A plaintext
 *      message such as `666` or `true` is valid JSON but is still just text.
 *   3. JSON object with string `text`      → that text
 *   4. JSON object with `content.text`     → that nested text
 *   5. Any other JSON object/array         → null (no text payload found)
 *
 * @param {string} blob raw content column value
 * @returns {string|null} extracted text, or null when `blob` is not a
 *   non-empty string or is a JSON object without a text field
 */
function extractTextFromContent(blob) {
  if (typeof blob !== "string" || blob.length === 0) return null;

  let parsed;
  try {
    parsed = JSON.parse(blob);
  } catch (_e) {
    // Not JSON — treat as a plaintext row.
    return blob;
  }

  // Valid JSON but not a container: the row is plaintext that merely looks
  // like a JSON literal, so keep it instead of dropping the message.
  if (parsed === null || typeof parsed !== "object") return blob;

  if (typeof parsed.text === "string") return parsed.text;
  if (parsed.content && typeof parsed.content.text === "string") {
    return parsed.content.text;
  }
  return null;
}
101
+
102
/**
 * Parse the `msg` and `SIMPLE_USER` tables from a Douyin IM sqlite at `dbPath`.
 *
 * Returns `{ messages, contacts, diagnostic }` where:
 *   - messages: Array<{senderUid, conversationId, createdTimeMs, text, readStatus, contentBlob}>
 *     (newest first, capped at `limitMessages`)
 *   - contacts: Array<{uid, shortId, name, avatarUrl, followStatus}>
 *     (capped at `limitContacts`)
 *   - diagnostic: {messageCount, contactCount, hadMsgTable, hadSimpleUserTable}
 *
 * A missing table, or a table lacking the required columns, yields an empty
 * array rather than an error; `hadXxxTable` reports whether the table exists.
 * Query errors are swallowed the same way. Opening the database itself is
 * not guarded, so a failure there propagates.
 *
 * User ids are selected with `CAST(... AS TEXT)`: a 19-digit id does not fit
 * in a JavaScript number without rounding, so a driver that maps SQLite
 * INTEGER to a JS number would otherwise corrupt the last digits.
 *
 * @param {string} dbPath path to the IM sqlite db
 * @param {{_databaseClass?: any, limitMessages?: number, limitContacts?: number}} [opts]
 *   `_databaseClass` is a test seam that bypasses driver loading; limits
 *   default to 10 000 messages / 5 000 contacts when not a positive integer.
 * @returns {{messages: Array, contacts: Array, diagnostic: object}}
 * @throws {TypeError} when `dbPath` is not a non-empty string
 */
function parseImDb(dbPath, opts = {}) {
  if (typeof dbPath !== "string" || dbPath.length === 0) {
    throw new TypeError("parseImDb: dbPath must be a non-empty string");
  }
  const limitMessages =
    Number.isInteger(opts.limitMessages) && opts.limitMessages > 0
      ? opts.limitMessages
      : 10_000;
  const limitContacts =
    Number.isInteger(opts.limitContacts) && opts.limitContacts > 0
      ? opts.limitContacts
      : 5_000;
  const Database = opts._databaseClass || loadDatabaseClass();
  const db = new Database(dbPath, { readonly: true });
  const out = {
    messages: [],
    contacts: [],
    diagnostic: {
      messageCount: 0,
      contactCount: 0,
      hadMsgTable: false,
      hadSimpleUserTable: false,
    },
  };

  // Column names only ever come from the fixed candidate lists below, so
  // interpolating them into SQL is safe.
  const asText = (col) => `CAST(${col} AS TEXT)`;
  const toIdString = (value) => (value != null ? String(value) : null);

  try {
    // ─── msg table ───────────────────────────────────────────────────────
    const msgTableInfo = trySelect(db, "PRAGMA table_info(msg)");
    if (Array.isArray(msgTableInfo) && msgTableInfo.length > 0) {
      out.diagnostic.hadMsgTable = true;
      const columns = new Set(msgTableInfo.map((r) => r.name));
      // Defensive column picker — required: sender + time + content.
      const senderCol = pickCol(columns, ["sender", "from_user_id", "uid"]);
      const timeCol = pickCol(columns, [
        "created_time",
        "create_time",
        "created_at",
      ]);
      const contentCol = pickCol(columns, ["content", "message_content"]);
      const convCol = pickCol(columns, [
        "conversation_id",
        "conv_id",
        "session_id",
      ]);
      const readCol = pickCol(columns, ["read_status", "read", "is_read"]);
      if (senderCol && timeCol && contentCol) {
        const sql =
          `SELECT ${asText(senderCol)} AS sender, ${timeCol} AS createdTime, ${contentCol} AS content` +
          (convCol ? `, ${convCol} AS conversationId` : "") +
          (readCol ? `, ${readCol} AS readStatus` : "") +
          ` FROM msg ORDER BY ${timeCol} DESC LIMIT ${limitMessages}`;
        const rows = trySelect(db, sql) || [];
        for (const r of rows) {
          out.messages.push({
            senderUid: toIdString(r.sender),
            conversationId: r.conversationId ? String(r.conversationId) : null,
            createdTimeMs: normalizeEpochMs(r.createdTime),
            text: extractTextFromContent(r.content),
            readStatus:
              typeof r.readStatus === "number" ? r.readStatus : null,
            // Raw payload is preserved so non-text messages stay recoverable.
            contentBlob: typeof r.content === "string" ? r.content : null,
          });
        }
        out.diagnostic.messageCount = out.messages.length;
      }
    }

    // ─── SIMPLE_USER table ───────────────────────────────────────────────
    const userTableInfo = trySelect(db, "PRAGMA table_info(SIMPLE_USER)");
    if (Array.isArray(userTableInfo) && userTableInfo.length > 0) {
      out.diagnostic.hadSimpleUserTable = true;
      const columns = new Set(userTableInfo.map((r) => r.name));
      const uidCol = pickCol(columns, ["UID", "uid", "user_id"]);
      const shortIdCol = pickCol(columns, ["short_id", "shortId", "ShortId"]);
      const nameCol = pickCol(columns, ["name", "nick_name", "nickname"]);
      const avatarCol = pickCol(columns, ["avatar_url", "avatarUrl", "avatar"]);
      const followCol = pickCol(columns, [
        "follow_status",
        "followStatus",
        "follow_state",
      ]);
      if (uidCol) {
        const fields = [`${asText(uidCol)} AS uid`];
        if (shortIdCol) fields.push(`${asText(shortIdCol)} AS shortId`);
        if (nameCol) fields.push(`${nameCol} AS name`);
        if (avatarCol) fields.push(`${avatarCol} AS avatarUrl`);
        if (followCol) fields.push(`${followCol} AS followStatus`);
        const sql = `SELECT ${fields.join(", ")} FROM SIMPLE_USER LIMIT ${limitContacts}`;
        const rows = trySelect(db, sql) || [];
        for (const r of rows) {
          out.contacts.push({
            uid: toIdString(r.uid),
            shortId: toIdString(r.shortId),
            name: r.name || null,
            avatarUrl: r.avatarUrl || null,
            followStatus:
              typeof r.followStatus === "number" ? r.followStatus : null,
          });
        }
        out.diagnostic.contactCount = out.contacts.length;
      }
    }
  } finally {
    // Always release the handle, even if row mapping throws.
    db.close();
  }
  return out;
}
238
+
239
/**
 * Coerce an epoch timestamp of unknown unit to whole milliseconds.
 *
 * The unit is guessed from magnitude alone:
 *   - above 1e15           → microseconds, divided by 1000
 *   - above 1e12 (≤ 1e15)  → already milliseconds
 *   - anything else        → seconds, multiplied by 1000
 *
 * @param {*} v candidate timestamp
 * @returns {number|null} integer milliseconds, or null when `v` is not a
 *   finite positive number
 */
function normalizeEpochMs(v) {
  const usable = typeof v === "number" && Number.isFinite(v) && v > 0;
  if (!usable) return null;

  let millis;
  if (v > 1e15) {
    millis = v / 1000;
  } else if (v > 1e12) {
    millis = v;
  } else {
    millis = v * 1000;
  }
  return Math.floor(millis);
}
253
+
254
/**
 * Run `sql` through `db.prepare(sql).all()` and hand back the rows.
 * Any exception from preparing or executing the statement is swallowed and
 * reported as `null`, so callers can treat "query failed" as "no data".
 *
 * @param {{prepare: (sql: string) => {all: () => Array}}} db open database handle
 * @param {string} sql statement to run
 * @returns {Array|null} result rows, or null when anything threw
 */
function trySelect(db, sql) {
  let rows = null;
  try {
    const statement = db.prepare(sql);
    rows = statement.all();
  } catch {
    rows = null;
  }
  return rows;
}
266
+
267
/**
 * Choose which of several possible column names a table actually has.
 * Candidates are tried in the order given and the first one present wins.
 *
 * @param {Set<string>} columns column names found on the table
 * @param {string[]} candidates acceptable names, most preferred first
 * @returns {string|null} the first candidate contained in `columns`, else null
 */
function pickCol(columns, candidates) {
  const match = candidates.find((name) => columns.has(name));
  return match === undefined ? null : match;
}
277
+
278
+ module.exports = {
279
+ parseImDb,
280
+ // Exposed for tests
281
+ _internals: {
282
+ loadDatabaseClass,
283
+ extractTextFromContent,
284
+ normalizeEpochMs,
285
+ pickCol,
286
+ },
287
+ };
@@ -0,0 +1,57 @@
1
+ "use strict";
2
+
3
+ /**
4
+ * social-douyin-adb — Phase 2 (Douyin C 路径) entry.
5
+ *
6
+ * Phase 2a (this commit) — desktop ADB-based IM db extraction:
7
+ * - douyin.pull-im-db extension pulls <uid>_im.db cohort to host
8
+ * - parseImDb parse msg + SIMPLE_USER tables
9
+ * - buildSnapshot → schemaVersion=1 events JSON
10
+ * - collect / collectAndSync orchestrator
11
+ *
12
+ * Phase 2b (next) — Android Kotlin B path (in-APK root):
13
+ * - reuse Phase B0 LocalRootCollector / BaseRootCredentialsStore /
14
+ * RootShellRunner / DbCohortCopier scaffold
15
+ * - libmsaoaidsec.so frida bypass for the anti-debug TracerPid check
16
+ *
17
+ * Pipeline (C path):
18
+ * bridge.invoke("douyin.pull-im-db")
19
+ * → parseImDb(tempPath)
20
+ * → buildSnapshot + writeSnapshotJson
21
+ * → registry.syncAdapter("social-douyin", { inputPath })
22
+ *
23
+ * Reuses the existing `social-douyin` adapter's snapshot mode — no 2nd
24
+ * adapter, same vault schema / dedup / event types. Phase 2a extended
25
+ * VALID_SNAPSHOT_KINDS in social-douyin/index.js to include `message` +
26
+ * `contact` for the abrignoni-DFIR-parsed IM tables.
27
+ */
28
+
29
+ const {
30
+ createDouyinDbExtension,
31
+ DOUYIN_DB_REMOTE_DIR,
32
+ IM_DB_PATTERN,
33
+ } = require("./db-extension");
34
+ const { parseImDb } = require("./im-db-parser");
35
+ const {
36
+ buildSnapshot,
37
+ writeSnapshotJson,
38
+ cleanupSnapshotJson,
39
+ SNAPSHOT_SCHEMA_VERSION,
40
+ } = require("./snapshot-builder");
41
+ const { collect, collectAndSync } = require("./collector");
42
+
43
+ module.exports = {
44
+ // Extension factory (wiring registers this on the bridge)
45
+ createDouyinDbExtension,
46
+ DOUYIN_DB_REMOTE_DIR,
47
+ IM_DB_PATTERN,
48
+ // Parser + builder (also exposed for advanced callers / tests)
49
+ parseImDb,
50
+ buildSnapshot,
51
+ writeSnapshotJson,
52
+ cleanupSnapshotJson,
53
+ SNAPSHOT_SCHEMA_VERSION,
54
+ // Collector orchestrator
55
+ collect,
56
+ collectAndSync,
57
+ };