@chainlesschain/personal-data-hub 0.4.24 → 0.4.26
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/gov-ixiamen.test.js +2 -2
- package/__tests__/analysis-skills.test.js +71 -2
- package/__tests__/analysis.test.js +46 -0
- package/__tests__/social-douyin-im-direct-read.test.js +69 -3
- package/__tests__/social-douyin-salvage-collector.test.js +98 -0
- package/__tests__/social-douyin-salvage-mapper.test.js +90 -0
- package/__tests__/social-weibo-sqlite-device.test.js +174 -0
- package/__tests__/sqlite-leaf-salvage.test.js +97 -0
- package/lib/adapters/gov-ixiamen/index.js +17 -10
- package/lib/adapters/social-douyin/index.js +56 -2
- package/lib/adapters/social-douyin-adb/collector.js +100 -0
- package/lib/adapters/social-douyin-adb/im-db-parser.js +85 -0
- package/lib/adapters/social-douyin-adb/index.js +5 -0
- package/lib/adapters/social-douyin-adb/salvage-mapper.js +119 -0
- package/lib/adapters/social-weibo/index.js +110 -30
- package/lib/analysis-skills/index.js +3 -0
- package/lib/analysis-skills/overview.js +157 -0
- package/lib/analysis.js +50 -0
- package/lib/forensics/leaf-salvage.js +185 -0
- package/lib/prompt-builder.js +9 -0
- package/package.json +1 -1
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
|
4
|
+
|
|
5
|
+
const fs = require("node:fs");
|
|
6
|
+
const path = require("node:path");
|
|
7
|
+
const os = require("node:os");
|
|
8
|
+
|
|
9
|
+
// The salvager lives in scripts/ (a standalone forensic tool) but exports its
|
|
10
|
+
// pure parsers for testing.
|
|
11
|
+
const {
|
|
12
|
+
parseLeafPage,
|
|
13
|
+
readVarint,
|
|
14
|
+
serialTypeSize,
|
|
15
|
+
} = require("../../../scripts/android/pdh-sqlite-leaf-salvage.js");
|
|
16
|
+
|
|
17
|
+
// Build a real (UTF-8) SQLite DB via the SQLCipher-capable driver, then salvage
|
|
18
|
+
// records straight from its raw page bytes — proving the leaf-page parser reads
|
|
19
|
+
// rowids + columns + UTF-8 text correctly (the Method-B reconstruction step for
|
|
20
|
+
// scattered/malformed memory dumps). See docs/internal/pdh-db-decryption-runbook.md.
|
|
21
|
+
describe("pdh-sqlite-leaf-salvage — leaf-page record salvager", () => {
  // Page size and minimum column count handed to parseLeafPage in every test.
  const PAGE = 4096;
  const MIN_COLS = 3;

  // Rows inserted into the fixture `msg` table (5 columns each).
  const FIXTURE_ROWS = [
    ["uuid-1", "conv-1", 111, "你好呀 hello", 1700000000000],
    ["uuid-2", "conv-1", 222, "在吗?晚上一起吃饭", 1700000001000],
    ["uuid-3", "conv-2", 333, "ok 👍", 1700000002000],
  ];

  let dir;
  let dbPath;
  let buf;

  // Every offset on the PAGE-aligned grid that still leaves room for a whole page.
  const alignedOffsets = (buffer) => {
    const offsets = [];
    for (let base = 0; base + PAGE <= buffer.length; base += PAGE) {
      offsets.push(base);
    }
    return offsets;
  };

  // The fixture table has 5 columns; that is how the tests recognise its rows.
  const isMsgRow = (r) => r.cols.length === 5;

  beforeAll(() => {
    const Database = require("better-sqlite3-multiple-ciphers");
    dir = fs.mkdtempSync(path.join(os.tmpdir(), "salvage-"));
    dbPath = path.join(dir, "u.db");
    const db = new Database(dbPath);
    db.exec(
      "CREATE TABLE msg(msg_uuid TEXT, conversation_id TEXT, sender INTEGER, content TEXT, created_time INTEGER)",
    );
    const ins = db.prepare("INSERT INTO msg VALUES(?,?,?,?,?)");
    for (const row of FIXTURE_ROWS) {
      ins.run(...row);
    }
    db.close();
    // Keep the raw file bytes: the salvager is exercised on pages, not via SQL.
    buf = fs.readFileSync(dbPath);
  });

  afterAll(() => {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch (_e) {
      /* ignore */
    }
  });

  it("varint + serialTypeSize basics", () => {
    const [one] = readVarint(Buffer.from([0x01]), 0);
    expect(one).toBe(1n);
    const [oneTwentyEight] = readVarint(Buffer.from([0x81, 0x00]), 0);
    expect(oneTwentyEight).toBe(128n);
    expect(serialTypeSize(1n)).toBe(1); // 1-byte int
    expect(serialTypeSize(6n)).toBe(8); // 8-byte int
    expect(serialTypeSize(13n)).toBe(0); // text len 0
    expect(serialTypeSize(0x29n)).toBe(14); // text serial 41 → 14 bytes
  });

  it("salvages all 3 msg rows with correct columns + UTF-8 text", () => {
    const records = [];
    for (const base of alignedOffsets(buf)) {
      const recs = parseLeafPage(buf, base, PAGE, MIN_COLS);
      if (recs) records.push(...recs);
    }
    // Pick out the msg rows (5 cols, content is the 4th) and index them by uuid.
    const msgRows = records.filter(isMsgRow);
    expect(msgRows.length).toBe(3);
    const byUuid = new Map(msgRows.map((r) => [r.cols[0], r]));
    expect(byUuid.get("uuid-1").cols[2]).toBe(111); // sender int
    expect(byUuid.get("uuid-1").cols[3]).toBe("你好呀 hello"); // UTF-8 intact
    expect(byUuid.get("uuid-2").cols[3]).toBe("在吗?晚上一起吃饭");
    expect(byUuid.get("uuid-2").cols[4]).toBe(1700000001000); // created_time
    expect(byUuid.get("uuid-3").cols[3]).toBe("ok 👍"); // emoji (4-byte UTF-8)
  });

  it("finds a leaf page at a NON-4096-aligned offset (unaligned scan)", () => {
    // Locate the first real data leaf page on the aligned grid.
    let leaf = null;
    for (const base of alignedOffsets(buf)) {
      const recs = parseLeafPage(buf, base, PAGE, MIN_COLS);
      if (recs && recs.some(isMsgRow)) {
        leaf = buf.slice(base, base + PAGE);
        break;
      }
    }
    expect(leaf).not.toBeNull();

    // Embed it at a 512-aligned-but-not-4096-aligned offset inside a zero buffer.
    const off = 512 * 3; // 1536: hit by stride-512, missed by stride-4096
    const big = Buffer.alloc(PAGE * 4, 0);
    leaf.copy(big, off);

    // The aligned 4096 grid misses it ...
    for (const base of [0, PAGE]) {
      expect(parseLeafPage(big, base, PAGE, MIN_COLS)).toBeNull();
    }
    // ... while parsing at its true offset finds it.
    const recs = parseLeafPage(big, off, PAGE, MIN_COLS);
    expect(recs).not.toBeNull();
    expect(recs.some((r) => r.cols[0] === "uuid-1")).toBe(true);
  });

  it("returns null for non-leaf / garbage pages", () => {
    const garbage = Buffer.alloc(4096, 0xff);
    expect(parseLeafPage(garbage, 0, 4096, 3)).toBeNull();
    const zeros = Buffer.alloc(4096, 0);
    expect(parseLeafPage(zeros, 0, 4096, 3)).toBeNull();
  });
});
|
|
@@ -4,12 +4,14 @@
|
|
|
4
4
|
* ⚠️ BEST-EFFORT SCAFFOLD (user-requested). i 厦门 is a local-government
|
|
5
5
|
* super-app (社保 / 公积金 / 医保 / 政务办事 / 预约) behind real-name gov SSO.
|
|
6
6
|
* Unlike the document / shopping / travel adapters it has **no verifiable
|
|
7
|
-
* public API
|
|
8
|
-
* (
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
7
|
+
* public API**. Static APK analysis (com.xmgov.xmapp, 2026-06-16) CONFIRMED the
|
|
8
|
+
* real backend host (see IXIAMEN_LIST_URL), but the exact 办事记录 list sub-path
|
|
9
|
+
* and request/response body stay UNVERIFIED — bodies are encrypted by
|
|
10
|
+
* libzxprotect/libijmDataEncryption, so the endpoint cannot be fully derived
|
|
11
|
+
* statically and cannot authenticate without gov real-name login. The reliable
|
|
12
|
+
* path is therefore **snapshot mode** (the app / a manual export produces a JSON
|
|
13
|
+
* of the user's 办事记录). The cookie path is kept as a seam (overridable via
|
|
14
|
+
* opts.listUrl) so it can be wired once a live capture confirms path + sign.
|
|
13
15
|
*
|
|
14
16
|
* Personal footprint modelled: 政务办事记录 (government-service handling). Each
|
|
15
17
|
* record → an INTERACTION event ("办理: <服务名>") + a Topic for the service
|
|
@@ -42,15 +44,20 @@ const { ENTITY_TYPES, EVENT_SUBTYPES, CAPTURED_BY } = require("../../constants")
|
|
|
42
44
|
const { CookieAuth } = require("../shopping-base");
|
|
43
45
|
|
|
44
46
|
const NAME = "gov-ixiamen";
|
|
45
|
-
const VERSION = "0.
|
|
47
|
+
const VERSION = "0.2.0";
|
|
46
48
|
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
47
49
|
|
|
48
50
|
const KIND_SERVICE = "service";
|
|
49
51
|
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_SERVICE]);
|
|
50
52
|
|
|
51
|
-
//
|
|
52
|
-
//
|
|
53
|
-
|
|
53
|
+
// Host CONFIRMED via static APK analysis (com.xmgov.xmapp, 2026-06-16): the real
|
|
54
|
+
// i厦门 backend domain is *.ixiamen.org.cn — production business gateway is
|
|
55
|
+
// https://buss.ixiamen.org.cn/pbc/ (usercenter auth under /pbc/usercenter/;
|
|
56
|
+
// 市民卡/社保 under https://smk.ixiamen.org.cn/smk/). The host + /pbc/ business
|
|
57
|
+
// prefix are confirmed; the "/handle/list" tail is still BEST-EFFORT and the
|
|
58
|
+
// request/response body is UNVERIFIED (encrypted by libzxprotect — opaque to
|
|
59
|
+
// static analysis). Overridable via opts.listUrl once a live capture confirms it.
|
|
60
|
+
const IXIAMEN_LIST_URL = "https://buss.ixiamen.org.cn/pbc/handle/list";
|
|
54
61
|
const PAGE_SIZE = 20;
|
|
55
62
|
|
|
56
63
|
// Coarse service-category keyword map → grouping Topic name. Best-effort; the
|
|
@@ -63,7 +63,8 @@ const KIND_FAVOURITE = "favourite"; // v0.3 (X-Bogus required)
|
|
|
63
63
|
const KIND_LIKE = "like"; // v0.3 (X-Bogus required)
|
|
64
64
|
const KIND_SEARCH = "search"; // legacy sqlite-mode only
|
|
65
65
|
const KIND_MESSAGE = "message"; // Phase 2a — IM private messages from <uid>_im.db (abrignoni DFIR)
|
|
66
|
-
const KIND_CONTACT = "contact"; // Phase 2a — SIMPLE_USER
|
|
66
|
+
const KIND_CONTACT = "contact"; // Phase 2a — SIMPLE_USER/participant contacts from <uid>_im.db
|
|
67
|
+
const KIND_CONVERSATION = "conversation"; // device-verified — conversation_list thread → TOPIC
|
|
67
68
|
|
|
68
69
|
// Forward-compat: list every kind v0.3+ may emit so cc adapter accepts
|
|
69
70
|
// snapshots from a newer Android even if this JS hasn't been bumped yet.
|
|
@@ -258,7 +259,7 @@ class DouyinAdapter {
|
|
|
258
259
|
if (Number.isInteger(opts.limitContacts)) parseOpts.limitContacts = opts.limitContacts;
|
|
259
260
|
if (this._deps.dbDriverFactory) parseOpts._databaseClass = this._deps.dbDriverFactory();
|
|
260
261
|
|
|
261
|
-
const { messages, contacts, diagnostic } = parseImDb(dbPath, parseOpts);
|
|
262
|
+
const { messages, contacts, conversations, diagnostic } = parseImDb(dbPath, parseOpts);
|
|
262
263
|
if (typeof opts.onProgress === "function") {
|
|
263
264
|
try {
|
|
264
265
|
opts.onProgress({ phase: "im-db-parsed", adapter: NAME, ...diagnostic });
|
|
@@ -314,6 +315,27 @@ class DouyinAdapter {
|
|
|
314
315
|
emitted += 1;
|
|
315
316
|
}
|
|
316
317
|
}
|
|
318
|
+
|
|
319
|
+
if (include[KIND_CONVERSATION] !== false) {
|
|
320
|
+
for (const cv of conversations || []) {
|
|
321
|
+
if (emitted >= limit) return;
|
|
322
|
+
if (!cv || typeof cv !== "object" || !cv.conversationId) continue;
|
|
323
|
+
yield {
|
|
324
|
+
adapter: NAME,
|
|
325
|
+
kind: KIND_CONVERSATION,
|
|
326
|
+
originalId: stableOriginalId(
|
|
327
|
+
KIND_CONVERSATION,
|
|
328
|
+
`conv-${cv.conversationId}`,
|
|
329
|
+
),
|
|
330
|
+
capturedAt:
|
|
331
|
+
typeof cv.lastMsgTimeMs === "number" && cv.lastMsgTimeMs > 0
|
|
332
|
+
? cv.lastMsgTimeMs
|
|
333
|
+
: fallbackCapturedAt,
|
|
334
|
+
payload: { kind: KIND_CONVERSATION, ...cv },
|
|
335
|
+
};
|
|
336
|
+
emitted += 1;
|
|
337
|
+
}
|
|
338
|
+
}
|
|
317
339
|
}
|
|
318
340
|
|
|
319
341
|
async *_syncViaSnapshot(opts) {
|
|
@@ -454,6 +476,9 @@ class DouyinAdapter {
|
|
|
454
476
|
if (kind === KIND_CONTACT) {
|
|
455
477
|
return normalizeContact(p, raw, ingestedAt);
|
|
456
478
|
}
|
|
479
|
+
if (kind === KIND_CONVERSATION) {
|
|
480
|
+
return normalizeConversation(p, raw, ingestedAt);
|
|
481
|
+
}
|
|
457
482
|
throw new Error(`DouyinAdapter.normalize: unknown kind ${kind}`);
|
|
458
483
|
}
|
|
459
484
|
}
|
|
@@ -697,6 +722,35 @@ function normalizeContact(p, raw, ingestedAt) {
|
|
|
697
722
|
};
|
|
698
723
|
}
|
|
699
724
|
|
|
725
|
+
/**
 * Turn one conversation payload into a single TOPIC entity (one chat thread).
 *
 * @param {object} p - conversation payload (conversationId, conversationType,
 *   lastMsgTimeMs, stranger)
 * @param {object} raw - raw event; its `capturedAt` is used as the occurrence
 *   time when truthy
 * @param {number} ingestedAt - fallback occurrence time, also stored on the topic
 * @returns {{events: Array, persons: Array, places: Array, items: Array, topics: Array}}
 */
function normalizeConversation(p, raw, ingestedAt) {
  // Accept a non-empty string id or a numeric id; anything else means "no id".
  let convId = null;
  if (typeof p.conversationId === "string") {
    convId = p.conversationId || null;
  } else if (typeof p.conversationId === "number") {
    convId = String(p.conversationId);
  }

  const occurredAt = raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.SQLITE);

  // Without a conversation id the topic still needs a unique id, so mint one.
  const idSuffix = convId ? convId : newId();
  const numberOrNull = (value) => (typeof value === "number" ? value : null);

  const topic = {
    id: `topic-douyin-conv-${idSuffix}`,
    type: ENTITY_TYPES.TOPIC,
    name: convId ? `抖音会话 ${convId}` : "抖音会话",
    ingestedAt,
    source,
    extra: {
      platform: "douyin",
      conversationId: convId,
      conversationType: numberOrNull(p.conversationType),
      lastMsgTimeMs: numberOrNull(p.lastMsgTimeMs),
      stranger: typeof p.stranger === "boolean" ? p.stranger : null,
    },
  };

  return {
    events: [],
    persons: [],
    places: [],
    items: [],
    topics: [topic],
  };
}
|
|
753
|
+
|
|
700
754
|
module.exports = {
|
|
701
755
|
DouyinAdapter,
|
|
702
756
|
NAME,
|
|
@@ -25,6 +25,8 @@ const {
|
|
|
25
25
|
writeSnapshotJson,
|
|
26
26
|
cleanupSnapshotJson,
|
|
27
27
|
} = require("./snapshot-builder");
|
|
28
|
+
const { salvageFile } = require("../../forensics/leaf-salvage");
|
|
29
|
+
const { mapMsgRecords, inferMsgColumns } = require("./salvage-mapper");
|
|
28
30
|
|
|
29
31
|
/**
|
|
30
32
|
* Pull IM db → parse → write snapshot. Returns the staging path + counts
|
|
@@ -159,6 +161,102 @@ async function collectAndSync(bridge, registry, opts = {}) {
|
|
|
159
161
|
};
|
|
160
162
|
}
|
|
161
163
|
|
|
164
|
+
// ── Salvage path (Method B /proc/mem dump → leaf-salvage → snapshot) ──────
|
|
165
|
+
//
|
|
166
|
+
// The key-free decryption breakthrough: a rooted device dumps a running app's
|
|
167
|
+
// decrypted SQLite pages from /proc/<pid>/mem, then this salvages the message
|
|
168
|
+
// records straight out of the leaf pages (no key, no password) and ingests them
|
|
169
|
+
// through the same social-douyin snapshot path. Closes the loop: dump → salvage
|
|
170
|
+
// → THIS → PDH entities. See docs/internal/pdh-db-decryption-runbook.md §3.5.
|
|
171
|
+
|
|
172
|
+
/**
 * Salvage message records out of a memory dump and write them as a
 * social-douyin snapshot JSON file.
 *
 * @param {string} dumpPath path to the /proc/mem dump (or concatenated dumps)
 * @param {{
 *   uid?: string,            // account uid; a "salvage" placeholder when absent/empty
 *   columns?: string[],      // explicit msg column order; otherwise inferMsgColumns
 *   pageSize?: number, minCols?: number, unaligned?: boolean, stride?: number,
 *   displayName?: string,
 *   stagingDir?: string,
 *   now?: () => number,
 * }} [opts]
 * @returns {{snapshotPath: string, uid: string, eventCounts: object, salvage: object}}
 * @throws {TypeError} when dumpPath is not a non-empty string
 */
function salvageDumpToSnapshot(dumpPath, opts = {}) {
  const hasDumpPath = typeof dumpPath === "string" && dumpPath.length > 0;
  if (!hasDumpPath) {
    throw new TypeError("salvageDumpToSnapshot: dumpPath must be a non-empty string");
  }

  const clock = opts.now || Date.now;
  const { pageSize, minCols, unaligned, stride } = opts;
  const { records, pages } = salvageFile(dumpPath, {
    pageSize,
    minCols,
    unaligned,
    stride,
  });

  // Salvaged tuples are positional only. Prefer the caller's explicit column
  // order; fall back to guessing content/created_time from the value shapes.
  const callerGaveColumns = Array.isArray(opts.columns) && opts.columns.length;
  const columns = callerGaveColumns ? opts.columns : inferMsgColumns(records);
  const messages = mapMsgRecords(records, columns);

  let uid = "salvage";
  if (typeof opts.uid === "string" && opts.uid.length) {
    uid = opts.uid;
  }

  const snapshot = buildSnapshot({
    uid,
    displayName: opts.displayName,
    messages,
    contacts: [],
    snapshottedAt: clock(),
  });
  const snapshotPath = writeSnapshotJson(snapshot, { dir: opts.stagingDir });

  const messageCount = messages.length;
  return {
    snapshotPath,
    uid,
    eventCounts: { message: messageCount, total: messageCount },
    salvage: { leafPages: pages, recordsSalvaged: records.length, columns },
  };
}
|
|
219
|
+
|
|
220
|
+
/**
 * One-shot: salvage dump → snapshot → syncAdapter("social-douyin") → cleanup.
 *
 * The staged snapshot is removed whether or not the sync succeeds. A failed
 * removal is not thrown; it is reported as `douyin.cleanupFailed`.
 *
 * @param {object} registry AdapterRegistry (must expose syncAdapter)
 * @param {string} dumpPath
 * @param {object} [opts] forwarded to salvageDumpToSnapshot
 * @returns {Promise<object>} SyncReport + salvage diagnostic under `douyin`
 * @throws {TypeError} when registry has no syncAdapter function
 */
async function salvageAndSync(registry, dumpPath, opts = {}) {
  const canSync = Boolean(registry) && typeof registry.syncAdapter === "function";
  if (!canSync) {
    throw new TypeError(
      "salvageAndSync: registry must expose syncAdapter(name, options)",
    );
  }

  const { snapshotPath, uid, eventCounts, salvage } = salvageDumpToSnapshot(dumpPath, opts);

  let cleanupFailed = false;
  let syncReport = null;
  try {
    syncReport = await registry.syncAdapter("social-douyin", {
      inputPath: snapshotPath,
    });
  } finally {
    // Best-effort removal of the staged snapshot; record the failure instead
    // of masking the sync outcome.
    try {
      cleanupSnapshotJson(snapshotPath);
    } catch (_e) {
      cleanupFailed = true;
    }
  }

  const douyin = {
    uid,
    eventCounts,
    salvage,
    mode: "salvage",
    cleanupFailed,
  };
  return { ...syncReport, douyin };
}
|
|
259
|
+
|
|
162
260
|
// ── Watch-history (video_record.db) path ─────────────────────────────────
|
|
163
261
|
// Distinct from the IM-db path above: pulls the plaintext video_record.db and
|
|
164
262
|
// emits `history` events (KIND_HISTORY → BROWSE) the social-douyin adapter
|
|
@@ -276,4 +374,6 @@ module.exports = {
|
|
|
276
374
|
collectAndSync,
|
|
277
375
|
collectWatchHistory,
|
|
278
376
|
collectWatchHistoryAndSync,
|
|
377
|
+
salvageDumpToSnapshot,
|
|
378
|
+
salvageAndSync,
|
|
279
379
|
};
|
|
@@ -130,14 +130,22 @@ function parseImDb(dbPath, opts = {}) {
|
|
|
130
130
|
: 5_000;
|
|
131
131
|
const Database = opts._databaseClass || loadDatabaseClass();
|
|
132
132
|
const db = new Database(dbPath, { readonly: true });
|
|
133
|
+
const limitConversations =
|
|
134
|
+
Number.isInteger(opts.limitConversations) && opts.limitConversations > 0
|
|
135
|
+
? opts.limitConversations
|
|
136
|
+
: 5_000;
|
|
133
137
|
const out = {
|
|
134
138
|
messages: [],
|
|
135
139
|
contacts: [],
|
|
140
|
+
conversations: [],
|
|
136
141
|
diagnostic: {
|
|
137
142
|
messageCount: 0,
|
|
138
143
|
contactCount: 0,
|
|
144
|
+
conversationCount: 0,
|
|
139
145
|
hadMsgTable: false,
|
|
140
146
|
hadSimpleUserTable: false,
|
|
147
|
+
hadParticipantTable: false,
|
|
148
|
+
hadConversationListTable: false,
|
|
141
149
|
},
|
|
142
150
|
};
|
|
143
151
|
try {
|
|
@@ -230,6 +238,83 @@ function parseImDb(dbPath, opts = {}) {
|
|
|
230
238
|
out.diagnostic.contactCount = out.contacts.length;
|
|
231
239
|
}
|
|
232
240
|
}
|
|
241
|
+
|
|
242
|
+
// ─── participant table (device-verified 2026-06-16) ──────────────────
|
|
243
|
+
// Real Douyin IM schema keeps conversation members in `participant`
|
|
244
|
+
// (conversation_id, user_id, sort_order; UNIQUE(conversation_id,user_id)),
|
|
245
|
+
// NOT SIMPLE_USER (which is older/other builds). Pull distinct member uids
|
|
246
|
+
// as contacts — uid-only (nickname/avatar live in a separate user table),
|
|
247
|
+
// so a PERSON gets created keyed by douyin-uid even without a name.
|
|
248
|
+
// Dedup against contacts already harvested from SIMPLE_USER.
|
|
249
|
+
const partTableInfo = trySelect(db, "PRAGMA table_info(participant)");
|
|
250
|
+
if (Array.isArray(partTableInfo) && partTableInfo.length > 0) {
|
|
251
|
+
out.diagnostic.hadParticipantTable = true;
|
|
252
|
+
const columns = new Set(partTableInfo.map((r) => r.name));
|
|
253
|
+
const uidCol = pickCol(columns, ["user_id", "uid", "UID"]);
|
|
254
|
+
if (uidCol) {
|
|
255
|
+
const seen = new Set(
|
|
256
|
+
out.contacts.map((c) => c.uid).filter(Boolean),
|
|
257
|
+
);
|
|
258
|
+
const sql =
|
|
259
|
+
`SELECT DISTINCT ${uidCol} AS uid FROM participant ` +
|
|
260
|
+
`WHERE ${uidCol} IS NOT NULL LIMIT ${limitContacts}`;
|
|
261
|
+
const rows = trySelect(db, sql) || [];
|
|
262
|
+
for (const r of rows) {
|
|
263
|
+
const uid = r.uid != null ? String(r.uid) : null;
|
|
264
|
+
if (!uid || seen.has(uid)) continue;
|
|
265
|
+
seen.add(uid);
|
|
266
|
+
out.contacts.push({
|
|
267
|
+
uid,
|
|
268
|
+
shortId: null,
|
|
269
|
+
name: null,
|
|
270
|
+
avatarUrl: null,
|
|
271
|
+
followStatus: null,
|
|
272
|
+
fromParticipant: true,
|
|
273
|
+
});
|
|
274
|
+
}
|
|
275
|
+
out.diagnostic.contactCount = out.contacts.length;
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
// ─── conversation_list table (device-verified 2026-06-16) ────────────
|
|
280
|
+
// Each row is a chat thread → PDH TOPIC. Columns vary by build; pick
|
|
281
|
+
// defensively. conversation_id is the only hard requirement.
|
|
282
|
+
const convTableInfo = trySelect(db, "PRAGMA table_info(conversation_list)");
|
|
283
|
+
if (Array.isArray(convTableInfo) && convTableInfo.length > 0) {
|
|
284
|
+
out.diagnostic.hadConversationListTable = true;
|
|
285
|
+
const columns = new Set(convTableInfo.map((r) => r.name));
|
|
286
|
+
const idCol = pickCol(columns, ["conversation_id", "conv_id", "id"]);
|
|
287
|
+
const typeCol = pickCol(columns, ["type", "conversation_type", "conv_type"]);
|
|
288
|
+
const lastTimeCol = pickCol(columns, [
|
|
289
|
+
"last_msg_create_time",
|
|
290
|
+
"last_message_time",
|
|
291
|
+
"updated_time",
|
|
292
|
+
]);
|
|
293
|
+
const strangerCol = pickCol(columns, ["stranger", "is_stranger"]);
|
|
294
|
+
if (idCol) {
|
|
295
|
+
const fields = [`${idCol} AS convId`];
|
|
296
|
+
if (typeCol) fields.push(`${typeCol} AS convType`);
|
|
297
|
+
if (lastTimeCol) fields.push(`${lastTimeCol} AS lastMsgTime`);
|
|
298
|
+
if (strangerCol) fields.push(`${strangerCol} AS stranger`);
|
|
299
|
+
const orderBy = lastTimeCol ? ` ORDER BY ${lastTimeCol} DESC` : "";
|
|
300
|
+
const sql =
|
|
301
|
+
`SELECT ${fields.join(", ")} FROM conversation_list` +
|
|
302
|
+
`${orderBy} LIMIT ${limitConversations}`;
|
|
303
|
+
const rows = trySelect(db, sql) || [];
|
|
304
|
+
for (const r of rows) {
|
|
305
|
+
if (r.convId == null) continue;
|
|
306
|
+
out.conversations.push({
|
|
307
|
+
conversationId: String(r.convId),
|
|
308
|
+
conversationType:
|
|
309
|
+
typeof r.convType === "number" ? r.convType : null,
|
|
310
|
+
lastMsgTimeMs: normalizeEpochMs(r.lastMsgTime),
|
|
311
|
+
stranger:
|
|
312
|
+
typeof r.stranger === "number" ? r.stranger === 1 : null,
|
|
313
|
+
});
|
|
314
|
+
}
|
|
315
|
+
out.diagnostic.conversationCount = out.conversations.length;
|
|
316
|
+
}
|
|
317
|
+
}
|
|
233
318
|
} finally {
|
|
234
319
|
db.close();
|
|
235
320
|
}
|
|
@@ -43,6 +43,8 @@ const {
|
|
|
43
43
|
collectAndSync,
|
|
44
44
|
collectWatchHistory,
|
|
45
45
|
collectWatchHistoryAndSync,
|
|
46
|
+
salvageDumpToSnapshot,
|
|
47
|
+
salvageAndSync,
|
|
46
48
|
} = require("./collector");
|
|
47
49
|
const {
|
|
48
50
|
createDouyinWatchExtension,
|
|
@@ -71,4 +73,7 @@ module.exports = {
|
|
|
71
73
|
// Collector orchestrator
|
|
72
74
|
collect,
|
|
73
75
|
collectAndSync,
|
|
76
|
+
// Method B salvage path (/proc/mem dump → leaf-salvage → snapshot → ingest)
|
|
77
|
+
salvageDumpToSnapshot,
|
|
78
|
+
salvageAndSync,
|
|
74
79
|
};
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/*
|
|
3
|
+
* Glue: leaf-salvaged records → parseImDb-shaped output.
|
|
4
|
+
*
|
|
5
|
+
* The leaf-page salvager (scripts/android/pdh-sqlite-leaf-salvage.js) emits raw
|
|
6
|
+
* positional tuples {rowid, cols:[...]} (leaf pages carry no column names). This
|
|
7
|
+
* maps them into the SAME shape `parseImDb` returns ({messages, contacts,
|
|
8
|
+
* conversations}) so the existing DouyinAdapter.normalize path ingests them
|
|
9
|
+
* unchanged — closing the loop: Method-B dump → salvage → THIS → PDH entities.
|
|
10
|
+
*
|
|
11
|
+
* Column order comes from the table's CREATE TABLE (see docs/internal/
|
|
12
|
+
* pdh-app-db-schemas.md or grep the dump). Pass it explicitly for correctness;
|
|
13
|
+
* `inferMsgColumns` offers a heuristic fallback (content=JSON/longest text,
|
|
14
|
+
* created_time=epoch int) when the exact order is unknown.
|
|
15
|
+
*/
|
|
16
|
+
const { _internals } = require("./im-db-parser");
|
|
17
|
+
const { extractTextFromContent, normalizeEpochMs } = _internals;
|
|
18
|
+
|
|
19
|
+
/**
 * Pair the positional values of one salvaged record with column names.
 *
 * Values are matched to names by index. A name with no value at its index
 * maps to `undefined`; values beyond the last name are dropped.
 *
 * @param {Array<*>} cols - positional column values of a record
 * @param {string[]} names - column names, in table order
 * @returns {Object<string, *>} name → value row; empty when `names` is not
 *   an array (e.g. the caller supplied no column list)
 */
function zip(cols, names) {
  const o = {};
  // A missing column list used to throw on `names.length`; treat it as
  // "no known columns" so callers simply see an empty row.
  if (!Array.isArray(names)) return o;
  const values = cols == null ? [] : cols;
  for (let i = 0; i < names.length; i++) o[names[i]] = values[i];
  return o;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Convert salvaged msg-table tuples into parseImDb-style message objects.
 *
 * @param {Iterable<{cols: Array<*>}>} records - salvaged records; entries
 *   without a `cols` array are ignored
 * @param {string[]} columns - column names matching the positions in `cols`
 * @returns {Array<object>} one message per record that has content or a
 *   created_time
 */
function mapMsgRecords(records, columns) {
  const messages = [];
  for (const rec of records || []) {
    if (!rec || !Array.isArray(rec.cols)) continue;

    const {
      sender,
      conversation_id: conversationId,
      created_time: createdTime,
      content,
      read_status: readStatus,
    } = zip(rec.cols, columns);

    // A tuple with neither content nor a timestamp is not treated as a message.
    if (content == null && createdTime == null) continue;

    // Coerce non-number timestamps; anything non-finite falls back to 0.
    const rawTime = typeof createdTime === "number" ? createdTime : Number(createdTime);

    messages.push({
      senderUid: sender != null ? String(sender) : null,
      conversationId: conversationId != null ? String(conversationId) : null,
      createdTimeMs: normalizeEpochMs(Number.isFinite(rawTime) ? rawTime : 0),
      text: extractTextFromContent(content),
      readStatus: typeof readStatus === "number" ? readStatus : null,
      contentBlob: typeof content === "string" ? content : null,
    });
  }
  return messages;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Convert salvaged participant tuples into uid-only contact objects,
 * keeping the first occurrence of each uid.
 *
 * @param {Iterable<{cols: Array<*>}>} records - salvaged records; entries
 *   without a `cols` array are ignored
 * @param {string[]} columns - column names matching the positions in `cols`;
 *   the uid is read from the `user_id` column
 * @returns {Array<object>} contacts flagged with `fromParticipant: true`
 */
function mapParticipantRecords(records, columns) {
  // Map keeps insertion order, so the output order follows first appearance.
  const contactsByUid = new Map();
  for (const rec of records || []) {
    if (!rec || !Array.isArray(rec.cols)) continue;

    const { user_id: userId } = zip(rec.cols, columns);
    if (userId == null) continue;

    const uid = String(userId);
    if (uid === "" || contactsByUid.has(uid)) continue;

    contactsByUid.set(uid, {
      uid,
      shortId: null,
      name: null,
      avatarUrl: null,
      followStatus: null,
      fromParticipant: true,
    });
  }
  return [...contactsByUid.values()];
}
|
|
59
|
+
|
|
60
|
+
/**
 * Convert salvaged conversation tuples into parseImDb-style conversation
 * objects. Records without a `conversation_id` value are dropped.
 *
 * @param {Iterable<{cols: Array<*>}>} records - salvaged records; entries
 *   without a `cols` array are ignored
 * @param {string[]} columns - column names matching the positions in `cols`
 * @returns {Array<object>} conversations
 */
function mapConversationRecords(records, columns) {
  const conversations = [];
  for (const rec of records || []) {
    const usable = rec && Array.isArray(rec.cols);
    if (!usable) continue;

    const row = zip(rec.cols, columns);
    if (row.conversation_id == null) continue;

    const conversation = {
      conversationId: String(row.conversation_id),
      conversationType: typeof row.type === "number" ? row.type : null,
      // Missing or non-numeric timestamps are normalised from 0.
      lastMsgTimeMs: normalizeEpochMs(Number(row.last_msg_create_time) || 0),
      // Only a numeric flag is trusted; 1 means stranger.
      stranger: typeof row.stranger === "number" ? row.stranger === 1 : null,
    };
    conversations.push(conversation);
  }
  return conversations;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Heuristic column naming for when the exact `msg` column order is unknown.
 *
 * Looks only at the first record that has at least 3 columns and guesses:
 *   - `content`: the string with the highest score, where a string whose
 *     trimmed form starts with "{" gets a large bonus over plain length;
 *   - `created_time`: the largest number greater than 1e9.
 * Every other position is named c0, c1, ... by index.
 *
 * @param {Array<{cols: Array<*>}>} records - salvaged records
 * @returns {string[]} column names usable with mapMsgRecords; empty when no
 *   record has 3 or more columns
 */
function inferMsgColumns(records) {
  const hasEnoughCols = (r) => r && Array.isArray(r.cols) && r.cols.length >= 3;
  const sample = (records || []).find(hasEnoughCols);
  if (!sample) return [];

  const values = sample.cols;
  let bestText = { index: -1, score: -1 };
  let bestTime = { index: -1, value: -1 };

  values.forEach((value, index) => {
    if (typeof value === "string") {
      // JSON-looking text outranks any plain string; length breaks ties.
      const jsonBonus = value.trim().startsWith("{") ? 1e6 : 0;
      const score = jsonBonus + value.length;
      if (score > bestText.score) bestText = { index, score };
      return;
    }
    if (typeof value !== "number") return;
    // largest epoch-ish int → created_time (ms/sec/us all > 1e9)
    if (value > 1e9 && value > bestTime.value) bestTime = { index, value };
  });

  const names = values.map((_, i) => `c${i}`);
  if (bestText.index >= 0) names[bestText.index] = "content";
  if (bestTime.index >= 0) names[bestTime.index] = "created_time";
  return names;
}
|
|
103
|
+
|
|
104
|
+
/**
 * One-shot: map per-table salvaged records into the parseImDb output shape.
 *
 * Each argument is optional and has the form `{ records, columns }`; a table
 * that is not supplied yields an empty list.
 *
 * @param {{msg?: object, participant?: object, conversation?: object}} [tables]
 * @returns {{messages: Array, contacts: Array, conversations: Array}}
 */
function mapSalvaged({ msg, participant, conversation } = {}) {
  const messages = msg ? mapMsgRecords(msg.records, msg.columns) : [];
  const contacts = participant
    ? mapParticipantRecords(participant.records, participant.columns)
    : [];
  const conversations = conversation
    ? mapConversationRecords(conversation.records, conversation.columns)
    : [];
  return { messages, contacts, conversations };
}
|
|
112
|
+
|
|
113
|
+
module.exports = {
|
|
114
|
+
mapMsgRecords,
|
|
115
|
+
mapParticipantRecords,
|
|
116
|
+
mapConversationRecords,
|
|
117
|
+
inferMsgColumns,
|
|
118
|
+
mapSalvaged,
|
|
119
|
+
};
|