@chainlesschain/personal-data-hub 0.4.25 → 0.4.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/analysis-skills.test.js +71 -2
- package/__tests__/analysis.test.js +46 -0
- package/__tests__/salvage-ingest.test.js +97 -0
- package/__tests__/social-douyin-im-direct-read.test.js +69 -3
- package/__tests__/social-douyin-salvage-collector.test.js +98 -0
- package/__tests__/social-douyin-salvage-mapper.test.js +90 -0
- package/__tests__/social-weibo-sqlite-device.test.js +174 -0
- package/__tests__/sqlite-leaf-salvage.test.js +97 -0
- package/lib/adapters/social-douyin/index.js +56 -2
- package/lib/adapters/social-douyin-adb/collector.js +100 -0
- package/lib/adapters/social-douyin-adb/im-db-parser.js +85 -0
- package/lib/adapters/social-douyin-adb/index.js +5 -0
- package/lib/adapters/social-douyin-adb/salvage-mapper.js +119 -0
- package/lib/adapters/social-weibo/index.js +110 -30
- package/lib/analysis-skills/index.js +3 -0
- package/lib/analysis-skills/overview.js +157 -0
- package/lib/analysis.js +50 -0
- package/lib/forensics/leaf-salvage.js +185 -0
- package/lib/forensics/salvage-ingest.js +160 -0
- package/lib/prompt-builder.js +9 -0
- package/package.json +4 -2
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
|
4
|
+
|
|
5
|
+
const fs = require("node:fs");
|
|
6
|
+
const path = require("node:path");
|
|
7
|
+
const os = require("node:os");
|
|
8
|
+
|
|
9
|
+
const { WeiboAdapter } = require("../lib/adapters/social-weibo");
|
|
10
|
+
|
|
11
|
+
// §A8 sqlite mode — device-verified schema regression tests.
|
|
12
|
+
//
|
|
13
|
+
// The legacy sqlite path queried `post`/`status`/`search_history`, but a real
|
|
14
|
+
// Weibo install (Redmi M2104K10AC, 微博 16.5.3, verified 2026-06-16) has NO
|
|
15
|
+
// such tables — its data lives in `home_table` (timeline), `like_table`
|
|
16
|
+
// (likes), `follower_table` (following). So the legacy path silently
|
|
17
|
+
// collected ZERO on a real device. These tests pin the device-verified
|
|
18
|
+
// table/column mapping + the legacy fallback.
|
|
19
|
+
//
|
|
20
|
+
// A fake driver returns synthetic rows keyed off the table name in the SQL,
|
|
21
|
+
// and throws "no such table" for absent tables (mirroring better-sqlite3) so
|
|
22
|
+
// `trySelect` falls through exactly as on a real DB.
|
|
23
|
+
|
|
24
|
+
/**
 * Build a `dbDriverFactory` whose Database class serves canned rows.
 *
 * The factory returns a class exposing `prepare(sql)` and `close()`. A prepared
 * statement's `all()` returns the rows of the first entry in `tables` whose
 * name appears in the SQL as `FROM <name>` (whole-word match), and throws
 * `Error("no such table")` when no entry matches, so callers that probe several
 * candidate tables can fall through to the next one.
 *
 * `tables` is read on every `all()` call, so later mutations are observed.
 *
 * @param {Record<string, object[]>} tables table name → rows to return
 * @returns {() => Function} factory returning the fake Database class
 */
function makeFakeDriver(tables) {
  // Table names are matched literally: escape regex metacharacters so a name
  // such as "a.b" cannot accidentally match "aXb".
  const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  return function dbDriverFactory() {
    return class FakeDb {
      constructor() {}
      prepare(sql) {
        return {
          all: () => {
            for (const [name, rows] of Object.entries(tables)) {
              // `\b` keeps `post` from matching `post_table`.
              if (new RegExp(`FROM ${escapeRegExp(name)}\\b`).test(sql)) return rows;
            }
            throw new Error("no such table");
          },
        };
      }
      close() {}
    };
  };
}
|
|
42
|
+
|
|
43
|
+
// Uid of the account under test; HOME_ROW reuses it for `uid` and `own_uid`.
const SELF_UID = "2075014533";

// Device-verified column shapes.

// One `home_table` row authored by SELF_UID.
const HOME_ROW = {
  mblogid: "MID_001",
  uid: SELF_UID,
  own_uid: SELF_UID,
  nick: "me",
  content: "今天去爬山了 ⛰️",
  time: "1718500000",
  src: "微博 weibo.com",
  rtnum: 3,
  commentnum: 7,
  attitudenum: 42,
};

// One `like_table` row: a post by another user (uid "999").
const LIKE_ROW = {
  mblogid: "MID_LIKED",
  uid: "999",
  nick: "好友A",
  content: "一条被点赞的微博",
  time: "1718400000",
  attitudenum: 100,
};

// One `follower_table` row describing a followed account.
const FOLLOW_ROW = {
  user_id: "555",
  id: "555",
  screen_name: "关注的人",
  remark: "",
  gender: "f",
  following: 1,
};
|
|
74
|
+
|
|
75
|
+
/**
 * Create a WeiboAdapter backed by the fake driver and a throwaway db file.
 *
 * A fresh temp directory is created on every call; the caller is responsible
 * for removing `tmp` afterwards.
 *
 * @param {Record<string, object[]>} tables rows served by the fake driver, keyed by table name
 * @returns {{ a: object, dbPath: string, tmp: string }} the adapter, the db file path, and the temp dir
 */
function newAdapter(tables) {
  const tmp = fs.mkdtempSync(path.join(os.tmpdir(), "weibo-sqlite-"));
  const dbPath = path.join(tmp, "sina_weibo");
  // Placeholder content only — the file just has to exist (existsSync gate,
  // presumably inside the adapter; the fake driver never reads it).
  fs.writeFileSync(dbPath, "x");
  const adapterOptions = {
    account: { uid: SELF_UID },
    dbPath,
    dbDriverFactory: makeFakeDriver(tables),
  };
  return { a: new WeiboAdapter(adapterOptions), dbPath, tmp };
}
|
|
86
|
+
|
|
87
|
+
/**
 * Drain `a.sync({ dbPath })` into an array.
 *
 * @param {{ sync: Function }} a object whose `sync` returns an (async) iterable
 * @param {string} dbPath forwarded to `sync` as `{ dbPath }`
 * @returns {Promise<Array>} every yielded item, in iteration order
 */
async function collect(a, dbPath) {
  const collected = [];
  const stream = a.sync({ dbPath });
  for await (const item of stream) {
    collected.push(item);
  }
  return collected;
}
|
|
92
|
+
|
|
93
|
+
describe("WeiboAdapter sqlite mode — device-verified schema", () => {
  // Temp dirs created by the running test; removed (best effort) after each test.
  let dirs = [];

  afterEach(() => {
    dirs.forEach((dir) => {
      try {
        fs.rmSync(dir, { recursive: true, force: true });
      } catch (_e) {
        /* ignore */
      }
    });
    dirs = [];
  });

  // Build an adapter over `tables`, register its temp dir for cleanup, then
  // collect everything the adapter's sync yields.
  const syncTables = async (tables) => {
    const { a, dbPath, tmp } = newAdapter(tables);
    dirs.push(tmp);
    const raws = await collect(a, dbPath);
    return { a, raws };
  };

  it("modern device (home/like/follower only) collects posts+favs+follows", async () => {
    const { raws } = await syncTables({
      home_table: [HOME_ROW],
      like_table: [LIKE_ROW],
      follower_table: [FOLLOW_ROW],
    });
    const kinds = raws.map((r) => r.payload.kind).sort();
    expect(kinds).toEqual(["favourite", "follow", "post"]);
  });

  it("home_table post normalizes content/time/counts correctly", async () => {
    const { a, raws } = await syncTables({ home_table: [HOME_ROW] });
    expect(raws).toHaveLength(1);
    const [ev] = a.normalize(raws[0]).events;
    expect(ev.subtype).toBe("post");
    expect(ev.content.text).toBe("今天去爬山了 ⛰️");
    expect(ev.extra.weiboMid).toBe("MID_001");
    expect(ev.extra.likesCount).toBe(42);
    expect(ev.extra.repostsCount).toBe(3);
    expect(ev.extra.commentsCount).toBe(7);
    // time '1718500000' (epoch seconds) → ms
    expect(ev.occurredAt).toBe(1718500000 * 1000);
  });

  it("like_table normalizes to a LIKE event with author nick", async () => {
    const { a, raws } = await syncTables({ like_table: [LIKE_ROW] });
    const [ev] = a.normalize(raws[0]).events;
    expect(ev.subtype).toBe("like");
    expect(ev.content.text).toBe("一条被点赞的微博");
    expect(ev.extra.weiboMid).toBe("MID_LIKED");
    expect(ev.extra.authorScreenName).toBe("好友A");
  });

  it("follower_table normalizes to a CONTACT person with weibo-uid", async () => {
    const { a, raws } = await syncTables({ follower_table: [FOLLOW_ROW] });
    const { events, persons } = a.normalize(raws[0]);
    expect(events).toHaveLength(0);
    expect(persons).toHaveLength(1);
    const [person] = persons;
    expect(person.names).toEqual(["关注的人"]);
    expect(person.identifiers["weibo-uid"]).toEqual(["555"]);
  });

  it("legacy device (post table, no home_table) still works via fallback", async () => {
    const legacyRow = { id: "L1", text: "legacy post", created_at: "1700000000", attitudes_count: 5 };
    const { a, raws } = await syncTables({ post: [legacyRow] });
    expect(raws).toHaveLength(1);
    const [ev] = a.normalize(raws[0]).events;
    expect(ev.subtype).toBe("post");
    expect(ev.content.text).toBe("legacy post");
    expect(ev.extra.likesCount).toBe(5);
  });

  it("empty DB (none of the tables exist) collects nothing, no throw", async () => {
    const { raws } = await syncTables({});
    expect(raws).toEqual([]);
  });
});
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
|
4
|
+
|
|
5
|
+
const fs = require("node:fs");
|
|
6
|
+
const path = require("node:path");
|
|
7
|
+
const os = require("node:os");
|
|
8
|
+
|
|
9
|
+
// The salvager lives in scripts/ (a standalone forensic tool) but exports its
|
|
10
|
+
// pure parsers for testing.
|
|
11
|
+
const {
|
|
12
|
+
parseLeafPage,
|
|
13
|
+
readVarint,
|
|
14
|
+
serialTypeSize,
|
|
15
|
+
} = require("../../../scripts/android/pdh-sqlite-leaf-salvage.js");
|
|
16
|
+
|
|
17
|
+
// Build a real (UTF-8) SQLite DB via the SQLCipher-capable driver, then salvage
|
|
18
|
+
// records straight from its raw page bytes — proving the leaf-page parser reads
|
|
19
|
+
// rowids + columns + UTF-8 text correctly (the Method-B reconstruction step for
|
|
20
|
+
// scattered/malformed memory dumps). See docs/internal/pdh-db-decryption-runbook.md.
|
|
21
|
+
describe("pdh-sqlite-leaf-salvage — leaf-page record salvager", () => {
  // Page size used by every scan below. NOTE(review): presumably matches the
  // page size of the fixture DB created in beforeAll (no page_size is set
  // there, so the driver default applies) — confirm.
  const PAGE = 4096;
  let dir, dbPath, buf;

  // Create a small real DB with three `msg` rows and keep its raw bytes in `buf`.
  beforeAll(() => {
    const Database = require("better-sqlite3-multiple-ciphers");
    dir = fs.mkdtempSync(path.join(os.tmpdir(), "salvage-"));
    dbPath = path.join(dir, "u.db");
    const db = new Database(dbPath);
    db.exec(
      "CREATE TABLE msg(msg_uuid TEXT, conversation_id TEXT, sender INTEGER, content TEXT, created_time INTEGER)",
    );
    const ins = db.prepare("INSERT INTO msg VALUES(?,?,?,?,?)");
    ins.run("uuid-1", "conv-1", 111, "你好呀 hello", 1700000000000);
    ins.run("uuid-2", "conv-1", 222, "在吗?晚上一起吃饭", 1700000001000);
    ins.run("uuid-3", "conv-2", 333, "ok 👍", 1700000002000);
    db.close();
    buf = fs.readFileSync(dbPath);
  });

  afterAll(() => {
    try {
      fs.rmSync(dir, { recursive: true, force: true });
    } catch (_e) {
      /* ignore */
    }
  });

  // Walk `buf` on the PAGE-aligned grid and yield `{ base, recs }` for every
  // slot where parseLeafPage returns a truthy result. Lazy, so a caller that
  // breaks early stops the scan at that slot.
  function* alignedLeafPages() {
    for (let base = 0; base + PAGE <= buf.length; base += PAGE) {
      const recs = parseLeafPage(buf, base, PAGE, 3);
      if (recs) yield { base, recs };
    }
  }

  it("varint + serialTypeSize basics", () => {
    expect(readVarint(Buffer.from([0x01]), 0)[0]).toBe(1n);
    expect(readVarint(Buffer.from([0x81, 0x00]), 0)[0]).toBe(128n);
    expect(serialTypeSize(1n)).toBe(1); // 1-byte int
    expect(serialTypeSize(6n)).toBe(8); // 8-byte int
    expect(serialTypeSize(13n)).toBe(0); // text len 0
    expect(serialTypeSize(0x29n)).toBe(14); // text serial 41 → 14 bytes
  });

  it("salvages all 3 msg rows with correct columns + UTF-8 text", () => {
    const records = [];
    for (const { recs } of alignedLeafPages()) records.push(...recs);
    // find the msg rows (5 cols, content is the 4th)
    const msgRows = records.filter((r) => r.cols.length === 5);
    expect(msgRows.length).toBe(3);
    const byUuid = Object.fromEntries(msgRows.map((r) => [r.cols[0], r]));
    expect(byUuid["uuid-1"].cols[2]).toBe(111); // sender int
    expect(byUuid["uuid-1"].cols[3]).toBe("你好呀 hello"); // UTF-8 intact
    expect(byUuid["uuid-2"].cols[3]).toBe("在吗?晚上一起吃饭");
    expect(byUuid["uuid-2"].cols[4]).toBe(1700000001000); // created_time
    expect(byUuid["uuid-3"].cols[3]).toBe("ok 👍"); // emoji (4-byte UTF-8)
  });

  it("finds a leaf page at a NON-4096-aligned offset (unaligned scan)", () => {
    // locate a real data leaf page on the aligned grid
    let leaf = null;
    for (const { base, recs } of alignedLeafPages()) {
      if (recs.some((r) => r.cols.length === 5)) {
        // subarray() replaces the deprecated Buffer#slice; both return a view
        // over the same memory, and the bytes are copied out just below.
        leaf = buf.subarray(base, base + PAGE);
        break;
      }
    }
    expect(leaf).not.toBeNull();
    // embed it at a 512-aligned-but-not-4096-aligned offset inside a zero buffer
    const big = Buffer.alloc(PAGE * 4, 0);
    const off = 512 * 3; // 1536: hit by stride-512, missed by stride-4096
    leaf.copy(big, off);
    // aligned 4096-grid misses it
    expect(parseLeafPage(big, 0, PAGE, 3)).toBeNull();
    expect(parseLeafPage(big, PAGE, PAGE, 3)).toBeNull();
    // unaligned stride finds it at its true offset
    const recs = parseLeafPage(big, off, PAGE, 3);
    expect(recs).not.toBeNull();
    expect(recs.some((r) => r.cols[0] === "uuid-1")).toBe(true);
  });

  it("returns null for non-leaf / garbage pages", () => {
    const garbage = Buffer.alloc(PAGE, 0xff);
    expect(parseLeafPage(garbage, 0, PAGE, 3)).toBeNull();
    const zeros = Buffer.alloc(PAGE, 0);
    expect(parseLeafPage(zeros, 0, PAGE, 3)).toBeNull();
  });
});
|
|
@@ -63,7 +63,8 @@ const KIND_FAVOURITE = "favourite"; // v0.3 (X-Bogus required)
|
|
|
63
63
|
const KIND_LIKE = "like"; // v0.3 (X-Bogus required)
|
|
64
64
|
const KIND_SEARCH = "search"; // legacy sqlite-mode only
|
|
65
65
|
const KIND_MESSAGE = "message"; // Phase 2a — IM private messages from <uid>_im.db (abrignoni DFIR)
|
|
66
|
-
const KIND_CONTACT = "contact"; // Phase 2a — SIMPLE_USER
|
|
66
|
+
const KIND_CONTACT = "contact"; // Phase 2a — SIMPLE_USER/participant contacts from <uid>_im.db
|
|
67
|
+
const KIND_CONVERSATION = "conversation"; // device-verified — conversation_list thread → TOPIC
|
|
67
68
|
|
|
68
69
|
// Forward-compat: list every kind v0.3+ may emit so cc adapter accepts
|
|
69
70
|
// snapshots from a newer Android even if this JS hasn't been bumped yet.
|
|
@@ -258,7 +259,7 @@ class DouyinAdapter {
|
|
|
258
259
|
if (Number.isInteger(opts.limitContacts)) parseOpts.limitContacts = opts.limitContacts;
|
|
259
260
|
if (this._deps.dbDriverFactory) parseOpts._databaseClass = this._deps.dbDriverFactory();
|
|
260
261
|
|
|
261
|
-
const { messages, contacts, diagnostic } = parseImDb(dbPath, parseOpts);
|
|
262
|
+
const { messages, contacts, conversations, diagnostic } = parseImDb(dbPath, parseOpts);
|
|
262
263
|
if (typeof opts.onProgress === "function") {
|
|
263
264
|
try {
|
|
264
265
|
opts.onProgress({ phase: "im-db-parsed", adapter: NAME, ...diagnostic });
|
|
@@ -314,6 +315,27 @@ class DouyinAdapter {
|
|
|
314
315
|
emitted += 1;
|
|
315
316
|
}
|
|
316
317
|
}
|
|
318
|
+
|
|
319
|
+
if (include[KIND_CONVERSATION] !== false) {
|
|
320
|
+
for (const cv of conversations || []) {
|
|
321
|
+
if (emitted >= limit) return;
|
|
322
|
+
if (!cv || typeof cv !== "object" || !cv.conversationId) continue;
|
|
323
|
+
yield {
|
|
324
|
+
adapter: NAME,
|
|
325
|
+
kind: KIND_CONVERSATION,
|
|
326
|
+
originalId: stableOriginalId(
|
|
327
|
+
KIND_CONVERSATION,
|
|
328
|
+
`conv-${cv.conversationId}`,
|
|
329
|
+
),
|
|
330
|
+
capturedAt:
|
|
331
|
+
typeof cv.lastMsgTimeMs === "number" && cv.lastMsgTimeMs > 0
|
|
332
|
+
? cv.lastMsgTimeMs
|
|
333
|
+
: fallbackCapturedAt,
|
|
334
|
+
payload: { kind: KIND_CONVERSATION, ...cv },
|
|
335
|
+
};
|
|
336
|
+
emitted += 1;
|
|
337
|
+
}
|
|
338
|
+
}
|
|
317
339
|
}
|
|
318
340
|
|
|
319
341
|
async *_syncViaSnapshot(opts) {
|
|
@@ -454,6 +476,9 @@ class DouyinAdapter {
|
|
|
454
476
|
if (kind === KIND_CONTACT) {
|
|
455
477
|
return normalizeContact(p, raw, ingestedAt);
|
|
456
478
|
}
|
|
479
|
+
if (kind === KIND_CONVERSATION) {
|
|
480
|
+
return normalizeConversation(p, raw, ingestedAt);
|
|
481
|
+
}
|
|
457
482
|
throw new Error(`DouyinAdapter.normalize: unknown kind ${kind}`);
|
|
458
483
|
}
|
|
459
484
|
}
|
|
@@ -697,6 +722,35 @@ function normalizeContact(p, raw, ingestedAt) {
|
|
|
697
722
|
};
|
|
698
723
|
}
|
|
699
724
|
|
|
725
|
+
/**
 * Normalize one conversation payload into a single TOPIC entity.
 *
 * The conversation id is taken from `p.conversationId` when it is a non-empty
 * string or a number (stringified); otherwise it is null and the topic id
 * falls back to a freshly generated `newId()`.
 *
 * @param {object} p raw payload (`conversationId`, `conversationType`, `lastMsgTimeMs`, `stranger`)
 * @param {object} raw raw record; `raw.capturedAt` is used as the occurrence time when truthy
 * @param {number} ingestedAt ingestion timestamp, also the fallback occurrence time
 * @returns {{events: Array, persons: Array, places: Array, items: Array, topics: Array}}
 *   normalized batch holding exactly one topic and no other entities
 */
function normalizeConversation(p, raw, ingestedAt) {
  // conversation_list row from <uid>_im.db → a TOPIC (one chat thread).
  let convId = null;
  if (typeof p.conversationId === "string" && p.conversationId) {
    convId = p.conversationId;
  } else if (typeof p.conversationId === "number") {
    convId = String(p.conversationId);
  }

  const occurredAt = raw.capturedAt || ingestedAt;
  const source = buildSource(raw, occurredAt, CAPTURED_BY.SQLITE);

  // Keep a field only when it has the expected primitive type; otherwise null.
  const ofType = (value, type) => (typeof value === type ? value : null);

  const topic = {
    id: `topic-douyin-conv-${convId || newId()}`,
    type: ENTITY_TYPES.TOPIC,
    name: convId ? `抖音会话 ${convId}` : "抖音会话",
    ingestedAt,
    source,
    extra: {
      platform: "douyin",
      conversationId: convId,
      conversationType: ofType(p.conversationType, "number"),
      lastMsgTimeMs: ofType(p.lastMsgTimeMs, "number"),
      stranger: ofType(p.stranger, "boolean"),
    },
  };

  return {
    events: [],
    persons: [],
    places: [],
    items: [],
    topics: [topic],
  };
}
|
|
753
|
+
|
|
700
754
|
module.exports = {
|
|
701
755
|
DouyinAdapter,
|
|
702
756
|
NAME,
|
|
@@ -25,6 +25,8 @@ const {
|
|
|
25
25
|
writeSnapshotJson,
|
|
26
26
|
cleanupSnapshotJson,
|
|
27
27
|
} = require("./snapshot-builder");
|
|
28
|
+
const { salvageFile } = require("../../forensics/leaf-salvage");
|
|
29
|
+
const { mapMsgRecords, inferMsgColumns } = require("./salvage-mapper");
|
|
28
30
|
|
|
29
31
|
/**
|
|
30
32
|
* Pull IM db → parse → write snapshot. Returns the staging path + counts
|
|
@@ -159,6 +161,102 @@ async function collectAndSync(bridge, registry, opts = {}) {
|
|
|
159
161
|
};
|
|
160
162
|
}
|
|
161
163
|
|
|
164
|
+
// ── Salvage path (Method B /proc/mem dump → leaf-salvage → snapshot) ──────
|
|
165
|
+
//
|
|
166
|
+
// The key-free decryption breakthrough: a rooted device dumps a running app's
|
|
167
|
+
// decrypted SQLite pages from /proc/<pid>/mem, then this salvages the message
|
|
168
|
+
// records straight out of the leaf pages (no key, no password) and ingests them
|
|
169
|
+
// through the same social-douyin snapshot path. Closes the loop: dump → salvage
|
|
170
|
+
// → THIS → PDH entities. See docs/internal/pdh-db-decryption-runbook.md §3.5.
|
|
171
|
+
|
|
172
|
+
/**
 * Salvage records from a memory dump and write them out as a social-douyin
 * snapshot JSON file.
 *
 * Steps: `salvageFile` extracts records from the dump, the records are mapped
 * to messages using either the caller's column order or `inferMsgColumns`, and
 * the resulting snapshot (messages only, no contacts) is written via
 * `writeSnapshotJson`.
 *
 * @param {string} dumpPath path to the /proc/mem dump (or concatenated dumps)
 * @param {{
 *   uid?: string,            // account uid; defaults to the "salvage" placeholder
 *   columns?: string[],      // explicit msg column order; else inferMsgColumns
 *   pageSize?: number, minCols?: number, unaligned?: boolean, stride?: number,
 *   displayName?: string,
 *   stagingDir?: string,     // passed to writeSnapshotJson as `dir`
 *   now?: () => number,      // clock override; defaults to Date.now
 * }} [opts]
 * @returns {{snapshotPath: string, uid: string, eventCounts: object, salvage: object}}
 * @throws {TypeError} when `dumpPath` is not a non-empty string
 */
function salvageDumpToSnapshot(dumpPath, opts = {}) {
  const hasDumpPath = typeof dumpPath === "string" && dumpPath.length !== 0;
  if (!hasDumpPath) {
    throw new TypeError("salvageDumpToSnapshot: dumpPath must be a non-empty string");
  }

  const now = opts.now || Date.now;
  const scanOptions = {
    pageSize: opts.pageSize,
    minCols: opts.minCols,
    unaligned: opts.unaligned,
    stride: opts.stride,
  };
  const { records, pages } = salvageFile(dumpPath, scanOptions);

  // Leaf pages carry no column names — use the caller's explicit order when
  // known (most accurate), else heuristically infer content/created_time.
  let columns = opts.columns;
  if (!Array.isArray(columns) || !columns.length) {
    columns = inferMsgColumns(records);
  }
  const messages = mapMsgRecords(records, columns);

  const hasUid = typeof opts.uid === "string" && opts.uid.length;
  const uid = hasUid ? opts.uid : "salvage";

  const snapshot = buildSnapshot({
    uid,
    displayName: opts.displayName,
    messages,
    contacts: [],
    snapshottedAt: now(),
  });
  const snapshotPath = writeSnapshotJson(snapshot, { dir: opts.stagingDir });

  const messageCount = messages.length;
  return {
    snapshotPath,
    uid,
    eventCounts: { message: messageCount, total: messageCount },
    salvage: { leafPages: pages, recordsSalvaged: records.length, columns },
  };
}
|
|
219
|
+
|
|
220
|
+
/**
 * One-shot: salvage dump → snapshot → syncAdapter("social-douyin") → cleanup.
 *
 * The snapshot file is removed in a `finally`, so cleanup is attempted whether
 * the sync succeeds or throws. A cleanup failure never masks the sync outcome;
 * it is only reported as `douyin.cleanupFailed`.
 *
 * @param {object} registry AdapterRegistry (must expose syncAdapter)
 * @param {string} dumpPath
 * @param {object} [opts] forwarded to salvageDumpToSnapshot
 * @returns {Promise<object>} the sync report's own fields plus a `douyin`
 *   diagnostic (`uid`, `eventCounts`, `salvage`, `mode`, `cleanupFailed`)
 * @throws {TypeError} when `registry.syncAdapter` is not a function
 */
async function salvageAndSync(registry, dumpPath, opts = {}) {
  const canSync = Boolean(registry) && typeof registry.syncAdapter === "function";
  if (!canSync) {
    throw new TypeError(
      "salvageAndSync: registry must expose syncAdapter(name, options)",
    );
  }

  const { snapshotPath, uid, eventCounts, salvage } = salvageDumpToSnapshot(dumpPath, opts);
  const douyin = {
    uid,
    eventCounts,
    salvage,
    mode: "salvage",
    cleanupFailed: false,
  };

  let syncReport = null;
  try {
    syncReport = await registry.syncAdapter("social-douyin", {
      inputPath: snapshotPath,
    });
  } finally {
    // Best effort: record the failure instead of throwing from `finally`.
    try {
      cleanupSnapshotJson(snapshotPath);
    } catch (_e) {
      douyin.cleanupFailed = true;
    }
  }

  return { ...syncReport, douyin };
}
|
|
259
|
+
|
|
162
260
|
// ── Watch-history (video_record.db) path ─────────────────────────────────
|
|
163
261
|
// Distinct from the IM-db path above: pulls the plaintext video_record.db and
|
|
164
262
|
// emits `history` events (KIND_HISTORY → BROWSE) the social-douyin adapter
|
|
@@ -276,4 +374,6 @@ module.exports = {
|
|
|
276
374
|
collectAndSync,
|
|
277
375
|
collectWatchHistory,
|
|
278
376
|
collectWatchHistoryAndSync,
|
|
377
|
+
salvageDumpToSnapshot,
|
|
378
|
+
salvageAndSync,
|
|
279
379
|
};
|
|
@@ -130,14 +130,22 @@ function parseImDb(dbPath, opts = {}) {
|
|
|
130
130
|
: 5_000;
|
|
131
131
|
const Database = opts._databaseClass || loadDatabaseClass();
|
|
132
132
|
const db = new Database(dbPath, { readonly: true });
|
|
133
|
+
const limitConversations =
|
|
134
|
+
Number.isInteger(opts.limitConversations) && opts.limitConversations > 0
|
|
135
|
+
? opts.limitConversations
|
|
136
|
+
: 5_000;
|
|
133
137
|
const out = {
|
|
134
138
|
messages: [],
|
|
135
139
|
contacts: [],
|
|
140
|
+
conversations: [],
|
|
136
141
|
diagnostic: {
|
|
137
142
|
messageCount: 0,
|
|
138
143
|
contactCount: 0,
|
|
144
|
+
conversationCount: 0,
|
|
139
145
|
hadMsgTable: false,
|
|
140
146
|
hadSimpleUserTable: false,
|
|
147
|
+
hadParticipantTable: false,
|
|
148
|
+
hadConversationListTable: false,
|
|
141
149
|
},
|
|
142
150
|
};
|
|
143
151
|
try {
|
|
@@ -230,6 +238,83 @@ function parseImDb(dbPath, opts = {}) {
|
|
|
230
238
|
out.diagnostic.contactCount = out.contacts.length;
|
|
231
239
|
}
|
|
232
240
|
}
|
|
241
|
+
|
|
242
|
+
// ─── participant table (device-verified 2026-06-16) ──────────────────
|
|
243
|
+
// Real Douyin IM schema keeps conversation members in `participant`
|
|
244
|
+
// (conversation_id, user_id, sort_order; UNIQUE(conversation_id,user_id)),
|
|
245
|
+
// NOT SIMPLE_USER (which is older/other builds). Pull distinct member uids
|
|
246
|
+
// as contacts — uid-only (nickname/avatar live in a separate user table),
|
|
247
|
+
// so a PERSON gets created keyed by douyin-uid even without a name.
|
|
248
|
+
// Dedup against contacts already harvested from SIMPLE_USER.
|
|
249
|
+
const partTableInfo = trySelect(db, "PRAGMA table_info(participant)");
|
|
250
|
+
if (Array.isArray(partTableInfo) && partTableInfo.length > 0) {
|
|
251
|
+
out.diagnostic.hadParticipantTable = true;
|
|
252
|
+
const columns = new Set(partTableInfo.map((r) => r.name));
|
|
253
|
+
const uidCol = pickCol(columns, ["user_id", "uid", "UID"]);
|
|
254
|
+
if (uidCol) {
|
|
255
|
+
const seen = new Set(
|
|
256
|
+
out.contacts.map((c) => c.uid).filter(Boolean),
|
|
257
|
+
);
|
|
258
|
+
const sql =
|
|
259
|
+
`SELECT DISTINCT ${uidCol} AS uid FROM participant ` +
|
|
260
|
+
`WHERE ${uidCol} IS NOT NULL LIMIT ${limitContacts}`;
|
|
261
|
+
const rows = trySelect(db, sql) || [];
|
|
262
|
+
for (const r of rows) {
|
|
263
|
+
const uid = r.uid != null ? String(r.uid) : null;
|
|
264
|
+
if (!uid || seen.has(uid)) continue;
|
|
265
|
+
seen.add(uid);
|
|
266
|
+
out.contacts.push({
|
|
267
|
+
uid,
|
|
268
|
+
shortId: null,
|
|
269
|
+
name: null,
|
|
270
|
+
avatarUrl: null,
|
|
271
|
+
followStatus: null,
|
|
272
|
+
fromParticipant: true,
|
|
273
|
+
});
|
|
274
|
+
}
|
|
275
|
+
out.diagnostic.contactCount = out.contacts.length;
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
// ─── conversation_list table (device-verified 2026-06-16) ────────────
|
|
280
|
+
// Each row is a chat thread → PDH TOPIC. Columns vary by build; pick
|
|
281
|
+
// defensively. conversation_id is the only hard requirement.
|
|
282
|
+
const convTableInfo = trySelect(db, "PRAGMA table_info(conversation_list)");
|
|
283
|
+
if (Array.isArray(convTableInfo) && convTableInfo.length > 0) {
|
|
284
|
+
out.diagnostic.hadConversationListTable = true;
|
|
285
|
+
const columns = new Set(convTableInfo.map((r) => r.name));
|
|
286
|
+
const idCol = pickCol(columns, ["conversation_id", "conv_id", "id"]);
|
|
287
|
+
const typeCol = pickCol(columns, ["type", "conversation_type", "conv_type"]);
|
|
288
|
+
const lastTimeCol = pickCol(columns, [
|
|
289
|
+
"last_msg_create_time",
|
|
290
|
+
"last_message_time",
|
|
291
|
+
"updated_time",
|
|
292
|
+
]);
|
|
293
|
+
const strangerCol = pickCol(columns, ["stranger", "is_stranger"]);
|
|
294
|
+
if (idCol) {
|
|
295
|
+
const fields = [`${idCol} AS convId`];
|
|
296
|
+
if (typeCol) fields.push(`${typeCol} AS convType`);
|
|
297
|
+
if (lastTimeCol) fields.push(`${lastTimeCol} AS lastMsgTime`);
|
|
298
|
+
if (strangerCol) fields.push(`${strangerCol} AS stranger`);
|
|
299
|
+
const orderBy = lastTimeCol ? ` ORDER BY ${lastTimeCol} DESC` : "";
|
|
300
|
+
const sql =
|
|
301
|
+
`SELECT ${fields.join(", ")} FROM conversation_list` +
|
|
302
|
+
`${orderBy} LIMIT ${limitConversations}`;
|
|
303
|
+
const rows = trySelect(db, sql) || [];
|
|
304
|
+
for (const r of rows) {
|
|
305
|
+
if (r.convId == null) continue;
|
|
306
|
+
out.conversations.push({
|
|
307
|
+
conversationId: String(r.convId),
|
|
308
|
+
conversationType:
|
|
309
|
+
typeof r.convType === "number" ? r.convType : null,
|
|
310
|
+
lastMsgTimeMs: normalizeEpochMs(r.lastMsgTime),
|
|
311
|
+
stranger:
|
|
312
|
+
typeof r.stranger === "number" ? r.stranger === 1 : null,
|
|
313
|
+
});
|
|
314
|
+
}
|
|
315
|
+
out.diagnostic.conversationCount = out.conversations.length;
|
|
316
|
+
}
|
|
317
|
+
}
|
|
233
318
|
} finally {
|
|
234
319
|
db.close();
|
|
235
320
|
}
|
|
@@ -43,6 +43,8 @@ const {
|
|
|
43
43
|
collectAndSync,
|
|
44
44
|
collectWatchHistory,
|
|
45
45
|
collectWatchHistoryAndSync,
|
|
46
|
+
salvageDumpToSnapshot,
|
|
47
|
+
salvageAndSync,
|
|
46
48
|
} = require("./collector");
|
|
47
49
|
const {
|
|
48
50
|
createDouyinWatchExtension,
|
|
@@ -71,4 +73,7 @@ module.exports = {
|
|
|
71
73
|
// Collector orchestrator
|
|
72
74
|
collect,
|
|
73
75
|
collectAndSync,
|
|
76
|
+
// Method B salvage path (/proc/mem dump → leaf-salvage → snapshot → ingest)
|
|
77
|
+
salvageDumpToSnapshot,
|
|
78
|
+
salvageAndSync,
|
|
74
79
|
};
|