@chainlesschain/personal-data-hub 0.4.25 → 0.4.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
1
+ "use strict";
2
+
3
+ import { describe, it, expect, beforeAll, afterAll } from "vitest";
4
+
5
+ const fs = require("node:fs");
6
+ const path = require("node:path");
7
+ const os = require("node:os");
8
+
9
+ // The salvager lives in scripts/ (a standalone forensic tool) but exports its
10
+ // pure parsers for testing.
11
+ const {
12
+ parseLeafPage,
13
+ readVarint,
14
+ serialTypeSize,
15
+ } = require("../../../scripts/android/pdh-sqlite-leaf-salvage.js");
16
+
// Build a real (UTF-8) SQLite DB via the SQLCipher-capable driver, then salvage
// records straight from its raw page bytes — proving the leaf-page parser reads
// rowids + columns + UTF-8 text correctly (the Method-B reconstruction step for
// scattered/malformed memory dumps). See docs/internal/pdh-db-decryption-runbook.md.
describe("pdh-sqlite-leaf-salvage — leaf-page record salvager", () => {
  // Shared fixture: temp dir, path of the throw-away DB, and its raw bytes.
  let dir, dbPath, buf;

  beforeAll(() => {
    // Required lazily so the native driver is only loaded when the suite runs.
    const Database = require("better-sqlite3-multiple-ciphers");
    dir = fs.mkdtempSync(path.join(os.tmpdir(), "salvage-"));
    dbPath = path.join(dir, "u.db");
    const db = new Database(dbPath);
    db.exec(
      "CREATE TABLE msg(msg_uuid TEXT, conversation_id TEXT, sender INTEGER, content TEXT, created_time INTEGER)",
    );
    const ins = db.prepare("INSERT INTO msg VALUES(?,?,?,?,?)");
    ins.run("uuid-1", "conv-1", 111, "你好呀 hello", 1700000000000);
    ins.run("uuid-2", "conv-1", 222, "在吗?晚上一起吃饭", 1700000001000);
    ins.run("uuid-3", "conv-2", 333, "ok 👍", 1700000002000);
    db.close();
    // The parser under test works on raw file bytes, not on a DB handle.
    buf = fs.readFileSync(dbPath);
  });

  afterAll(() => {
    // Best-effort cleanup: a leftover temp dir must not fail the suite.
    try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) { /* ignore */ }
  });

  it("varint + serialTypeSize basics", () => {
    expect(readVarint(Buffer.from([0x01]), 0)[0]).toBe(1n);
    expect(readVarint(Buffer.from([0x81, 0x00]), 0)[0]).toBe(128n);
    expect(serialTypeSize(1n)).toBe(1); // 1-byte int
    expect(serialTypeSize(6n)).toBe(8); // 8-byte int
    expect(serialTypeSize(13n)).toBe(0); // text len 0
    expect(serialTypeSize(0x29n)).toBe(14); // text serial 41 → 14 bytes
  });

  it("salvages all 3 msg rows with correct columns + UTF-8 text", () => {
    const PAGE = 4096;
    const records = [];
    // Walk the file on the page grid and collect whatever parses as a leaf page.
    for (let base = 0; base + PAGE <= buf.length; base += PAGE) {
      const recs = parseLeafPage(buf, base, PAGE, 3);
      if (recs) records.push(...recs);
    }
    // find the msg rows (5 cols, content is the 4th)
    const msgRows = records.filter((r) => r.cols.length === 5);
    expect(msgRows.length).toBe(3);
    const byUuid = Object.fromEntries(msgRows.map((r) => [r.cols[0], r]));
    expect(byUuid["uuid-1"].cols[2]).toBe(111); // sender int
    expect(byUuid["uuid-1"].cols[3]).toBe("你好呀 hello"); // UTF-8 intact
    expect(byUuid["uuid-2"].cols[3]).toBe("在吗?晚上一起吃饭");
    expect(byUuid["uuid-2"].cols[4]).toBe(1700000001000); // created_time
    expect(byUuid["uuid-3"].cols[3]).toBe("ok 👍"); // emoji (4-byte UTF-8)
  });

  it("finds a leaf page at a NON-4096-aligned offset (unaligned scan)", () => {
    const PAGE = 4096;
    // locate a real data leaf page on the aligned grid
    let leaf = null;
    for (let base = 0; base + PAGE <= buf.length; base += PAGE) {
      const recs = parseLeafPage(buf, base, PAGE, 3);
      if (recs && recs.some((r) => r.cols.length === 5)) {
        // subarray() is the non-deprecated spelling of Buffer#slice (same view).
        leaf = buf.subarray(base, base + PAGE);
        break;
      }
    }
    expect(leaf).not.toBeNull();
    // embed it at a 512-aligned-but-not-4096-aligned offset inside a zero buffer
    const big = Buffer.alloc(PAGE * 4, 0);
    const off = 512 * 3; // 1536: hit by stride-512, missed by stride-4096
    leaf.copy(big, off);
    // aligned 4096-grid misses it
    expect(parseLeafPage(big, 0, PAGE, 3)).toBeNull();
    expect(parseLeafPage(big, PAGE, PAGE, 3)).toBeNull();
    // unaligned stride finds it at its true offset
    const recs = parseLeafPage(big, off, PAGE, 3);
    expect(recs).not.toBeNull();
    expect(recs.some((r) => r.cols[0] === "uuid-1")).toBe(true);
  });

  it("returns null for non-leaf / garbage pages", () => {
    const garbage = Buffer.alloc(4096, 0xff);
    expect(parseLeafPage(garbage, 0, 4096, 3)).toBeNull();
    const zeros = Buffer.alloc(4096, 0);
    expect(parseLeafPage(zeros, 0, 4096, 3)).toBeNull();
  });
});
@@ -63,7 +63,8 @@ const KIND_FAVOURITE = "favourite"; // v0.3 (X-Bogus required)
63
63
  const KIND_LIKE = "like"; // v0.3 (X-Bogus required)
64
64
  const KIND_SEARCH = "search"; // legacy sqlite-mode only
65
65
  const KIND_MESSAGE = "message"; // Phase 2a — IM private messages from <uid>_im.db (abrignoni DFIR)
66
- const KIND_CONTACT = "contact"; // Phase 2a — SIMPLE_USER table contacts/follows from <uid>_im.db
66
+ const KIND_CONTACT = "contact"; // Phase 2a — SIMPLE_USER/participant contacts from <uid>_im.db
67
+ const KIND_CONVERSATION = "conversation"; // device-verified — conversation_list thread → TOPIC
67
68
 
68
69
  // Forward-compat: list every kind v0.3+ may emit so cc adapter accepts
69
70
  // snapshots from a newer Android even if this JS hasn't been bumped yet.
@@ -258,7 +259,7 @@ class DouyinAdapter {
258
259
  if (Number.isInteger(opts.limitContacts)) parseOpts.limitContacts = opts.limitContacts;
259
260
  if (this._deps.dbDriverFactory) parseOpts._databaseClass = this._deps.dbDriverFactory();
260
261
 
261
- const { messages, contacts, diagnostic } = parseImDb(dbPath, parseOpts);
262
+ const { messages, contacts, conversations, diagnostic } = parseImDb(dbPath, parseOpts);
262
263
  if (typeof opts.onProgress === "function") {
263
264
  try {
264
265
  opts.onProgress({ phase: "im-db-parsed", adapter: NAME, ...diagnostic });
@@ -314,6 +315,27 @@ class DouyinAdapter {
314
315
  emitted += 1;
315
316
  }
316
317
  }
318
+
319
+ if (include[KIND_CONVERSATION] !== false) {
320
+ for (const cv of conversations || []) {
321
+ if (emitted >= limit) return;
322
+ if (!cv || typeof cv !== "object" || !cv.conversationId) continue;
323
+ yield {
324
+ adapter: NAME,
325
+ kind: KIND_CONVERSATION,
326
+ originalId: stableOriginalId(
327
+ KIND_CONVERSATION,
328
+ `conv-${cv.conversationId}`,
329
+ ),
330
+ capturedAt:
331
+ typeof cv.lastMsgTimeMs === "number" && cv.lastMsgTimeMs > 0
332
+ ? cv.lastMsgTimeMs
333
+ : fallbackCapturedAt,
334
+ payload: { kind: KIND_CONVERSATION, ...cv },
335
+ };
336
+ emitted += 1;
337
+ }
338
+ }
317
339
  }
318
340
 
319
341
  async *_syncViaSnapshot(opts) {
@@ -454,6 +476,9 @@ class DouyinAdapter {
454
476
  if (kind === KIND_CONTACT) {
455
477
  return normalizeContact(p, raw, ingestedAt);
456
478
  }
479
+ if (kind === KIND_CONVERSATION) {
480
+ return normalizeConversation(p, raw, ingestedAt);
481
+ }
457
482
  throw new Error(`DouyinAdapter.normalize: unknown kind ${kind}`);
458
483
  }
459
484
  }
@@ -697,6 +722,35 @@ function normalizeContact(p, raw, ingestedAt) {
697
722
  };
698
723
  }
699
724
 
/**
 * Turn one conversation payload (a conversation_list row from <uid>_im.db)
 * into a normalized result holding a single TOPIC entity — one chat thread.
 *
 * @param {object} p           conversation payload (conversationId, conversationType,
 *                             lastMsgTimeMs, stranger are read; anything else is ignored)
 * @param {object} raw         raw event; `raw.capturedAt` is used as the occurrence time
 * @param {number} ingestedAt  ingestion timestamp, also the fallback occurrence time
 * @returns {{events: Array, persons: Array, places: Array, items: Array, topics: Array}}
 */
function normalizeConversation(p, raw, ingestedAt) {
  const numberOrNull = (value) => (typeof value === "number" ? value : null);

  // Accept a non-empty string id or any numeric id; everything else → null.
  const rawId = p.conversationId;
  let convId = null;
  if (typeof rawId === "string" && rawId) {
    convId = rawId;
  } else if (typeof rawId === "number") {
    convId = String(rawId);
  }

  const source = buildSource(
    raw,
    raw.capturedAt || ingestedAt,
    CAPTURED_BY.SQLITE,
  );

  const topic = {
    // Without a usable conversation id, fall back to a freshly generated one.
    id: `topic-douyin-conv-${convId || newId()}`,
    type: ENTITY_TYPES.TOPIC,
    name: convId ? `抖音会话 ${convId}` : "抖音会话",
    ingestedAt,
    source,
    extra: {
      platform: "douyin",
      conversationId: convId,
      conversationType: numberOrNull(p.conversationType),
      lastMsgTimeMs: numberOrNull(p.lastMsgTimeMs),
      stranger: typeof p.stranger === "boolean" ? p.stranger : null,
    },
  };

  return { events: [], persons: [], places: [], items: [], topics: [topic] };
}
753
+
700
754
  module.exports = {
701
755
  DouyinAdapter,
702
756
  NAME,
@@ -25,6 +25,8 @@ const {
25
25
  writeSnapshotJson,
26
26
  cleanupSnapshotJson,
27
27
  } = require("./snapshot-builder");
28
+ const { salvageFile } = require("../../forensics/leaf-salvage");
29
+ const { mapMsgRecords, inferMsgColumns } = require("./salvage-mapper");
28
30
 
29
31
  /**
30
32
  * Pull IM db → parse → write snapshot. Returns the staging path + counts
@@ -159,6 +161,102 @@ async function collectAndSync(bridge, registry, opts = {}) {
159
161
  };
160
162
  }
161
163
 
164
+ // ── Salvage path (Method B /proc/mem dump → leaf-salvage → snapshot) ──────
165
+ //
166
+ // The key-free decryption breakthrough: a rooted device dumps a running app's
167
+ // decrypted SQLite pages from /proc/<pid>/mem, then this salvages the message
168
+ // records straight out of the leaf pages (no key, no password) and ingests them
169
+ // through the same social-douyin snapshot path. Closes the loop: dump → salvage
170
+ // → THIS → PDH entities. See docs/internal/pdh-db-decryption-runbook.md §3.5.
171
+
/**
 * Salvage message records out of a memory dump and write them as a
 * social-douyin snapshot JSON file.
 *
 * @param {string} dumpPath   path to the /proc/mem dump (or concatenated dumps)
 * @param {{
 *   uid?: string,             // account uid; defaults to the "salvage" placeholder
 *   columns?: string[],       // explicit msg column order; else inferMsgColumns
 *   pageSize?: number, minCols?: number, unaligned?: boolean, stride?: number,
 *   displayName?: string,
 *   stagingDir?: string,
 *   now?: () => number,
 * }} [opts]
 * @returns {{snapshotPath: string, uid: string, eventCounts: object, salvage: object}}
 * @throws {TypeError} when dumpPath is not a non-empty string
 */
function salvageDumpToSnapshot(dumpPath, opts = {}) {
  const hasDumpPath = typeof dumpPath === "string" && dumpPath.length > 0;
  if (!hasDumpPath) {
    throw new TypeError("salvageDumpToSnapshot: dumpPath must be a non-empty string");
  }

  const clock = opts.now || Date.now;
  const { pageSize, minCols, unaligned, stride } = opts;
  const { records, pages } = salvageFile(dumpPath, {
    pageSize,
    minCols,
    unaligned,
    stride,
  });

  // Leaf pages have no column names: trust an explicit caller-supplied order,
  // otherwise fall back to guessing content/created_time from value shapes.
  const hasExplicitColumns = Array.isArray(opts.columns) && opts.columns.length;
  const columns = hasExplicitColumns ? opts.columns : inferMsgColumns(records);
  const messages = mapMsgRecords(records, columns);

  let uid = "salvage";
  if (typeof opts.uid === "string" && opts.uid.length) {
    uid = opts.uid;
  }

  const snapshot = buildSnapshot({
    uid,
    displayName: opts.displayName,
    messages,
    contacts: [],
    snapshottedAt: clock(),
  });
  const snapshotPath = writeSnapshotJson(snapshot, { dir: opts.stagingDir });

  const messageCount = messages.length;
  return {
    snapshotPath,
    uid,
    eventCounts: { message: messageCount, total: messageCount },
    salvage: { leafPages: pages, recordsSalvaged: records.length, columns },
  };
}
219
+
/**
 * One-shot pipeline: salvage dump → snapshot → syncAdapter("social-douyin") →
 * snapshot cleanup.
 *
 * The snapshot file is removed whether or not the sync succeeds; a failed
 * removal is reported through `douyin.cleanupFailed` instead of being thrown.
 *
 * @param {object} registry  AdapterRegistry (must expose syncAdapter)
 * @param {string} dumpPath
 * @param {object} [opts]    forwarded to salvageDumpToSnapshot
 * @returns {Promise<object>} SyncReport + salvage diagnostic under `douyin`
 * @throws {TypeError} when registry has no syncAdapter function
 */
async function salvageAndSync(registry, dumpPath, opts = {}) {
  const canSync = Boolean(registry) && typeof registry.syncAdapter === "function";
  if (!canSync) {
    throw new TypeError(
      "salvageAndSync: registry must expose syncAdapter(name, options)",
    );
  }

  const { snapshotPath, uid, eventCounts, salvage } = salvageDumpToSnapshot(
    dumpPath,
    opts,
  );
  const douyin = {
    uid,
    eventCounts,
    salvage,
    mode: "salvage",
    cleanupFailed: false,
  };

  let syncReport = null;
  try {
    syncReport = await registry.syncAdapter("social-douyin", {
      inputPath: snapshotPath,
    });
  } finally {
    // Cleanup is best-effort; record the failure rather than mask a sync error.
    try {
      cleanupSnapshotJson(snapshotPath);
    } catch (_cleanupError) {
      douyin.cleanupFailed = true;
    }
  }

  return { ...syncReport, douyin };
}
259
+
162
260
  // ── Watch-history (video_record.db) path ─────────────────────────────────
163
261
  // Distinct from the IM-db path above: pulls the plaintext video_record.db and
164
262
  // emits `history` events (KIND_HISTORY → BROWSE) the social-douyin adapter
@@ -276,4 +374,6 @@ module.exports = {
276
374
  collectAndSync,
277
375
  collectWatchHistory,
278
376
  collectWatchHistoryAndSync,
377
+ salvageDumpToSnapshot,
378
+ salvageAndSync,
279
379
  };
@@ -130,14 +130,22 @@ function parseImDb(dbPath, opts = {}) {
130
130
  : 5_000;
131
131
  const Database = opts._databaseClass || loadDatabaseClass();
132
132
  const db = new Database(dbPath, { readonly: true });
133
+ const limitConversations =
134
+ Number.isInteger(opts.limitConversations) && opts.limitConversations > 0
135
+ ? opts.limitConversations
136
+ : 5_000;
133
137
  const out = {
134
138
  messages: [],
135
139
  contacts: [],
140
+ conversations: [],
136
141
  diagnostic: {
137
142
  messageCount: 0,
138
143
  contactCount: 0,
144
+ conversationCount: 0,
139
145
  hadMsgTable: false,
140
146
  hadSimpleUserTable: false,
147
+ hadParticipantTable: false,
148
+ hadConversationListTable: false,
141
149
  },
142
150
  };
143
151
  try {
@@ -230,6 +238,83 @@ function parseImDb(dbPath, opts = {}) {
230
238
  out.diagnostic.contactCount = out.contacts.length;
231
239
  }
232
240
  }
241
+
242
+ // ─── participant table (device-verified 2026-06-16) ──────────────────
243
+ // Real Douyin IM schema keeps conversation members in `participant`
244
+ // (conversation_id, user_id, sort_order; UNIQUE(conversation_id,user_id)),
245
+ // NOT SIMPLE_USER (which is older/other builds). Pull distinct member uids
246
+ // as contacts — uid-only (nickname/avatar live in a separate user table),
247
+ // so a PERSON gets created keyed by douyin-uid even without a name.
248
+ // Dedup against contacts already harvested from SIMPLE_USER.
249
+ const partTableInfo = trySelect(db, "PRAGMA table_info(participant)");
250
+ if (Array.isArray(partTableInfo) && partTableInfo.length > 0) {
251
+ out.diagnostic.hadParticipantTable = true;
252
+ const columns = new Set(partTableInfo.map((r) => r.name));
253
+ const uidCol = pickCol(columns, ["user_id", "uid", "UID"]);
254
+ if (uidCol) {
255
+ const seen = new Set(
256
+ out.contacts.map((c) => c.uid).filter(Boolean),
257
+ );
258
+ const sql =
259
+ `SELECT DISTINCT ${uidCol} AS uid FROM participant ` +
260
+ `WHERE ${uidCol} IS NOT NULL LIMIT ${limitContacts}`;
261
+ const rows = trySelect(db, sql) || [];
262
+ for (const r of rows) {
263
+ const uid = r.uid != null ? String(r.uid) : null;
264
+ if (!uid || seen.has(uid)) continue;
265
+ seen.add(uid);
266
+ out.contacts.push({
267
+ uid,
268
+ shortId: null,
269
+ name: null,
270
+ avatarUrl: null,
271
+ followStatus: null,
272
+ fromParticipant: true,
273
+ });
274
+ }
275
+ out.diagnostic.contactCount = out.contacts.length;
276
+ }
277
+ }
278
+
279
+ // ─── conversation_list table (device-verified 2026-06-16) ────────────
280
+ // Each row is a chat thread → PDH TOPIC. Columns vary by build; pick
281
+ // defensively. conversation_id is the only hard requirement.
282
+ const convTableInfo = trySelect(db, "PRAGMA table_info(conversation_list)");
283
+ if (Array.isArray(convTableInfo) && convTableInfo.length > 0) {
284
+ out.diagnostic.hadConversationListTable = true;
285
+ const columns = new Set(convTableInfo.map((r) => r.name));
286
+ const idCol = pickCol(columns, ["conversation_id", "conv_id", "id"]);
287
+ const typeCol = pickCol(columns, ["type", "conversation_type", "conv_type"]);
288
+ const lastTimeCol = pickCol(columns, [
289
+ "last_msg_create_time",
290
+ "last_message_time",
291
+ "updated_time",
292
+ ]);
293
+ const strangerCol = pickCol(columns, ["stranger", "is_stranger"]);
294
+ if (idCol) {
295
+ const fields = [`${idCol} AS convId`];
296
+ if (typeCol) fields.push(`${typeCol} AS convType`);
297
+ if (lastTimeCol) fields.push(`${lastTimeCol} AS lastMsgTime`);
298
+ if (strangerCol) fields.push(`${strangerCol} AS stranger`);
299
+ const orderBy = lastTimeCol ? ` ORDER BY ${lastTimeCol} DESC` : "";
300
+ const sql =
301
+ `SELECT ${fields.join(", ")} FROM conversation_list` +
302
+ `${orderBy} LIMIT ${limitConversations}`;
303
+ const rows = trySelect(db, sql) || [];
304
+ for (const r of rows) {
305
+ if (r.convId == null) continue;
306
+ out.conversations.push({
307
+ conversationId: String(r.convId),
308
+ conversationType:
309
+ typeof r.convType === "number" ? r.convType : null,
310
+ lastMsgTimeMs: normalizeEpochMs(r.lastMsgTime),
311
+ stranger:
312
+ typeof r.stranger === "number" ? r.stranger === 1 : null,
313
+ });
314
+ }
315
+ out.diagnostic.conversationCount = out.conversations.length;
316
+ }
317
+ }
233
318
  } finally {
234
319
  db.close();
235
320
  }
@@ -43,6 +43,8 @@ const {
43
43
  collectAndSync,
44
44
  collectWatchHistory,
45
45
  collectWatchHistoryAndSync,
46
+ salvageDumpToSnapshot,
47
+ salvageAndSync,
46
48
  } = require("./collector");
47
49
  const {
48
50
  createDouyinWatchExtension,
@@ -71,4 +73,7 @@ module.exports = {
71
73
  // Collector orchestrator
72
74
  collect,
73
75
  collectAndSync,
76
+ // Method B salvage path (/proc/mem dump → leaf-salvage → snapshot → ingest)
77
+ salvageDumpToSnapshot,
78
+ salvageAndSync,
74
79
  };
@@ -0,0 +1,119 @@
1
+ "use strict";
2
+ /*
3
+ * Glue: leaf-salvaged records → parseImDb-shaped output.
4
+ *
5
+ * The leaf-page salvager (scripts/android/pdh-sqlite-leaf-salvage.js) emits raw
6
+ * positional tuples {rowid, cols:[...]} (leaf pages carry no column names). This
7
+ * maps them into the SAME shape `parseImDb` returns ({messages, contacts,
8
+ * conversations}) so the existing DouyinAdapter.normalize path ingests them
9
+ * unchanged — closing the loop: Method-B dump → salvage → THIS → PDH entities.
10
+ *
11
+ * Column order comes from the table's CREATE TABLE (see docs/internal/
12
+ * pdh-app-db-schemas.md or grep the dump). Pass it explicitly for correctness;
13
+ * `inferMsgColumns` offers a heuristic fallback (content=JSON/longest text,
14
+ * created_time=epoch int) when the exact order is unknown.
15
+ */
16
+ const { _internals } = require("./im-db-parser");
17
+ const { extractTextFromContent, normalizeEpochMs } = _internals;
18
+
// Pair positional values with column names: { [names[i]]: cols[i] }.
// Names without a matching value map to undefined; surplus values are dropped.
function zip(cols, names) {
  const row = {};
  for (const [position, name] of names.entries()) {
    row[name] = cols[position];
  }
  return row;
}
24
+
/**
 * Map salvaged msg-table records to message objects, given the ordered
 * column names for the positional `cols` tuples.
 *
 * Records without a `cols` array are skipped, as are rows that carry neither
 * `content` nor `created_time`.
 *
 * @param {Array<{cols: Array}>} records  salvaged records (nullish → no output)
 * @param {string[]} columns              column name for each position in `cols`
 * @returns {Array<object>} messages with senderUid, conversationId,
 *   createdTimeMs, text, readStatus and contentBlob
 */
function mapMsgRecords(records, columns) {
  const messages = [];
  for (const rec of records || []) {
    if (!rec || !Array.isArray(rec.cols)) continue;

    const {
      content,
      created_time: createdRaw,
      sender,
      conversation_id: conversationRaw,
      read_status: readRaw,
    } = zip(rec.cols, columns);
    if (content == null && createdRaw == null) continue;

    // Non-numeric timestamps are coerced; anything non-finite collapses to 0.
    const createdNum = typeof createdRaw === "number" ? createdRaw : Number(createdRaw);
    const createdSafe = Number.isFinite(createdNum) ? createdNum : 0;

    messages.push({
      senderUid: sender != null ? String(sender) : null,
      conversationId: conversationRaw != null ? String(conversationRaw) : null,
      createdTimeMs: normalizeEpochMs(createdSafe),
      text: extractTextFromContent(content),
      readStatus: typeof readRaw === "number" ? readRaw : null,
      contentBlob: typeof content === "string" ? content : null,
    });
  }
  return messages;
}
45
+
/**
 * Map salvaged participant-table records to uid-only contact objects.
 * Each distinct `user_id` yields one contact; rows without one are skipped.
 *
 * @param {Array<{cols: Array}>} records  salvaged records (nullish → no output)
 * @param {string[]} columns              column name for each position in `cols`
 * @returns {Array<object>} contacts flagged with `fromParticipant: true`
 */
function mapParticipantRecords(records, columns) {
  const knownUids = new Set();
  const contacts = [];
  for (const rec of records || []) {
    if (!rec || !Array.isArray(rec.cols)) continue;

    const { user_id: rawUid } = zip(rec.cols, columns);
    const uid = rawUid != null ? String(rawUid) : null;
    if (!uid || knownUids.has(uid)) continue;

    knownUids.add(uid);
    contacts.push({
      uid,
      shortId: null,
      name: null,
      avatarUrl: null,
      followStatus: null,
      fromParticipant: true,
    });
  }
  return contacts;
}
59
+
/**
 * Map salvaged conversation_list records to conversation objects.
 * Rows without a `conversation_id` are skipped.
 *
 * @param {Array<{cols: Array}>} records  salvaged records (nullish → no output)
 * @param {string[]} columns              column name for each position in `cols`
 * @returns {Array<object>} conversations with conversationId, conversationType,
 *   lastMsgTimeMs and stranger
 */
function mapConversationRecords(records, columns) {
  const conversations = [];
  for (const rec of records || []) {
    if (!rec || !Array.isArray(rec.cols)) continue;

    const {
      conversation_id: conversationRaw,
      type,
      last_msg_create_time: lastMsgRaw,
      stranger,
    } = zip(rec.cols, columns);
    if (conversationRaw == null) continue;

    conversations.push({
      conversationId: String(conversationRaw),
      conversationType: typeof type === "number" ? type : null,
      // Missing or non-numeric timestamps are passed on as 0.
      lastMsgTimeMs: normalizeEpochMs(Number(lastMsgRaw) || 0),
      // Only the integer 1 counts as "stranger"; non-numbers stay unknown.
      stranger: typeof stranger === "number" ? stranger === 1 : null,
    });
  }
  return conversations;
}
75
+
// Window (in seconds) of believable message timestamps: 2001-09-09 … 2100-01-01.
const PLAUSIBLE_EPOCH_MIN_SEC = 1e9;
const PLAUSIBLE_EPOCH_MAX_SEC = 4102444800;

// True when `value` reads as a timestamp inside the plausible window at
// second, millisecond, microsecond or nanosecond resolution.
function isPlausibleEpoch(value) {
  if (!Number.isFinite(value) || value <= PLAUSIBLE_EPOCH_MIN_SEC) return false;
  let seconds = value;
  // Scale finer-grained units down to seconds, one factor of 1000 at a time.
  while (seconds > PLAUSIBLE_EPOCH_MAX_SEC) seconds /= 1000;
  return seconds > PLAUSIBLE_EPOCH_MIN_SEC;
}

/**
 * Heuristic: when the exact `msg` column order is unknown, guess content +
 * created_time positions from value shapes (content = a JSON-ish / longest
 * string; created_time = the largest plausible-epoch integer). Returns a column
 * name array usable with mapMsgRecords (unknown slots get c0,c1,...).
 *
 * Only the first record with at least 3 columns is inspected. Numbers that
 * cannot be a timestamp between 2001 and 2100 at any common resolution (for
 * example a large numeric id) are not considered for created_time, so such a
 * value can no longer outrank the real timestamp merely by being larger.
 *
 * @param {Array<{cols: Array}>} records  salvaged records (nullish → [])
 * @returns {string[]} inferred column names, or [] when no usable sample exists
 */
function inferMsgColumns(records) {
  const sample = (records || []).find(
    (r) => r && Array.isArray(r.cols) && r.cols.length >= 3,
  );
  if (!sample) return [];

  const cols = sample.cols;
  const names = cols.map((_, i) => `c${i}`);
  let contentIdx = -1;
  let contentScore = -1;
  let timeIdx = -1;
  let timeVal = -1;

  for (let i = 0; i < cols.length; i++) {
    const v = cols[i];
    if (typeof v === "string") {
      // A leading "{" (JSON-ish payload) outweighs any plain-text length.
      const score = (v.trim().startsWith("{") ? 1e6 : 0) + v.length;
      if (score > contentScore) {
        contentScore = score;
        contentIdx = i;
      }
    } else if (typeof v === "number" && isPlausibleEpoch(v) && v > timeVal) {
      // largest plausible epoch → created_time (sec/ms/us/ns resolutions)
      timeVal = v;
      timeIdx = i;
    }
  }

  if (contentIdx >= 0) names[contentIdx] = "content";
  if (timeIdx >= 0 && timeIdx !== contentIdx) names[timeIdx] = "created_time";
  return names;
}
103
+
/**
 * One-shot: salvaged records (mixed) → parseImDb shape, given per-table columns.
 * Each argument is `{ records, columns }`; an omitted table yields an empty list.
 *
 * @param {{msg?: object, participant?: object, conversation?: object}} [tables]
 * @returns {{messages: Array, contacts: Array, conversations: Array}}
 */
function mapSalvaged({ msg, participant, conversation } = {}) {
  // Run a mapper over a table bundle, or produce [] when the table is absent.
  const mapTable = (table, mapper) =>
    table ? mapper(table.records, table.columns) : [];

  const messages = mapTable(msg, mapMsgRecords);
  const contacts = mapTable(participant, mapParticipantRecords);
  const conversations = mapTable(conversation, mapConversationRecords);
  return { messages, contacts, conversations };
}
112
+
113
+ module.exports = {
114
+ mapMsgRecords,
115
+ mapParticipantRecords,
116
+ mapConversationRecords,
117
+ inferMsgColumns,
118
+ mapSalvaged,
119
+ };