@chainlesschain/personal-data-hub 0.4.26 → 0.4.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,97 @@
1
+ "use strict";
2
+
3
+ import { describe, it, expect, beforeAll, afterAll } from "vitest";
4
+
5
+ const fs = require("node:fs");
6
+ const path = require("node:path");
7
+ const os = require("node:os");
8
+
9
+ const { LocalVault } = require("../lib/vault");
10
+ const { generateKeyHex } = require("../lib/key-providers");
11
+ const {
12
+ buildSalvageEvents,
13
+ salvageDumpToVault,
14
+ resolveApp,
15
+ } = require("../lib/forensics/salvage-ingest");
16
+
17
// End-to-end check of the generic salvage → vault path. A real SQLite file is
// created and its bytes are treated as the "memory dump"; a real LocalVault is
// used so the hand-built events go through the vault's own write path and can
// be read back with queryEvents. Each test performs the ingest it depends on,
// so tests can be run individually or in any order (re-ingesting is expected
// to be idempotent, which the last test asserts explicitly).
describe("salvage-ingest — generic multi-app salvage → vault", () => {
  let dir, dumpPath, vault, vdir;
  const COLUMNS = ["msg_uuid", "conversation_id", "sender", "content", "created_time"];
  const NOW = 1700000099000;

  // Count the vault events attributed to a given source adapter.
  const countBySource = (adapter) =>
    (vault.queryEvents({ limit: 200 }) || []).filter(
      (e) => e.source && e.source.adapter === adapter,
    ).length;

  beforeAll(() => {
    const Database = require("better-sqlite3-multiple-ciphers");
    dir = fs.mkdtempSync(path.join(os.tmpdir(), "salvage-ing-"));
    dumpPath = path.join(dir, "u.db");
    const db = new Database(dumpPath);
    db.exec("CREATE TABLE msg(msg_uuid TEXT, conversation_id TEXT, sender INTEGER, content TEXT, created_time INTEGER)");
    const ins = db.prepare("INSERT INTO msg VALUES(?,?,?,?,?)");
    ins.run("u1", "conv-1", 111, "今天的会议改到下午三点 hi", 1700000000000);
    ins.run("u2", "conv-1", 222, "收到 👌", 1700000001000);
    db.close();

    vdir = fs.mkdtempSync(path.join(os.tmpdir(), "salvage-vault-"));
    vault = new LocalVault({ path: path.join(vdir, "v.db"), key: generateKeyHex() });
    vault.open();
  });

  afterAll(() => {
    // Best-effort cleanup: a failure here must not mask the real test result.
    try { vault.close(); } catch (_e) {}
    try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) {}
    try { fs.rmSync(vdir, { recursive: true, force: true }); } catch (_e) {}
  });

  it("resolveApp maps known apps to canonical source adapters; unknown → salvage:<app>", () => {
    expect(resolveApp("douyin").sourceAdapter).toBe("social-douyin");
    expect(resolveApp("toutiao").sourceAdapter).toBe("social-toutiao");
    expect(resolveApp("wechat").sourceAdapter).toBe("wechat");
    expect(resolveApp("kuaishou").sourceAdapter).toBe("social-kuaishou");
    expect(resolveApp("bogusapp").sourceAdapter).toBe("salvage:bogusapp");
  });

  it("buildSalvageEvents tags per-app source + builds valid message events", () => {
    const recs = [{ rowid: "1", cols: ["u1", "conv-1", 111, "hello 世界", 1700000000000] }];
    const built = buildSalvageEvents(recs, { app: "toutiao", columns: COLUMNS, now: NOW });
    expect(built.events.length).toBe(1);
    const e = built.events[0];
    expect(e.source.adapter).toBe("social-toutiao");
    expect(e.source.capturedBy).toBe("sqlite"); // schema enum; provenance in extra.salvaged
    expect(e.subtype).toBe("message");
    expect(e.content.text).toBe("hello 世界");
    expect(e.extra.platform).toBe("toutiao");
    expect(e.extra.salvaged).toBe(true);
  });

  it("salvageDumpToVault ingests with douyin source + events are searchable", () => {
    const r = salvageDumpToVault(vault, dumpPath, { app: "douyin", columns: COLUMNS, now: NOW });
    expect(r.app).toBe("douyin");
    expect(r.sourceAdapter).toBe("social-douyin");
    expect(r.ingested).toBe(2);
    // events landed under the correct source + are searchable
    const events = vault.queryEvents({ limit: 100 }) || [];
    const douyin = events.filter((e) => e.source && e.source.adapter === "social-douyin");
    expect(douyin.length).toBe(2);
    const texts = douyin.map((e) => e.content && e.content.text);
    expect(texts).toContain("收到 👌"); // UTF-8 emoji survives
  });

  it("same dump under a different app tags a different source (no cross-attribution)", () => {
    // Ingest under douyin here as well so this test does not rely on a
    // previous test having populated the vault.
    salvageDumpToVault(vault, dumpPath, { app: "douyin", columns: COLUMNS, now: NOW });
    const r = salvageDumpToVault(vault, dumpPath, { app: "toutiao", columns: COLUMNS, now: NOW });
    expect(r.sourceAdapter).toBe("social-toutiao");
    expect(r.ingested).toBe(2);
    expect(countBySource("social-toutiao")).toBe(2);
    // douyin events stay distinct and are not re-attributed
    expect(countBySource("social-douyin")).toBe(2);
  });

  it("re-ingesting the same dump dedups (stable originalId)", () => {
    // First ingest establishes the baseline inside this test, so the
    // assertion cannot pass vacuously on an empty vault.
    salvageDumpToVault(vault, dumpPath, { app: "douyin", columns: COLUMNS, now: NOW });
    const before = (vault.queryEvents({ limit: 200 }) || []).length;
    expect(before).toBeGreaterThan(0);
    salvageDumpToVault(vault, dumpPath, { app: "douyin", columns: COLUMNS, now: NOW });
    const after = (vault.queryEvents({ limit: 200 }) || []).length;
    expect(after).toBe(before); // ON CONFLICT(source_adapter, source_original_id) updates, no dupes
  });
});
@@ -0,0 +1,160 @@
1
+ "use strict";
2
+ /*
3
+ * Generic salvage → vault ingest (multi-app, correct source attribution).
4
+ *
5
+ * The leaf-page salvager recovers raw {rowid, cols} tuples from a /proc/mem
6
+ * dump of ANY app. This maps them into MESSAGE events and writes them straight
7
+ * to the vault with the CORRECT per-app `source.adapter` — so 抖音 data shows as
8
+ * social-douyin, 头条 as social-toutiao, 微信 as wechat, etc., instead of all
9
+ * being mis-attributed to one adapter.
10
+ *
11
+ * Why direct vault.putBatch (not registry.syncAdapter): the registry path runs
12
+ * an adapter's normalize(), which hard-stamps that adapter's name as the source.
13
+ * For salvaged cross-app data we need to stamp the source per the app the dump
14
+ * came from. Building events here + putBatch keeps attribution correct and is
15
+ * engine-agnostic. Events carry `source.capturedBy = "sqlite"` (the schema enum
16
+ * has no mem-salvage value); salvage provenance is `extra.salvaged = true`.
17
+ *
18
+ * Authorization: only on data you are entitled to (your own device/account).
19
+ * Docs: docs/internal/pdh-db-decryption-runbook.md (Method B).
20
+ */
21
+ const { newId } = require("../ids");
22
+ const { salvageFile } = require("./leaf-salvage");
23
+ const {
24
+ mapMsgRecords,
25
+ inferMsgColumns,
26
+ } = require("../adapters/social-douyin-adb/salvage-mapper");
27
+
28
// Written to every salvaged event as source.adapterVersion.
const SALVAGE_VERSION = "salvage-0.1";

// appKey → { sourceAdapter, platform }. `sourceAdapter` becomes the event's
// vault `source.adapter` and is the app's canonical adapter name, so byApp
// aggregation attributes correctly. Unknown apps fall back to a
// "salvage:<app>" source so they're still distinct + traceable.
const APP_SALVAGE = Object.freeze({
  douyin: { sourceAdapter: "social-douyin", platform: "douyin" },
  toutiao: { sourceAdapter: "social-toutiao", platform: "toutiao" },
  kuaishou: { sourceAdapter: "social-kuaishou", platform: "kuaishou" },
  xiaohongshu: { sourceAdapter: "social-xiaohongshu", platform: "xiaohongshu" },
  weibo: { sourceAdapter: "social-weibo", platform: "weibo" },
  wechat: { sourceAdapter: "wechat", platform: "wechat" },
  qq: { sourceAdapter: "qq", platform: "qq" },
});

/**
 * Resolve an app key to its salvage attribution config.
 *
 * The key is stringified and lower-cased; a missing/falsy `app` defaults to
 * "douyin". Keys not listed in APP_SALVAGE yield a synthetic
 * `{ sourceAdapter: "salvage:<key>", platform: "<key>" }` entry.
 *
 * @param {string} [app]
 * @returns {{ sourceAdapter: string, platform: string }}
 */
function resolveApp(app) {
  const key = String(app || "douyin").toLowerCase();
  // Own-property check: a bare APP_SALVAGE[key] lookup also finds inherited
  // Object.prototype members ("constructor", "__proto__"), which are truthy
  // and would be returned as a config with an undefined sourceAdapter.
  if (Object.prototype.hasOwnProperty.call(APP_SALVAGE, key)) {
    return APP_SALVAGE[key];
  }
  return { sourceAdapter: `salvage:${key}`, platform: key };
}
47
+
48
/**
 * Stable, content-derived dedup key: 32-bit FNV-1a over the UTF-16 code units
 * of the parts joined with "|", rendered as 8 lower-case hex digits.
 *
 * The same parts always produce the same key, so a message salvaged again
 * (re-scan) collides on ON CONFLICT(source_adapter, source_original_id) and
 * updates rather than duplicating.
 *
 * @param {Array<string>} parts salient fields of the message
 * @returns {string} 8-character hex digest
 */
function _stableKey(parts) {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;
  const s = parts.join("|");
  let h = FNV_OFFSET_BASIS;
  for (let i = 0; i < s.length; i++) {
    h ^= s.charCodeAt(i);
    // Math.imul is the exact 32-bit wrapping multiply; `>>> 0` reinterprets
    // the signed result as unsigned so the hex rendering is never negative.
    h = Math.imul(h, FNV_PRIME) >>> 0;
  }
  return h.toString(16).padStart(8, "0");
}
60
+
61
/**
 * Cut `text` down to at most `max` UTF-16 code units without splitting a
 * surrogate pair: if the cut would leave a lone high surrogate at the end
 * (e.g. the first half of an emoji), that code unit is dropped too.
 */
function _truncateText(text, max) {
  if (text.length <= max) {
    return text;
  }
  const last = text.charCodeAt(max - 1);
  const cutsPair = last >= 0xd800 && last <= 0xdbff;
  return text.slice(0, cutsPair ? max - 1 : max);
}

/**
 * Map salvaged records → MESSAGE events tagged with the per-app source.
 *
 * Records are mapped with mapMsgRecords using `opts.columns`, or columns
 * inferred by inferMsgColumns when none are given. From each mapped message
 * this reads `text`, `createdTimeMs`, `conversationId`, `senderUid` and
 * `contentBlob`. Each event gets a fresh `id` but a content-derived
 * `source.originalId` ("salvage:<platform>:<hash>") that serves as the dedup
 * key.
 *
 * @param {Array<{rowid,cols}>} records
 * @param {{ app?: string, columns?: string[], now?: number }} [opts]
 *   `now` (ms) is used as `ingestedAt` and as the `occurredAt` fallback for
 *   messages without a usable timestamp; defaults to Date.now().
 * @returns {{ events: object[], mapped: number, columns: string[], sourceAdapter: string }}
 */
function buildSalvageEvents(records, opts = {}) {
  const options = opts || {}; // tolerate an explicit null
  const cfg = resolveApp(options.app);
  const now = Number.isFinite(options.now) ? options.now : Date.now();
  const columns = Array.isArray(options.columns) && options.columns.length
    ? options.columns
    : inferMsgColumns(records || []);
  const messages = mapMsgRecords(records || [], columns);
  const events = [];
  for (const m of messages) {
    const text = typeof m.text === "string" ? m.text : "";
    const hasTimestamp = Number.isFinite(m.createdTimeMs) && m.createdTimeMs > 0;
    const occurredAt = hasTimestamp ? m.createdTimeMs : now;
    const key = _stableKey([
      cfg.platform,
      String(m.conversationId || m.senderUid || ""),
      // Only a real message timestamp goes into the key. Hashing the `now`
      // fallback would give a timestamp-less message a different key on every
      // re-scan and defeat dedup. Keys of timestamped messages are unchanged.
      hasTimestamp ? String(m.createdTimeMs) : "",
      // Plain slice on purpose: this only feeds the hash, and changing the
      // cut would change the keys of already-ingested messages.
      text.slice(0, 64),
    ]);
    events.push({
      id: newId(),
      type: "event",
      subtype: "message",
      occurredAt,
      actor: "person-self",
      content: {
        // Surrogate-safe cut so a title never ends on half an emoji.
        title: text ? _truncateText(text, 80) : "(非文本消息)",
        text,
      },
      ingestedAt: now,
      source: {
        adapter: cfg.sourceAdapter,
        adapterVersion: SALVAGE_VERSION,
        originalId: `salvage:${cfg.platform}:${key}`,
        capturedAt: occurredAt,
        // schema enum: export|api|sqlite|accessibility|ocr|manual. Salvaged
        // pages ARE SQLite rows; mem-salvage provenance lives in extra.salvaged.
        capturedBy: "sqlite",
      },
      extra: {
        platform: cfg.platform,
        channel: "im",
        salvaged: true,
        senderUid: m.senderUid || null,
        conversationId: m.conversationId || null,
        contentBlob: typeof m.contentBlob === "string" ? m.contentBlob : null,
      },
    });
  }
  return { events, mapped: messages.length, columns, sourceAdapter: cfg.sourceAdapter };
}
118
+
119
/**
 * Salvage a memory dump and write the recovered messages straight into the
 * vault with the correct per-app source. Returns counts.
 *
 * The dump is scanned with salvageFile, the recovered records are turned into
 * events by buildSalvageEvents, and the events (if any) are written with a
 * single vault.putBatch({ events }) call.
 *
 * @param {object} vault LocalVault (must expose putBatch)
 * @param {string} dumpPath non-empty path of the dump file
 * @param {{ app?: string, columns?: string[], unaligned?: boolean, pageSize?: number, minCols?: number, stride?: number, now?: number }} [opts]
 *   `pageSize`, `minCols`, `unaligned` and `stride` are forwarded to
 *   salvageFile; the whole object is forwarded to buildSalvageEvents.
 * @returns {{ ingested: number, salvaged: number, mapped: number, app: string, sourceAdapter: string, leafPages: number }}
 *   `ingested` is the `events` count reported by putBatch (0 when nothing was
 *   written), `salvaged` the number of raw records, `mapped` the number of
 *   messages mapped from them, `leafPages` the `pages` value from salvageFile.
 * @throws {TypeError} when `vault` has no putBatch function or `dumpPath` is
 *   not a non-empty string
 */
function salvageDumpToVault(vault, dumpPath, opts = {}) {
  if (!vault || typeof vault.putBatch !== "function") {
    throw new TypeError("salvageDumpToVault: vault with putBatch required");
  }
  if (typeof dumpPath !== "string" || !dumpPath) {
    throw new TypeError("salvageDumpToVault: dumpPath required");
  }
  const options = opts || {}; // tolerate an explicit null
  const { records, pages } = salvageFile(dumpPath, {
    pageSize: options.pageSize,
    minCols: options.minCols,
    unaligned: options.unaligned,
    stride: options.stride,
  });
  const built = buildSalvageEvents(records, options);
  // Skip the write entirely when nothing was mapped.
  const res = built.events.length
    ? vault.putBatch({ events: built.events })
    : null;
  return {
    // A vault whose putBatch returns nothing is reported as 0 ingested
    // instead of throwing on `res.events`.
    ingested: (res && res.events) || 0,
    salvaged: records.length,
    mapped: built.mapped,
    app: String(options.app || "douyin").toLowerCase(),
    sourceAdapter: built.sourceAdapter,
    leafPages: pages,
  };
}
154
+
155
+ module.exports = {
156
+ APP_SALVAGE,
157
+ resolveApp,
158
+ buildSalvageEvents,
159
+ salvageDumpToVault,
160
+ };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@chainlesschain/personal-data-hub",
3
- "version": "0.4.26",
3
+ "version": "0.4.27",
4
4
  "description": "Personal Data Hub — UnifiedSchema + validators + KG ingest helpers for the data-back-to-the-individual middleware",
5
5
  "type": "commonjs",
6
6
  "main": "lib/index.js",
@@ -68,7 +68,9 @@
68
68
  "./adapters/messaging-qq": "./lib/adapters/messaging-qq/index.js",
69
69
  "./adapters/messaging-telegram": "./lib/adapters/messaging-telegram/index.js",
70
70
  "./adapters/messaging-whatsapp": "./lib/adapters/messaging-whatsapp/index.js",
71
- "./sidecar": "./lib/sidecar/index.js"
71
+ "./sidecar": "./lib/sidecar/index.js",
72
+ "./forensics/leaf-salvage": "./lib/forensics/leaf-salvage.js",
73
+ "./forensics/salvage-ingest": "./lib/forensics/salvage-ingest.js"
72
74
  },
73
75
  "scripts": {
74
76
  "test": "vitest run",