@chainlesschain/personal-data-hub 0.4.25 → 0.4.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/analysis-skills.test.js +71 -2
- package/__tests__/analysis.test.js +46 -0
- package/__tests__/salvage-ingest.test.js +97 -0
- package/__tests__/social-douyin-im-direct-read.test.js +69 -3
- package/__tests__/social-douyin-salvage-collector.test.js +98 -0
- package/__tests__/social-douyin-salvage-mapper.test.js +90 -0
- package/__tests__/social-weibo-sqlite-device.test.js +174 -0
- package/__tests__/sqlite-leaf-salvage.test.js +97 -0
- package/lib/adapters/social-douyin/index.js +56 -2
- package/lib/adapters/social-douyin-adb/collector.js +100 -0
- package/lib/adapters/social-douyin-adb/im-db-parser.js +85 -0
- package/lib/adapters/social-douyin-adb/index.js +5 -0
- package/lib/adapters/social-douyin-adb/salvage-mapper.js +119 -0
- package/lib/adapters/social-weibo/index.js +110 -30
- package/lib/analysis-skills/index.js +3 -0
- package/lib/analysis-skills/overview.js +157 -0
- package/lib/analysis.js +50 -0
- package/lib/forensics/leaf-salvage.js +185 -0
- package/lib/forensics/salvage-ingest.js +160 -0
- package/lib/prompt-builder.js +9 -0
- package/package.json +4 -2
|
@@ -14,6 +14,7 @@ const {
|
|
|
14
14
|
FootprintSkill,
|
|
15
15
|
InterestsSkill,
|
|
16
16
|
TimelineSkill,
|
|
17
|
+
OverviewSkill,
|
|
17
18
|
runAnalysisSkill,
|
|
18
19
|
ANALYSIS_SKILL_NAMES,
|
|
19
20
|
} = require("../lib/analysis-skills");
|
|
@@ -105,13 +106,14 @@ describe("AnalysisSkill base", () => {
|
|
|
105
106
|
expect(skill.resolveTimeWindow({}).since).toBeNull();
|
|
106
107
|
});
|
|
107
108
|
|
|
108
|
-
it("ANALYSIS_SKILL_NAMES lists exactly 5", () => {
|
|
109
|
-
expect(ANALYSIS_SKILL_NAMES).toHaveLength(5);
|
|
109
|
+
it("ANALYSIS_SKILL_NAMES lists exactly 6", () => {
|
|
110
|
+
expect(ANALYSIS_SKILL_NAMES).toHaveLength(6);
|
|
110
111
|
expect(ANALYSIS_SKILL_NAMES).toContain("analysis.spending");
|
|
111
112
|
expect(ANALYSIS_SKILL_NAMES).toContain("analysis.relations");
|
|
112
113
|
expect(ANALYSIS_SKILL_NAMES).toContain("analysis.footprint");
|
|
113
114
|
expect(ANALYSIS_SKILL_NAMES).toContain("analysis.interests");
|
|
114
115
|
expect(ANALYSIS_SKILL_NAMES).toContain("analysis.timeline");
|
|
116
|
+
expect(ANALYSIS_SKILL_NAMES).toContain("analysis.overview");
|
|
115
117
|
});
|
|
116
118
|
|
|
117
119
|
it("base.run() throws (subclasses must override)", async () => {
|
|
@@ -554,3 +556,70 @@ describe("runAnalysisSkill", () => {
|
|
|
554
556
|
await expect(runAnalysisSkill({}, "analysis.spending", {})).rejects.toThrow(/vault/);
|
|
555
557
|
});
|
|
556
558
|
});
|
|
559
|
+
|
|
560
|
+
// ─── OverviewSkill (cross-app de-silo aggregation) ──────────────────────
|
|
561
|
+
describe("OverviewSkill — cross-app unified snapshot", () => {
|
|
562
|
+
let rig;
|
|
563
|
+
beforeEach(() => { rig = makeVault(); });
|
|
564
|
+
afterEach(() => cleanup(rig));
|
|
565
|
+
|
|
566
|
+
function makeMsg(vault, opts) {
|
|
567
|
+
vault.putEvent({
|
|
568
|
+
id: opts.id, type: "event", subtype: opts.subtype || "message",
|
|
569
|
+
occurredAt: opts.occurredAt, actor: opts.actor || "person-self",
|
|
570
|
+
participants: opts.participants || [],
|
|
571
|
+
content: { title: opts.title || "msg" },
|
|
572
|
+
ingestedAt: Date.now(), source: defaultSource(opts.adapter || "test"),
|
|
573
|
+
});
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
it("aggregates events/spend/contacts across multiple apps", async () => {
|
|
577
|
+
const { vault } = rig;
|
|
578
|
+
makePerson(vault, "p-friend", ["小明"], {}, { adapter: "wechat" });
|
|
579
|
+
// payments from 2 finance/shopping apps
|
|
580
|
+
makePayment(vault, { id: "e1", amount: 30, occurredAt: ts(2026, 5, 1), adapter: "alipay-bill", subtype: "payment" });
|
|
581
|
+
makePayment(vault, { id: "e2", amount: 70, occurredAt: ts(2026, 5, 2), adapter: "shopping-taobao", subtype: "order" });
|
|
582
|
+
// messages from 2 social/im apps, same friend
|
|
583
|
+
makeMsg(vault, { id: "e3", occurredAt: ts(2026, 5, 3), adapter: "wechat", participants: ["p-friend"] });
|
|
584
|
+
makeMsg(vault, { id: "e4", occurredAt: ts(2026, 5, 4), adapter: "social-douyin", participants: ["p-friend"] });
|
|
585
|
+
makeMsg(vault, { id: "e5", occurredAt: ts(2026, 6, 1), adapter: "social-douyin", subtype: "post", participants: [] });
|
|
586
|
+
|
|
587
|
+
const skill = new OverviewSkill({ vault });
|
|
588
|
+
const r = await skill.run({ commentary: false });
|
|
589
|
+
|
|
590
|
+
expect(r.summary.totalEvents).toBe(5);
|
|
591
|
+
expect(r.summary.appsActive).toBe(4); // alipay-bill, shopping-taobao, wechat, social-douyin
|
|
592
|
+
});
|
|
593
|
+
|
|
594
|
+
it("counts 4 distinct apps + sums cross-app spend + top contact merged", async () => {
|
|
595
|
+
const { vault } = rig;
|
|
596
|
+
makePerson(vault, "p-friend", ["小明"], {}, { adapter: "wechat" });
|
|
597
|
+
makePayment(vault, { id: "a", amount: 30, occurredAt: ts(2026, 5, 1), adapter: "alipay-bill", subtype: "payment" });
|
|
598
|
+
makePayment(vault, { id: "b", amount: 70, occurredAt: ts(2026, 5, 2), adapter: "shopping-taobao", subtype: "order" });
|
|
599
|
+
makeMsg(vault, { id: "c", occurredAt: ts(2026, 5, 3), adapter: "wechat", participants: ["p-friend"] });
|
|
600
|
+
makeMsg(vault, { id: "d", occurredAt: ts(2026, 5, 4), adapter: "social-douyin", participants: ["p-friend"] });
|
|
601
|
+
|
|
602
|
+
const r = await new OverviewSkill({ vault }).run({ commentary: false });
|
|
603
|
+
const apps = r.byApp.map((x) => x.app).sort();
|
|
604
|
+
expect(apps).toContain("wechat");
|
|
605
|
+
expect(apps).toContain("social-douyin");
|
|
606
|
+
expect(apps).toContain("alipay-bill");
|
|
607
|
+
expect(apps).toContain("shopping-taobao");
|
|
608
|
+
expect(r.summary.appsActive).toBe(4);
|
|
609
|
+
expect(r.spending.total).toBe(100); // 30 + 70 across two apps
|
|
610
|
+
// the friend appears in wechat + douyin → one merged top contact w/ byApp breakdown
|
|
611
|
+
const friend = r.topContacts.find((c) => c.personId === "p-friend");
|
|
612
|
+
expect(friend).toBeTruthy();
|
|
613
|
+
expect(friend.interactions).toBe(2);
|
|
614
|
+
expect(Object.keys(friend.byApp).sort()).toEqual(["social-douyin", "wechat"]);
|
|
615
|
+
// byType has payment/order/message
|
|
616
|
+
const types = r.byType.map((t) => t.type);
|
|
617
|
+
expect(types).toContain("message");
|
|
618
|
+
});
|
|
619
|
+
|
|
620
|
+
it("is registered + runnable via runAnalysisSkill", async () => {
|
|
621
|
+
expect(ANALYSIS_SKILL_NAMES).toContain("analysis.overview");
|
|
622
|
+
const r = await runAnalysisSkill({ vault: rig.vault }, "analysis.overview", { commentary: false });
|
|
623
|
+
expect(r.skill).toBe("analysis.overview");
|
|
624
|
+
});
|
|
625
|
+
});
|
|
@@ -1797,3 +1797,49 @@ describe("AnalysisEngine._gatherFacts intent=count routing", () => {
|
|
|
1797
1797
|
expect(r.facts.filter((f) => f.type === "item").length).toBe(3);
|
|
1798
1798
|
});
|
|
1799
1799
|
});
|
|
1800
|
+
|
|
1801
|
+
// ─── ① cross-app overview injected into ask() prompt (decision grounding) ──
|
|
1802
|
+
describe("AnalysisEngine.ask crossApp overview context", () => {
|
|
1803
|
+
function seedMultiApp(vault) {
|
|
1804
|
+
vault.putPerson({
|
|
1805
|
+
id: "person-friend", type: "person", subtype: "contact",
|
|
1806
|
+
names: ["小明"], identifiers: {}, ingestedAt: Date.now(), source: source("wechat-pc"),
|
|
1807
|
+
});
|
|
1808
|
+
vault.putEvent({
|
|
1809
|
+
id: newId(), type: "event", subtype: "order", occurredAt: ts(2026, 3, 10),
|
|
1810
|
+
actor: "person-self",
|
|
1811
|
+
content: { title: "鞋", amount: { value: 200, currency: "CNY", direction: "out" } },
|
|
1812
|
+
ingestedAt: Date.now(), source: source("shopping-taobao", "o1"),
|
|
1813
|
+
});
|
|
1814
|
+
vault.putEvent({
|
|
1815
|
+
id: newId(), type: "event", subtype: "message", occurredAt: ts(2026, 3, 11),
|
|
1816
|
+
actor: "person-self", participants: ["person-friend"],
|
|
1817
|
+
content: { title: "hi", text: "hi" },
|
|
1818
|
+
ingestedAt: Date.now(), source: source("wechat-pc", "m1"),
|
|
1819
|
+
});
|
|
1820
|
+
}
|
|
1821
|
+
|
|
1822
|
+
it("injects CROSS_APP_OVERVIEW block when crossApp:true", async () => {
|
|
1823
|
+
freshVault();
|
|
1824
|
+
seedMultiApp(vault);
|
|
1825
|
+
const llm = new MockLLMClient({ reply: "建议:…" });
|
|
1826
|
+
const engine = new AnalysisEngine({ vault, llm });
|
|
1827
|
+
await engine.ask("综合我各 app 的数据,我最近重心在哪?", { crossApp: true });
|
|
1828
|
+
const userMsg = llm.calls[0].messages.find((m) => m.role === "user").content;
|
|
1829
|
+
expect(userMsg).toContain("CROSS_APP_OVERVIEW");
|
|
1830
|
+
expect(userMsg).toContain("活跃 app");
|
|
1831
|
+
// both apps surface in the cross-app aggregation
|
|
1832
|
+
expect(userMsg).toMatch(/shopping-taobao|wechat-pc/);
|
|
1833
|
+
expect(userMsg).toContain("跨 app 消费合计");
|
|
1834
|
+
});
|
|
1835
|
+
|
|
1836
|
+
it("omits CROSS_APP_OVERVIEW when crossApp not set", async () => {
|
|
1837
|
+
freshVault();
|
|
1838
|
+
seedMultiApp(vault);
|
|
1839
|
+
const llm = new MockLLMClient({ reply: "ok" });
|
|
1840
|
+
const engine = new AnalysisEngine({ vault, llm });
|
|
1841
|
+
await engine.ask("随便问问", {});
|
|
1842
|
+
const userMsg = llm.calls[0].messages.find((m) => m.role === "user").content;
|
|
1843
|
+
expect(userMsg).not.toContain("CROSS_APP_OVERVIEW");
|
|
1844
|
+
});
|
|
1845
|
+
});
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
|
4
|
+
|
|
5
|
+
const fs = require("node:fs");
|
|
6
|
+
const path = require("node:path");
|
|
7
|
+
const os = require("node:os");
|
|
8
|
+
|
|
9
|
+
const { LocalVault } = require("../lib/vault");
|
|
10
|
+
const { generateKeyHex } = require("../lib/key-providers");
|
|
11
|
+
const {
|
|
12
|
+
buildSalvageEvents,
|
|
13
|
+
salvageDumpToVault,
|
|
14
|
+
resolveApp,
|
|
15
|
+
} = require("../lib/forensics/salvage-ingest");
|
|
16
|
+
|
|
17
|
+
// Build a real SQLite DB and treat its bytes as a memory dump; verify the
|
|
18
|
+
// generic salvage→vault path recovers messages AND tags them with the correct
|
|
19
|
+
// per-app source.adapter (multi-app de-silo). Real LocalVault → proves the
|
|
20
|
+
// hand-built events pass schema validation + are searchable.
|
|
21
|
+
describe("salvage-ingest — generic multi-app salvage → vault", () => {
|
|
22
|
+
let dir, dumpPath, vault, vdir;
|
|
23
|
+
const COLUMNS = ["msg_uuid", "conversation_id", "sender", "content", "created_time"];
|
|
24
|
+
|
|
25
|
+
beforeAll(() => {
|
|
26
|
+
const Database = require("better-sqlite3-multiple-ciphers");
|
|
27
|
+
dir = fs.mkdtempSync(path.join(os.tmpdir(), "salvage-ing-"));
|
|
28
|
+
dumpPath = path.join(dir, "u.db");
|
|
29
|
+
const db = new Database(dumpPath);
|
|
30
|
+
db.exec("CREATE TABLE msg(msg_uuid TEXT, conversation_id TEXT, sender INTEGER, content TEXT, created_time INTEGER)");
|
|
31
|
+
const ins = db.prepare("INSERT INTO msg VALUES(?,?,?,?,?)");
|
|
32
|
+
ins.run("u1", "conv-1", 111, "今天的会议改到下午三点 hi", 1700000000000);
|
|
33
|
+
ins.run("u2", "conv-1", 222, "收到 👌", 1700000001000);
|
|
34
|
+
db.close();
|
|
35
|
+
|
|
36
|
+
vdir = fs.mkdtempSync(path.join(os.tmpdir(), "salvage-vault-"));
|
|
37
|
+
vault = new LocalVault({ path: path.join(vdir, "v.db"), key: generateKeyHex() });
|
|
38
|
+
vault.open();
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
afterAll(() => {
|
|
42
|
+
try { vault.close(); } catch (_e) {}
|
|
43
|
+
try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) {}
|
|
44
|
+
try { fs.rmSync(vdir, { recursive: true, force: true }); } catch (_e) {}
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
it("resolveApp maps known apps to canonical source adapters; unknown → salvage:<app>", () => {
|
|
48
|
+
expect(resolveApp("douyin").sourceAdapter).toBe("social-douyin");
|
|
49
|
+
expect(resolveApp("toutiao").sourceAdapter).toBe("social-toutiao");
|
|
50
|
+
expect(resolveApp("wechat").sourceAdapter).toBe("wechat");
|
|
51
|
+
expect(resolveApp("kuaishou").sourceAdapter).toBe("social-kuaishou");
|
|
52
|
+
expect(resolveApp("bogusapp").sourceAdapter).toBe("salvage:bogusapp");
|
|
53
|
+
});
|
|
54
|
+
|
|
55
|
+
it("buildSalvageEvents tags per-app source + builds valid message events", () => {
|
|
56
|
+
const recs = [{ rowid: "1", cols: ["u1", "conv-1", 111, "hello 世界", 1700000000000] }];
|
|
57
|
+
const built = buildSalvageEvents(recs, { app: "toutiao", columns: COLUMNS, now: 1700000099000 });
|
|
58
|
+
expect(built.events.length).toBe(1);
|
|
59
|
+
const e = built.events[0];
|
|
60
|
+
expect(e.source.adapter).toBe("social-toutiao");
|
|
61
|
+
expect(e.source.capturedBy).toBe("sqlite"); // schema enum; provenance in extra.salvaged
|
|
62
|
+
expect(e.subtype).toBe("message");
|
|
63
|
+
expect(e.content.text).toBe("hello 世界");
|
|
64
|
+
expect(e.extra.platform).toBe("toutiao");
|
|
65
|
+
expect(e.extra.salvaged).toBe(true);
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
it("salvageDumpToVault ingests with douyin source + events are searchable", () => {
|
|
69
|
+
const r = salvageDumpToVault(vault, dumpPath, { app: "douyin", columns: COLUMNS, now: 1700000099000 });
|
|
70
|
+
expect(r.app).toBe("douyin");
|
|
71
|
+
expect(r.sourceAdapter).toBe("social-douyin");
|
|
72
|
+
expect(r.ingested).toBe(2);
|
|
73
|
+
// events landed under the correct source + are searchable
|
|
74
|
+
const events = vault.queryEvents({ limit: 100 }) || [];
|
|
75
|
+
const douyin = events.filter((e) => e.source && e.source.adapter === "social-douyin");
|
|
76
|
+
expect(douyin.length).toBe(2);
|
|
77
|
+
const texts = douyin.map((e) => e.content && e.content.text).sort();
|
|
78
|
+
expect(texts).toContain("收到 👌"); // UTF-8 emoji survives
|
|
79
|
+
});
|
|
80
|
+
|
|
81
|
+
it("same dump under a different app tags a different source (no cross-attribution)", () => {
|
|
82
|
+
const r = salvageDumpToVault(vault, dumpPath, { app: "toutiao", columns: COLUMNS, now: 1700000099000 });
|
|
83
|
+
expect(r.sourceAdapter).toBe("social-toutiao");
|
|
84
|
+
expect(r.ingested).toBe(2);
|
|
85
|
+
const events = vault.queryEvents({ limit: 100 }) || [];
|
|
86
|
+
expect(events.filter((e) => e.source && e.source.adapter === "social-toutiao").length).toBe(2);
|
|
87
|
+
// douyin events from prior test remain distinct
|
|
88
|
+
expect(events.filter((e) => e.source && e.source.adapter === "social-douyin").length).toBe(2);
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
it("re-ingesting the same dump dedups (stable originalId)", () => {
|
|
92
|
+
const before = (vault.queryEvents({ limit: 200 }) || []).length;
|
|
93
|
+
salvageDumpToVault(vault, dumpPath, { app: "douyin", columns: COLUMNS, now: 1700000099000 });
|
|
94
|
+
const after = (vault.queryEvents({ limit: 200 }) || []).length;
|
|
95
|
+
expect(after).toBe(before); // ON CONFLICT(source_adapter, source_original_id) updates, no dupes
|
|
96
|
+
});
|
|
97
|
+
});
|
|
@@ -31,7 +31,7 @@ const { partitionBatch } = require("../lib/batch");
|
|
|
31
31
|
*/
|
|
32
32
|
|
|
33
33
|
// Fake better-sqlite3-style driver answering the parser's PRAGMA + SELECTs.
|
|
34
|
-
function makeFakeDb({ msgRows, userRows, msgCols, userCols }) {
|
|
34
|
+
function makeFakeDb({ msgRows, userRows, msgCols, userCols, partCols, partRows, convCols, convRows }) {
|
|
35
35
|
class FakeStmt {
|
|
36
36
|
constructor(sql) {
|
|
37
37
|
this.sql = sql;
|
|
@@ -40,8 +40,12 @@ function makeFakeDb({ msgRows, userRows, msgCols, userCols }) {
|
|
|
40
40
|
const s = this.sql;
|
|
41
41
|
if (/PRAGMA table_info\(msg\)/.test(s)) return msgCols;
|
|
42
42
|
if (/FROM msg/.test(s)) return msgRows;
|
|
43
|
-
if (/PRAGMA table_info\(SIMPLE_USER\)/.test(s)) return userCols;
|
|
44
|
-
if (/FROM SIMPLE_USER/.test(s)) return userRows;
|
|
43
|
+
if (/PRAGMA table_info\(SIMPLE_USER\)/.test(s)) return userCols || [];
|
|
44
|
+
if (/FROM SIMPLE_USER/.test(s)) return userRows || [];
|
|
45
|
+
if (/PRAGMA table_info\(participant\)/.test(s)) return partCols || [];
|
|
46
|
+
if (/FROM participant/.test(s)) return partRows || [];
|
|
47
|
+
if (/PRAGMA table_info\(conversation_list\)/.test(s)) return convCols || [];
|
|
48
|
+
if (/FROM conversation_list/.test(s)) return convRows || [];
|
|
45
49
|
return [];
|
|
46
50
|
}
|
|
47
51
|
}
|
|
@@ -261,6 +265,68 @@ describe("DouyinAdapter — 本地直读 <uid>_im.db", () => {
|
|
|
261
265
|
const raws = await collect(a.sync({ imDbPath: "/does/not/exist_im.db" }));
|
|
262
266
|
expect(raws).toHaveLength(0);
|
|
263
267
|
});
|
|
268
|
+
|
|
269
|
+
// device-verified 2026-06-16: real Douyin IM schema uses `participant`
|
|
270
|
+
// (conversation_id, user_id), not SIMPLE_USER → contacts must come from it.
|
|
271
|
+
it("extracts contacts from `participant` when SIMPLE_USER absent (real schema)", async () => {
|
|
272
|
+
const spec = {
|
|
273
|
+
msgCols: DEFAULT_FAKE.msgCols,
|
|
274
|
+
msgRows: DEFAULT_FAKE.msgRows,
|
|
275
|
+
userCols: [], // no SIMPLE_USER table on a real device
|
|
276
|
+
userRows: [],
|
|
277
|
+
partCols: [{ name: "conversation_id" }, { name: "user_id" }, { name: "sort_order" }],
|
|
278
|
+
partRows: [{ uid: 111 }, { uid: 222 }, { uid: 222 }], // dup 222 → deduped
|
|
279
|
+
};
|
|
280
|
+
const a = freshAdapter(spec);
|
|
281
|
+
const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
|
|
282
|
+
const contacts = raws.filter((r) => r.kind === "contact");
|
|
283
|
+
expect(contacts.map((r) => r.payload.uid).sort()).toEqual(["111", "222"]);
|
|
284
|
+
// each participant uid → a CONTACT person keyed by douyin-uid
|
|
285
|
+
const n = a.normalize(contacts[0]);
|
|
286
|
+
expect(n.persons[0].identifiers["douyin-uid"]).toEqual([contacts[0].payload.uid]);
|
|
287
|
+
});
|
|
288
|
+
|
|
289
|
+
// device-verified: conversation_list row → PDH TOPIC (one chat thread).
|
|
290
|
+
it("maps conversation_list rows to TOPIC entities", async () => {
|
|
291
|
+
const spec = {
|
|
292
|
+
msgCols: DEFAULT_FAKE.msgCols,
|
|
293
|
+
msgRows: DEFAULT_FAKE.msgRows,
|
|
294
|
+
userCols: [], userRows: [],
|
|
295
|
+
convCols: [
|
|
296
|
+
{ name: "conversation_id" }, { name: "type" },
|
|
297
|
+
{ name: "last_msg_create_time" }, { name: "stranger" },
|
|
298
|
+
],
|
|
299
|
+
convRows: [
|
|
300
|
+
{ convId: "conv-1", convType: 0, lastMsgTime: 1700000002000, stranger: 0 },
|
|
301
|
+
{ convId: "conv-2", convType: 1, lastMsgTime: 1700000003000, stranger: 1 },
|
|
302
|
+
],
|
|
303
|
+
};
|
|
304
|
+
const a = freshAdapter(spec);
|
|
305
|
+
const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
|
|
306
|
+
const convs = raws.filter((r) => r.kind === "conversation");
|
|
307
|
+
expect(convs.map((r) => r.payload.conversationId)).toEqual(["conv-1", "conv-2"]);
|
|
308
|
+
const n = a.normalize(convs[1]);
|
|
309
|
+
expect(n.topics).toHaveLength(1);
|
|
310
|
+
expect(n.topics[0].type).toBe("topic");
|
|
311
|
+
expect(n.topics[0].extra.conversationId).toBe("conv-2");
|
|
312
|
+
expect(n.topics[0].extra.stranger).toBe(true);
|
|
313
|
+
expect(n.topics[0].extra.lastMsgTimeMs).toBe(1700000003000);
|
|
314
|
+
});
|
|
315
|
+
|
|
316
|
+
it("participant dedups against SIMPLE_USER contacts (no double-count)", async () => {
|
|
317
|
+
const spec = {
|
|
318
|
+
msgCols: DEFAULT_FAKE.msgCols,
|
|
319
|
+
msgRows: DEFAULT_FAKE.msgRows,
|
|
320
|
+
userCols: DEFAULT_FAKE.userCols,
|
|
321
|
+
userRows: DEFAULT_FAKE.userRows, // uid 222 from SIMPLE_USER
|
|
322
|
+
partCols: [{ name: "conversation_id" }, { name: "user_id" }],
|
|
323
|
+
partRows: [{ uid: 222 }, { uid: 333 }], // 222 already seen, only 333 is new
|
|
324
|
+
};
|
|
325
|
+
const a = freshAdapter(spec);
|
|
326
|
+
const raws = await collect(a.sync({ imDbPath: "/fake/123_im.db" }));
|
|
327
|
+
const uids = raws.filter((r) => r.kind === "contact").map((r) => r.payload.uid).sort();
|
|
328
|
+
expect(uids).toEqual(["222", "333"]); // 222 not duplicated
|
|
329
|
+
});
|
|
264
330
|
});
|
|
265
331
|
|
|
266
332
|
describe("DouyinAdapter — sync() input routing (sniff)", () => {
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
|
4
|
+
|
|
5
|
+
const fs = require("node:fs");
|
|
6
|
+
const path = require("node:path");
|
|
7
|
+
const os = require("node:os");
|
|
8
|
+
|
|
9
|
+
const {
|
|
10
|
+
salvageDumpToSnapshot,
|
|
11
|
+
salvageAndSync,
|
|
12
|
+
} = require("../lib/adapters/social-douyin-adb/collector");
|
|
13
|
+
|
|
14
|
+
// Build a real (UTF-8) SQLite DB via the SQLCipher-capable driver and treat its
|
|
15
|
+
// raw bytes as a "memory dump" — proving the salvage → snapshot → ingest path
|
|
16
|
+
// recovers message rows with no key (the Method-B capstone). The msg-table
|
|
17
|
+
// column order matches the device-verified Douyin IM schema.
|
|
18
|
+
describe("social-douyin-adb salvage collector", () => {
|
|
19
|
+
let dir, dbPath;
|
|
20
|
+
const COLUMNS = ["msg_uuid", "conversation_id", "sender", "content", "created_time"];
|
|
21
|
+
|
|
22
|
+
beforeAll(() => {
|
|
23
|
+
const Database = require("better-sqlite3-multiple-ciphers");
|
|
24
|
+
dir = fs.mkdtempSync(path.join(os.tmpdir(), "salvage-col-"));
|
|
25
|
+
dbPath = path.join(dir, "u.db");
|
|
26
|
+
const db = new Database(dbPath);
|
|
27
|
+
db.exec(
|
|
28
|
+
"CREATE TABLE msg(msg_uuid TEXT, conversation_id TEXT, sender INTEGER, content TEXT, created_time INTEGER)",
|
|
29
|
+
);
|
|
30
|
+
const ins = db.prepare("INSERT INTO msg VALUES(?,?,?,?,?)");
|
|
31
|
+
ins.run("uuid-1", "conv-1", 111, "你好呀 hello", 1700000000000);
|
|
32
|
+
ins.run("uuid-2", "conv-1", 222, "在吗?晚上一起吃饭", 1700000001000);
|
|
33
|
+
ins.run("uuid-3", "conv-2", 333, "ok 👍", 1700000002000);
|
|
34
|
+
db.close();
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
afterAll(() => {
|
|
38
|
+
try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) { /* ignore */ }
|
|
39
|
+
});
|
|
40
|
+
|
|
41
|
+
it("salvageDumpToSnapshot recovers msg rows → snapshot JSON (explicit columns)", () => {
|
|
42
|
+
const res = salvageDumpToSnapshot(dbPath, {
|
|
43
|
+
uid: "1234567890123456789",
|
|
44
|
+
columns: COLUMNS,
|
|
45
|
+
now: () => 1700000099000,
|
|
46
|
+
});
|
|
47
|
+
expect(res.uid).toBe("1234567890123456789");
|
|
48
|
+
expect(res.eventCounts.message).toBe(3);
|
|
49
|
+
expect(res.salvage.recordsSalvaged).toBeGreaterThanOrEqual(3);
|
|
50
|
+
|
|
51
|
+
const snap = JSON.parse(fs.readFileSync(res.snapshotPath, "utf-8"));
|
|
52
|
+
const msgs = snap.events.filter((e) => e.kind === "message");
|
|
53
|
+
expect(msgs.length).toBe(3);
|
|
54
|
+
const texts = msgs.map((m) => m.text).sort();
|
|
55
|
+
expect(texts).toContain("你好呀 hello");
|
|
56
|
+
expect(texts).toContain("ok 👍"); // UTF-8 emoji survives
|
|
57
|
+
const m1 = msgs.find((m) => m.text === "你好呀 hello");
|
|
58
|
+
expect(m1.conversationId).toBe("conv-1");
|
|
59
|
+
expect(m1.senderUid).toBe("111");
|
|
60
|
+
fs.rmSync(res.snapshotPath, { force: true });
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
it("infers columns heuristically when none given (content + created_time)", () => {
|
|
64
|
+
const res = salvageDumpToSnapshot(dbPath, { now: () => 1700000099000 });
|
|
65
|
+
expect(res.eventCounts.message).toBe(3);
|
|
66
|
+
const snap = JSON.parse(fs.readFileSync(res.snapshotPath, "utf-8"));
|
|
67
|
+
const texts = snap.events.filter((e) => e.kind === "message").map((m) => m.text);
|
|
68
|
+
expect(texts).toContain("在吗?晚上一起吃饭");
|
|
69
|
+
fs.rmSync(res.snapshotPath, { force: true });
|
|
70
|
+
});
|
|
71
|
+
|
|
72
|
+
it("salvageAndSync feeds the snapshot to registry.syncAdapter then cleans up", async () => {
|
|
73
|
+
let captured = null;
|
|
74
|
+
const fakeRegistry = {
|
|
75
|
+
syncAdapter: async (name, opts) => {
|
|
76
|
+
captured = { name, opts };
|
|
77
|
+
// verify the snapshot file exists at sync time
|
|
78
|
+
const snap = JSON.parse(fs.readFileSync(opts.inputPath, "utf-8"));
|
|
79
|
+
return { ingested: snap.events.length, adapter: name, kgTriples: 0, ragDocs: 0 };
|
|
80
|
+
},
|
|
81
|
+
};
|
|
82
|
+
const report = await salvageAndSync(fakeRegistry, dbPath, {
|
|
83
|
+
uid: "1234567890123456789",
|
|
84
|
+
columns: COLUMNS,
|
|
85
|
+
});
|
|
86
|
+
expect(captured.name).toBe("social-douyin");
|
|
87
|
+
expect(report.ingested).toBe(3);
|
|
88
|
+
expect(report.douyin.mode).toBe("salvage");
|
|
89
|
+
expect(report.douyin.eventCounts.message).toBe(3);
|
|
90
|
+
expect(report.douyin.cleanupFailed).toBe(false);
|
|
91
|
+
// snapshot file cleaned up in finally
|
|
92
|
+
expect(fs.existsSync(captured.opts.inputPath)).toBe(false);
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
it("throws on missing dumpPath", () => {
|
|
96
|
+
expect(() => salvageDumpToSnapshot("")).toThrow();
|
|
97
|
+
});
|
|
98
|
+
});
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
import { describe, it, expect } from "vitest";
|
|
4
|
+
|
|
5
|
+
const {
|
|
6
|
+
mapMsgRecords,
|
|
7
|
+
mapParticipantRecords,
|
|
8
|
+
mapConversationRecords,
|
|
9
|
+
inferMsgColumns,
|
|
10
|
+
mapSalvaged,
|
|
11
|
+
} = require("../lib/adapters/social-douyin-adb/salvage-mapper");
|
|
12
|
+
const { DouyinAdapter } = require("../lib/adapters/social-douyin");
|
|
13
|
+
|
|
14
|
+
// End-to-end glue: leaf-salvaged {rowid,cols} → parseImDb shape → adapter.normalize
|
|
15
|
+
// → PDH entities. Closes Method-B: dump → salvage → mapper → ingest.
|
|
16
|
+
describe("salvage-mapper — salvaged records → PDH entities", () => {
|
|
17
|
+
// msg column order (device-verified subset, see pdh-app-db-schemas.md)
|
|
18
|
+
const MSG_COLS = ["msg_uuid", "conversation_id", "sender", "content", "created_time"];
|
|
19
|
+
const msgRecords = [
|
|
20
|
+
{ rowid: "1", cols: ["u1", "conv-1", 111, JSON.stringify({ text: "你好呀 hello" }), 1700000000000] },
|
|
21
|
+
{ rowid: "2", cols: ["u2", "conv-1", 222, JSON.stringify({ text: "在吗" }), 1700000001000] },
|
|
22
|
+
];
|
|
23
|
+
|
|
24
|
+
it("maps msg records → message objects (im-db-parser shape)", () => {
|
|
25
|
+
const msgs = mapMsgRecords(msgRecords, MSG_COLS);
|
|
26
|
+
expect(msgs).toHaveLength(2);
|
|
27
|
+
expect(msgs[0].senderUid).toBe("111");
|
|
28
|
+
expect(msgs[0].conversationId).toBe("conv-1");
|
|
29
|
+
expect(msgs[0].createdTimeMs).toBe(1700000000000);
|
|
30
|
+
expect(msgs[0].text).toBe("你好呀 hello"); // content JSON → text extracted
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
it("mapped messages normalize through DouyinAdapter → MESSAGE events", () => {
|
|
34
|
+
const a = new DouyinAdapter();
|
|
35
|
+
const msgs = mapMsgRecords(msgRecords, MSG_COLS);
|
|
36
|
+
const raw = {
|
|
37
|
+
adapter: "social-douyin",
|
|
38
|
+
kind: "message",
|
|
39
|
+
originalId: "douyin:message:x",
|
|
40
|
+
capturedAt: msgs[0].createdTimeMs,
|
|
41
|
+
payload: { kind: "message", ...msgs[0] },
|
|
42
|
+
};
|
|
43
|
+
const n = a.normalize(raw);
|
|
44
|
+
expect(n.events).toHaveLength(1);
|
|
45
|
+
expect(n.events[0].subtype).toBe("message");
|
|
46
|
+
expect(n.events[0].content.text).toBe("你好呀 hello");
|
|
47
|
+
expect(n.events[0].extra.senderUid).toBe("111");
|
|
48
|
+
});
|
|
49
|
+
|
|
50
|
+
it("maps participant records → deduped contacts (uid only)", () => {
|
|
51
|
+
const recs = [
|
|
52
|
+
{ rowid: "1", cols: ["conv-1", "111", 0] },
|
|
53
|
+
{ rowid: "2", cols: ["conv-1", "222", 1] },
|
|
54
|
+
{ rowid: "3", cols: ["conv-2", "222", 0] },
|
|
55
|
+
];
|
|
56
|
+
const contacts = mapParticipantRecords(recs, ["conversation_id", "user_id", "sort_order"]);
|
|
57
|
+
expect(contacts.map((c) => c.uid).sort()).toEqual(["111", "222"]);
|
|
58
|
+
expect(contacts.every((c) => c.fromParticipant)).toBe(true);
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
it("maps conversation records → conversations (→ TOPIC)", () => {
|
|
62
|
+
const recs = [{ rowid: "1", cols: ["conv-9", 1, 1700000002000, 1] }];
|
|
63
|
+
const convs = mapConversationRecords(recs, ["conversation_id", "type", "last_msg_create_time", "stranger"]);
|
|
64
|
+
expect(convs[0].conversationId).toBe("conv-9");
|
|
65
|
+
expect(convs[0].stranger).toBe(true);
|
|
66
|
+
expect(convs[0].lastMsgTimeMs).toBe(1700000002000);
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
it("inferMsgColumns heuristically locates content + created_time", () => {
|
|
70
|
+
const cols = inferMsgColumns(msgRecords);
|
|
71
|
+
// content = the JSON string col (index 3), created_time = the epoch int (index 4)
|
|
72
|
+
expect(cols[3]).toBe("content");
|
|
73
|
+
expect(cols[4]).toBe("created_time");
|
|
74
|
+
// round-trips through mapMsgRecords
|
|
75
|
+
const msgs = mapMsgRecords(msgRecords, cols);
|
|
76
|
+
expect(msgs[0].text).toBe("你好呀 hello");
|
|
77
|
+
expect(msgs[0].createdTimeMs).toBe(1700000000000);
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
it("mapSalvaged one-shot returns parseImDb shape", () => {
|
|
81
|
+
const out = mapSalvaged({
|
|
82
|
+
msg: { records: msgRecords, columns: MSG_COLS },
|
|
83
|
+
participant: { records: [{ rowid: "1", cols: ["conv-1", "999"] }], columns: ["conversation_id", "user_id"] },
|
|
84
|
+
conversation: { records: [{ rowid: "1", cols: ["conv-1"] }], columns: ["conversation_id"] },
|
|
85
|
+
});
|
|
86
|
+
expect(out.messages).toHaveLength(2);
|
|
87
|
+
expect(out.contacts).toHaveLength(1);
|
|
88
|
+
expect(out.conversations).toHaveLength(1);
|
|
89
|
+
});
|
|
90
|
+
});
|