@chainlesschain/personal-data-hub 0.4.27 → 0.4.29

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -16,13 +16,21 @@ middleware.
16
16
  > Ollama / Volcengine / Anthropic / Gemini / DeepSeek), **CcKgSink**, **CcRagSink**
17
17
  > — injected at the desktop/CLI entry so this package stays decoupled (Phase 3.5).
18
18
  >
19
- > **51 adapters are now live** (no longer "later phases"): Email IMAP,
19
+ > **89 adapters are now live** (no longer "later phases"): Email IMAP,
20
20
  > Alipay bill, 9 AI-chat vendors, WeChat / QQ / Weibo / Bilibili / Douyin /
21
- > Xiaohongshu / Toutiao / Kuaishou social, Telegram / WhatsApp messaging,
22
- > Taobao / JD / Meituan / Pinduoduo shopping, Amap / Baidu-map / Tencent-map /
23
- > Ctrip / 12306 travel, system-data (contacts / calls / sms / location),
21
+ > Xiaohongshu / Toutiao / Kuaishou / Douban social, Telegram / WhatsApp messaging,
22
+ > Taobao / JD / Meituan / Pinduoduo / Eleme / Xianyu / Vipshop shopping, Amap /
23
+ > Baidu-map / Tencent-map / Ctrip / 12306 / Didi travel, Kugou / Ximalaya audio,
24
+ > Keep / Joyrun fitness, system-data (contacts / calls / sms / location),
24
25
  > and the developer-activity set (git / shell / vscode / browser-history /
25
- > local-files / win-recent).
26
+ > local-files / win-recent). See `lib/adapters/` for the full list.
27
+ >
28
+ > **On-device root forensics (rooted devices):** beyond cookie/sign-based
29
+ > collection, PDH can pull a logged-in app's local encrypted DB directly via
30
+ > **method B** (key-free `/proc/<pid>/mem` memory scan — engine-agnostic,
31
+ > anti-debug-resistant) or **method C** (frida `sqlcipher_export` online decrypt),
32
+ > plus a SQLite leaf-page **salvager** (`--unaligned`) that recovers plaintext
33
+ > pages from corrupt mem dumps. See `docs/internal/pdh-db-decryption-runbook.md`.
26
34
  >
27
35
  > **New in v0.4.0 (v5.0.3.99):** adapter **readiness** — split out from the
28
36
  > loose `healthCheck` sync gate into a real ready/needs_setup/unavailable
@@ -0,0 +1,229 @@
1
+ /**
2
+ * Douyin usage-profile reader tests (real-device-driven 2026-06-18: the user's
3
+ * exported 1128_feature_engineering.db has FEInternalUserActivityTable = 81
4
+ * rows ≈ 24 days, 175 opens, ~108h, peak 12-17h).
5
+ *
6
+ * Two layers:
7
+ * - pure aggregation via an injected fake Database (no native driver needed);
8
+ * - a real better-sqlite3 db + real LocalVault round-trip proving the
9
+ * hand-built event passes schema validation, is searchable, and re-ingest
10
+ * dedups on the stable originalId.
11
+ */
12
+ "use strict";
13
+
14
+ import { describe, it, expect, beforeAll, afterAll } from "vitest";
15
+
16
+ const fs = require("node:fs");
17
+ const path = require("node:path");
18
+ const os = require("node:os");
19
+
20
+ const { LocalVault } = require("../../lib/vault");
21
+ const { generateKeyHex } = require("../../lib/key-providers");
22
+ const {
23
+ USAGE_TABLE,
24
+ readDouyinUsageProfile,
25
+ summarizeUsageProfile,
26
+ buildUsageProfileEvents,
27
+ usageProfileToVault,
28
+ _internals,
29
+ } = require("../../lib/adapters/social-douyin-adb/usage-profile-reader");
30
+
31
+ // ── pure aggregation with an injected fake Database ───────────────────
32
+ function makeFakeDb(rows, { table = USAGE_TABLE } = {}) {
33
+ const cols = [
34
+ "id",
35
+ "timestamp",
36
+ "open_app_count",
37
+ "total_duration",
38
+ ...Array.from({ length: 24 }, (_v, h) => `launch_hour_${h}`),
39
+ ];
40
+ return class FakeDb {
41
+ constructor() {}
42
+ prepare(sql) {
43
+ return {
44
+ get: (arg) => {
45
+ if (/sqlite_master/.test(sql)) {
46
+ return arg === table ? { name: table } : undefined;
47
+ }
48
+ return undefined;
49
+ },
50
+ all: () => {
51
+ if (/table_info/.test(sql)) return cols.map((name) => ({ name }));
52
+ if (/FROM "/.test(sql)) return rows;
53
+ return [];
54
+ },
55
+ };
56
+ }
57
+ close() {}
58
+ };
59
+ }
60
+
61
+ function row({ ts, opens = 1, durMs = 0, hours = {} }) {
62
+ const r = { id: 1, timestamp: ts, open_app_count: opens, total_duration: durMs };
63
+ for (let h = 0; h < 24; h++) r[`launch_hour_${h}`] = hours[h] || 0;
64
+ return r;
65
+ }
66
+
67
+ const DAY = 86_400_000;
68
+
69
+ describe("readDouyinUsageProfile (injected fake db)", () => {
70
+ it("aggregates opens, duration, hour histogram, peak hour + bucket, distinct days", () => {
71
+ const base = Math.floor(1781000000000 / 1000); // seconds epoch
72
+ const Db = makeFakeDb([
73
+ row({ ts: base, opens: 2, durMs: 3_600_000, hours: { 13: 3, 9: 1 } }),
74
+ row({ ts: base + 86_400, opens: 1, durMs: 1_800_000, hours: { 14: 2, 20: 1 } }),
75
+ ]);
76
+ const p = readDouyinUsageProfile("x.db", { _databaseClass: Db });
77
+ expect(p.sessions).toBe(2);
78
+ expect(p.days).toBe(2);
79
+ expect(p.totalOpens).toBe(3);
80
+ expect(p.totalDurationMs).toBe(5_400_000);
81
+ expect(p.hourHistogram[13]).toBe(3);
82
+ expect(p.hourHistogram[14]).toBe(2);
83
+ expect(p.peakHour).toBe(13); // 3 launches is the single max hour
84
+ expect(p.peakBucket).toBe("12-17h"); // 13+14 = 5 dominates
85
+ expect(p.bucketTotals["12-17h"]).toBe(5);
86
+ expect(p.bucketTotals["18-23h"]).toBe(1);
87
+ expect(p.from).toBe(base * 1000);
88
+ expect(p.to).toBe((base + 86_400) * 1000);
89
+ });
90
+
91
+ it("returns an empty profile when the table is absent", () => {
92
+ const Db = makeFakeDb([], { table: "SomeOtherTable" });
93
+ const p = readDouyinUsageProfile("x.db", { _databaseClass: Db });
94
+ expect(p.sessions).toBe(0);
95
+ expect(p.peakBucket).toBe(null);
96
+ expect(p.hourHistogram).toHaveLength(24);
97
+ expect(p.totalDurationMs).toBe(0);
98
+ });
99
+
100
+ it("counts the same calendar day once even across multiple sessions", () => {
101
+ const base = Math.floor(1781000000000 / 1000);
102
+ const Db = makeFakeDb([
103
+ row({ ts: base, hours: { 10: 1 } }),
104
+ row({ ts: base + 3600, hours: { 11: 1 } }), // same UTC day
105
+ ]);
106
+ const p = readDouyinUsageProfile("x.db", { _databaseClass: Db });
107
+ expect(p.days).toBe(1);
108
+ expect(p.sessions).toBe(2);
109
+ });
110
+
111
+ it("toEpochMs treats >1e12 as ms else seconds; rejects junk", () => {
112
+ expect(_internals.toEpochMs(1781000000)).toBe(1781000000000);
113
+ expect(_internals.toEpochMs(1781000000000)).toBe(1781000000000);
114
+ expect(_internals.toEpochMs(0)).toBe(null);
115
+ expect(_internals.toEpochMs("nope")).toBe(null);
116
+ });
117
+ });
118
+
119
+ describe("summarizeUsageProfile + buildUsageProfileEvents", () => {
120
+ it("summary is empty-safe and renders hours + peak", () => {
121
+ expect(summarizeUsageProfile(null)).toMatch(/无数据/);
122
+ expect(summarizeUsageProfile({ sessions: 0 })).toMatch(/无数据/);
123
+ const txt = summarizeUsageProfile({
124
+ sessions: 81,
125
+ days: 24,
126
+ totalOpens: 175,
127
+ totalDurationMs: 388_440_000, // 107.9h
128
+ peakBucket: "12-17h",
129
+ });
130
+ expect(txt).toMatch(/24 天/);
131
+ expect(txt).toMatch(/175 次启动/);
132
+ expect(txt).toMatch(/107\.9 小时/);
133
+ expect(txt).toMatch(/12-17h/);
134
+ });
135
+
136
+ it("builds no events for an empty profile", () => {
137
+ expect(buildUsageProfileEvents({ sessions: 0 }).events).toHaveLength(0);
138
+ expect(buildUsageProfileEvents(null).events).toHaveLength(0);
139
+ });
140
+
141
+ it("builds one app-usage-profile event with stable originalId + histogram in extra", () => {
142
+ const profile = {
143
+ sessions: 81, days: 24, from: 1, to: 1781800000000,
144
+ totalOpens: 175, totalDurationMs: 388_440_000,
145
+ hourHistogram: new Array(24).fill(0), peakHour: 13, peakBucket: "12-17h",
146
+ bucketTotals: { "0-5h": 1, "6-11h": 81, "12-17h": 107, "18-23h": 75 },
147
+ };
148
+ const { events } = buildUsageProfileEvents(profile, { now: 1781900000000 });
149
+ expect(events).toHaveLength(1);
150
+ const e = events[0];
151
+ expect(e.subtype).toBe("other");
152
+ expect(e.source.adapter).toBe("social-douyin");
153
+ expect(e.source.originalId).toBe("social-douyin:usage-profile");
154
+ expect(e.source.capturedBy).toBe("sqlite");
155
+ expect(e.occurredAt).toBe(1781800000000); // profile.to
156
+ expect(e.extra.kind).toBe("app-usage-profile");
157
+ expect(e.extra.peakBucket).toBe("12-17h");
158
+ expect(e.extra.bucketTotals["12-17h"]).toBe(107);
159
+ expect(Array.isArray(e.extra.hourHistogram)).toBe(true);
160
+ });
161
+ });
162
+
163
+ // ── real db + real vault round-trip (schema validation + dedup) ───────
164
+ describe("usageProfileToVault — real sqlite + real vault", () => {
165
+ let dir, dbPath, vdir, vault;
166
+
167
+ beforeAll(() => {
168
+ const Database = require("better-sqlite3-multiple-ciphers");
169
+ dir = fs.mkdtempSync(path.join(os.tmpdir(), "douyin-usage-"));
170
+ dbPath = path.join(dir, "1128_feature_engineering.db");
171
+ const db = new Database(dbPath);
172
+ const hourCols = Array.from({ length: 24 }, (_v, h) => `launch_hour_${h} INTEGER`).join(", ");
173
+ db.exec(
174
+ `CREATE TABLE "${USAGE_TABLE}" (id INTEGER, timestamp INTEGER, ` +
175
+ `start_timestamp_ms INTEGER, end_timestamp_ms INTEGER, ` +
176
+ `open_app_count INTEGER, ${hourCols}, total_duration INTEGER)`,
177
+ );
178
+ const hzero = Array.from({ length: 24 }, () => 0);
179
+ const insCols = ["id", "timestamp", "start_timestamp_ms", "end_timestamp_ms", "open_app_count",
180
+ ...Array.from({ length: 24 }, (_v, h) => `launch_hour_${h}`), "total_duration"];
181
+ const ph = insCols.map(() => "?").join(",");
182
+ const ins = db.prepare(`INSERT INTO "${USAGE_TABLE}" (${insCols.join(",")}) VALUES (${ph})`);
183
+ const baseSec = Math.floor(1781000000000 / 1000);
184
+ // two sessions on two different days; 13h is the peak hour
185
+ const h1 = [...hzero]; h1[13] = 3; h1[9] = 1;
186
+ const h2 = [...hzero]; h2[14] = 2;
187
+ ins.run(1, baseSec, baseSec * 1000, baseSec * 1000 + 1000, 2, ...h1, 3_600_000);
188
+ ins.run(2, baseSec + 86_400, 0, 0, 1, ...h2, 1_800_000);
189
+ db.close();
190
+
191
+ vdir = fs.mkdtempSync(path.join(os.tmpdir(), "douyin-usage-vault-"));
192
+ vault = new LocalVault({ path: path.join(vdir, "v.db"), key: generateKeyHex() });
193
+ vault.open();
194
+ });
195
+
196
+ afterAll(() => {
197
+ try { vault.close(); } catch (_e) { /* best-effort */ }
198
+ try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) { /* best-effort */ }
199
+ try { fs.rmSync(vdir, { recursive: true, force: true }); } catch (_e) { /* best-effort */ }
200
+ });
201
+
202
+ it("reads the real table, aggregates, and ingests one valid event", () => {
203
+ const profile = readDouyinUsageProfile(dbPath, {});
204
+ expect(profile.sessions).toBe(2);
205
+ expect(profile.days).toBe(2);
206
+ expect(profile.peakHour).toBe(13);
207
+ expect(profile.peakBucket).toBe("12-17h");
208
+
209
+ const r = usageProfileToVault(vault, dbPath, { now: 1781900000000 });
210
+ expect(r.ingested).toBe(1); // proves the hand-built event passed schema validation
211
+ expect(r.sessions).toBe(2);
212
+
213
+ const events = vault.queryEvents({ limit: 100 }) || [];
214
+ const mine = events.filter(
215
+ (e) => e.extra && e.extra.kind === "app-usage-profile",
216
+ );
217
+ expect(mine.length).toBe(1);
218
+ expect(mine[0].source.adapter).toBe("social-douyin");
219
+ });
220
+
221
+ it("re-ingest dedups on the stable originalId (no duplicate baseline)", () => {
222
+ usageProfileToVault(vault, dbPath, { now: 1781999999999 });
223
+ const events = vault.queryEvents({ limit: 100 }) || [];
224
+ const mine = events.filter(
225
+ (e) => e.extra && e.extra.kind === "app-usage-profile",
226
+ );
227
+ expect(mine.length).toBe(1); // still one — updated, not duplicated
228
+ });
229
+ });
@@ -7,10 +7,11 @@
7
7
  */
8
8
  "use strict";
9
9
 
10
- import { describe, it, expect, vi } from "vitest";
10
+ import { describe, it, expect, vi, beforeAll, afterAll } from "vitest";
11
11
 
12
12
  const {
13
13
  createDouyinWatchExtension,
14
+ watchHistoryToVault,
14
15
  VIDEO_RECORD_DB_REMOTE_PATH,
15
16
  _internals,
16
17
  } = require("../../lib/adapters/social-douyin-adb/watch-history-reader");
@@ -55,9 +56,9 @@ function makeFakeDb(tablesToRows) {
55
56
  }
56
57
 
57
58
  describe("readDouyinWatchHistory", () => {
58
- it("picks the largest record_<uid> table (skips record_0), parses rows → ms", () => {
59
+ it("merges record_0 + record_<uid>, attributes uid to the largest uid table, parses rows → ms", () => {
59
60
  const Db = makeFakeDb({
60
- record_0: [{ aid: "x", view_time_timestamp: 1, enter_from: "a" }],
61
+ record_0: [{ aid: "xrec0", view_time_timestamp: 1, enter_from: "a" }],
61
62
  record_92585448288: [
62
63
  { aid: "7480000000000000001", view_time_timestamp: 1717800000, enter_from: "homepage_hot" },
63
64
  { aid: "7480000000000000002", view_time_timestamp: 1717800600, enter_from: "homepage_follow" },
@@ -65,19 +66,40 @@ describe("readDouyinWatchHistory", () => {
65
66
  });
66
67
  const r = _internals.readDouyinWatchHistory("x.db", { _databaseClass: Db });
67
68
  expect(r.uid).toBe("92585448288");
68
- expect(r.records).toHaveLength(2);
69
+ // record_0 is no longer dropped: 1 (record_0) + 2 (uid) = 3 merged records.
70
+ expect(r.records).toHaveLength(3);
71
+ // Most-recent first.
69
72
  expect(r.records[0]).toEqual({
70
- awemeId: "7480000000000000001",
71
- capturedAt: 1717800000 * 1000, // seconds → ms
72
- enterFrom: "homepage_hot",
73
+ awemeId: "7480000000000000002",
74
+ capturedAt: 1717800600 * 1000,
75
+ enterFrom: "homepage_follow",
73
76
  });
77
+ const ids = r.records.map((x) => x.awemeId);
78
+ expect(ids).toContain("xrec0"); // the formerly-lost record_0 row
79
+ const rec0 = r.records.find((x) => x.awemeId === "xrec0");
80
+ expect(rec0).toEqual({ awemeId: "xrec0", capturedAt: 1000, enterFrom: "a" });
74
81
  });
75
82
 
76
- it("returns {uid:null, records:[]} when only the anonymous record_0 exists", () => {
77
- const Db = makeFakeDb({ record_0: [{ aid: "x", view_time_timestamp: 1 }] });
83
+ it("recovers history from record_0 alone (uid:null) the bulk-in-record_0 device case", () => {
84
+ const Db = makeFakeDb({
85
+ record_0: [
86
+ { aid: "a1", view_time_timestamp: 1717800000, enter_from: "homepage_hot" },
87
+ { aid: "a2", view_time_timestamp: 1717800600, enter_from: "homepage_hot" },
88
+ ],
89
+ });
78
90
  const r = _internals.readDouyinWatchHistory("x.db", { _databaseClass: Db });
79
- expect(r.uid).toBe(null);
80
- expect(r.records).toEqual([]);
91
+ expect(r.uid).toBe(null); // no logged-in account table → no attribution
92
+ expect(r.records).toHaveLength(2); // but the watch history is still recovered
93
+ expect(r.records.map((x) => x.awemeId).sort()).toEqual(["a1", "a2"]);
94
+ });
95
+
96
+ it("dedups the same (aid, timestamp) appearing in two record_* tables", () => {
97
+ const Db = makeFakeDb({
98
+ record_0: [{ aid: "dup", view_time_timestamp: 1717800000, enter_from: "homepage_hot" }],
99
+ record_111: [{ aid: "dup", view_time_timestamp: 1717800000, enter_from: "homepage_hot" }],
100
+ });
101
+ const r = _internals.readDouyinWatchHistory("x.db", { _databaseClass: Db });
102
+ expect(r.records).toHaveLength(1);
81
103
  });
82
104
 
83
105
  it("toEpochMs treats >1e12 as ms, else seconds; rejects junk", () => {
@@ -190,3 +212,58 @@ describe("createDouyinWatchExtension contract", () => {
190
212
  await expect(createDouyinWatchExtension()({}, {})).rejects.toThrow(/ctx must provide/);
191
213
  });
192
214
  });
215
+
216
+ // ── watchHistoryToVault: local-db → canonical BROWSE events → vault ────
217
+ describe("watchHistoryToVault — real sqlite + real vault", () => {
218
+ const fs = require("node:fs");
219
+ const path = require("node:path");
220
+ const os = require("node:os");
221
+ const { LocalVault } = require("../../lib/vault");
222
+ const { generateKeyHex } = require("../../lib/key-providers");
223
+ let dir, dbPath, vdir, vault;
224
+
225
+ beforeAll(() => {
226
+ const Database = require("better-sqlite3-multiple-ciphers");
227
+ dir = fs.mkdtempSync(path.join(os.tmpdir(), "dy-watch-"));
228
+ dbPath = path.join(dir, "video_record.db");
229
+ const db = new Database(dbPath);
230
+ db.exec("CREATE TABLE record_0 (aid TEXT, view_time_timestamp INTEGER, enter_from TEXT)");
231
+ db.exec("CREATE TABLE record_92585448288 (aid TEXT, view_time_timestamp INTEGER, enter_from TEXT)");
232
+ db.prepare("INSERT INTO record_0 VALUES (?,?,?)").run("7644480728574545765", 1781706182375, "homepage_hot");
233
+ db.prepare("INSERT INTO record_92585448288 VALUES (?,?,?)").run("7480000000000000002", 1717800600000, "others_homepage");
234
+ db.close();
235
+
236
+ vdir = fs.mkdtempSync(path.join(os.tmpdir(), "dy-watch-vault-"));
237
+ vault = new LocalVault({ path: path.join(vdir, "v.db"), key: generateKeyHex() });
238
+ vault.open();
239
+ });
240
+
241
+ afterAll(() => {
242
+ try { vault.close(); } catch (_e) { /* best-effort */ }
243
+ try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) { /* best-effort */ }
244
+ try { fs.rmSync(vdir, { recursive: true, force: true }); } catch (_e) { /* best-effort */ }
245
+ });
246
+
247
+ it("merges record_0 + uid table and ingests canonical BROWSE events", () => {
248
+ const r = watchHistoryToVault(vault, dbPath, { now: 1781900000000 });
249
+ expect(r.records).toBe(2); // record_0 row no longer dropped
250
+ expect(r.ingested).toBe(2);
251
+ expect(r.uid).toBe("92585448288");
252
+ const events = vault.queryEvents({ limit: 100 }) || [];
253
+ const browse = events.filter(
254
+ (e) => e.subtype === "browse" && e.source.adapter === "social-douyin",
255
+ );
256
+ expect(browse.length).toBe(2);
257
+ expect(browse.some((e) => e.extra.awemeId === "7644480728574545765")).toBe(true);
258
+ expect(browse.some((e) => e.extra.enterFrom === "homepage_hot")).toBe(true);
259
+ });
260
+
261
+ it("re-ingest dedups on the per-record originalId", () => {
262
+ watchHistoryToVault(vault, dbPath, { now: 1781999999999 });
263
+ const events = vault.queryEvents({ limit: 100 }) || [];
264
+ const browse = events.filter(
265
+ (e) => e.subtype === "browse" && e.source.adapter === "social-douyin",
266
+ );
267
+ expect(browse.length).toBe(2); // still two
268
+ });
269
+ });
@@ -0,0 +1,155 @@
1
+ /**
2
+ * Toutiao article reader tests (real-device-driven 2026-06-18: the user's
3
+ * exported news_article.db `article` table = 48 feed-cache rows; title lives in
4
+ * the share_info JSON blob, not a column).
5
+ *
6
+ * Two layers: pure parsing via injected fake Database, + a real better-sqlite3
7
+ * db + real LocalVault round-trip proving the hand-built BROWSE events pass
8
+ * schema validation, are searchable, and re-ingest dedups on the stable
9
+ * originalId.
10
+ */
11
+ "use strict";
12
+
13
+ import { describe, it, expect, beforeAll, afterAll } from "vitest";
14
+
15
+ const fs = require("node:fs");
16
+ const path = require("node:path");
17
+ const os = require("node:os");
18
+
19
+ const { LocalVault } = require("../../lib/vault");
20
+ const { generateKeyHex } = require("../../lib/key-providers");
21
+ const {
22
+ ARTICLE_TABLE,
23
+ readToutiaoArticles,
24
+ buildArticleEvents,
25
+ articlesToVault,
26
+ _internals,
27
+ } = require("../../lib/adapters/social-toutiao-adb/article-reader");
28
+
29
+ function makeFakeDb(rows, { table = ARTICLE_TABLE } = {}) {
30
+ const cols = [
31
+ "group_id", "item_id", "share_info", "ext_json", "share_url",
32
+ "behot_time", "read_timestamp", "is_user_digg", "is_user_repin",
33
+ ];
34
+ return class FakeDb {
35
+ constructor() {}
36
+ prepare(sql) {
37
+ return {
38
+ get: (arg) => (/sqlite_master/.test(sql) ? (arg === table ? { name: table } : undefined) : undefined),
39
+ all: () => {
40
+ if (/table_info/.test(sql)) return cols.map((name) => ({ name }));
41
+ if (/FROM "/.test(sql)) return rows;
42
+ return [];
43
+ },
44
+ };
45
+ }
46
+ close() {}
47
+ };
48
+ }
49
+
50
+ describe("readToutiaoArticles (injected fake db)", () => {
51
+ it("parses title from share_info, strips the brand suffix, drops url tracking query", () => {
52
+ const Db = makeFakeDb([
53
+ {
54
+ group_id: 100, behot_time: 1781700000, read_timestamp: 0, is_user_digg: 1, is_user_repin: 0,
55
+ share_info: JSON.stringify({ title: "5月汽车出口延续快速增长态势 - 今日头条", share_url: "https://m.toutiao.com/g/100/?app=x&category_new=headline" }),
56
+ share_url: "https://m.toutiao.com/g/100/?category_new=headline",
57
+ },
58
+ ]);
59
+ const { articles } = readToutiaoArticles("x.db", { _databaseClass: Db });
60
+ expect(articles).toHaveLength(1);
61
+ expect(articles[0].title).toBe("5月汽车出口延续快速增长态势"); // suffix stripped
62
+ expect(articles[0].url).toBe("https://m.toutiao.com/g/100/"); // query dropped
63
+ expect(articles[0].category).toBe("headline");
64
+ expect(articles[0].digg).toBe(true);
65
+ expect(articles[0].behotTime).toBe(1781700000 * 1000);
66
+ });
67
+
68
+ it("falls back to ext_json.title when share_info has none, and skips untitled rows", () => {
69
+ const Db = makeFakeDb([
70
+ { group_id: 1, ext_json: JSON.stringify({ title: "来自 ext_json 的标题" }), share_info: "{}" },
71
+ { group_id: 2, share_info: "{}", ext_json: "{}" }, // untitled → dropped
72
+ ]);
73
+ const { articles } = readToutiaoArticles("x.db", { _databaseClass: Db });
74
+ expect(articles).toHaveLength(1);
75
+ expect(articles[0].title).toBe("来自 ext_json 的标题");
76
+ });
77
+
78
+ it("returns no articles when the table is absent", () => {
79
+ const Db = makeFakeDb([], { table: "other" });
80
+ expect(readToutiaoArticles("x.db", { _databaseClass: Db }).articles).toEqual([]);
81
+ });
82
+
83
+ it("buildArticleEvents → BROWSE events, social-toutiao source, stable originalId, read flag", () => {
84
+ const { events } = buildArticleEvents(
85
+ [{ groupId: "55", title: "标题", url: "u", category: "headline", behotTime: 2, readTimestamp: 1781700000000, digg: true, repin: false }],
86
+ { now: 1781800000000 },
87
+ );
88
+ expect(events).toHaveLength(1);
89
+ const e = events[0];
90
+ expect(e.subtype).toBe("browse");
91
+ expect(e.source.adapter).toBe("social-toutiao");
92
+ expect(e.source.originalId).toBe("social-toutiao:article:55");
93
+ expect(e.occurredAt).toBe(1781700000000); // read_timestamp wins over behot
94
+ expect(e.extra.kind).toBe("article");
95
+ expect(e.extra.read).toBe(true);
96
+ expect(e.extra.digg).toBe(true);
97
+ });
98
+
99
+ it("extractCategory / extractUrl helpers", () => {
100
+ expect(_internals.extractCategory({ share_url: "x?a=1&category_new=my_tabs_digg&b=2" })).toBe("my_tabs_digg");
101
+ expect(_internals.extractUrl({ share_info: JSON.stringify({ share_url: "https://h/g/1/?t=1" }) })).toBe("https://h/g/1/");
102
+ });
103
+ });
104
+
105
+ describe("articlesToVault — real sqlite + real vault", () => {
106
+ let dir, dbPath, vdir, vault;
107
+
108
+ beforeAll(() => {
109
+ const Database = require("better-sqlite3-multiple-ciphers");
110
+ dir = fs.mkdtempSync(path.join(os.tmpdir(), "tt-article-"));
111
+ dbPath = path.join(dir, "news_article.db");
112
+ const db = new Database(dbPath);
113
+ db.exec(
114
+ "CREATE TABLE article (group_id INTEGER, item_id INTEGER, share_info TEXT, ext_json TEXT, " +
115
+ "share_url TEXT, behot_time INTEGER, read_timestamp INTEGER, is_user_digg INTEGER, is_user_repin INTEGER)",
116
+ );
117
+ const ins = db.prepare(
118
+ "INSERT INTO article (group_id, share_info, share_url, behot_time, read_timestamp, is_user_digg, is_user_repin) VALUES (?,?,?,?,?,?,?)",
119
+ );
120
+ ins.run(101, JSON.stringify({ title: "新华视点丨三峡水运新通道 - 今日头条", share_url: "https://m.toutiao.com/g/101/?x=1&category_new=headline" }), "https://m.toutiao.com/g/101/?category_new=headline", 1781700000, 0, 0, 0);
121
+ ins.run(102, JSON.stringify({ title: "5月汽车出口延续快速增长态势 - 今日头条", share_url: "https://m.toutiao.com/g/102/" }), "https://m.toutiao.com/g/102/?category_new=my_tabs_digg", 1781700100, 1781700200, 1, 0);
122
+ ins.run(103, "{}", "https://m.toutiao.com/g/103/", 1781700300, 0, 0, 0); // untitled → not ingested
123
+ db.close();
124
+
125
+ vdir = fs.mkdtempSync(path.join(os.tmpdir(), "tt-article-vault-"));
126
+ vault = new LocalVault({ path: path.join(vdir, "v.db"), key: generateKeyHex() });
127
+ vault.open();
128
+ });
129
+
130
+ afterAll(() => {
131
+ try { vault.close(); } catch (_e) { /* best-effort */ }
132
+ try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) { /* best-effort */ }
133
+ try { fs.rmSync(vdir, { recursive: true, force: true }); } catch (_e) { /* best-effort */ }
134
+ });
135
+
136
+ it("reads titled articles and ingests valid BROWSE events", () => {
137
+ const r = articlesToVault(vault, dbPath, { now: 1781900000000 });
138
+ expect(r.articles).toBe(2); // the untitled row is skipped
139
+ expect(r.ingested).toBe(2); // both passed schema validation
140
+ expect(r.digg).toBe(1);
141
+ expect(r.read).toBe(1);
142
+
143
+ const events = vault.queryEvents({ limit: 100 }) || [];
144
+ const mine = events.filter((e) => e.extra && e.extra.kind === "article");
145
+ expect(mine.length).toBe(2);
146
+ expect(mine.every((e) => e.source.adapter === "social-toutiao")).toBe(true);
147
+ });
148
+
149
+ it("re-ingest dedups on the stable per-article originalId", () => {
150
+ articlesToVault(vault, dbPath, { now: 1781999999999 });
151
+ const events = vault.queryEvents({ limit: 100 }) || [];
152
+ const mine = events.filter((e) => e.extra && e.extra.kind === "article");
153
+ expect(mine.length).toBe(2); // still two — updated, not duplicated
154
+ });
155
+ });
@@ -0,0 +1,63 @@
1
+ "use strict";
2
+
3
+ import { describe, it, expect } from "vitest";
4
+
5
+ const { WeChatPcAdapter } = require("../../lib/adapters/wechat-pc");
6
+
7
+ // Build the raw envelope shape that WeChatPcAdapter.normalize() consumes for a
8
+ // group message (the sync() generator yields { kind:"message", payload, ... }).
9
+ function groupMessageRaw(payload) {
10
+ return {
11
+ adapter: "wechat-pc",
12
+ kind: "message",
13
+ originalId: "orig-1",
14
+ capturedAt: 1780000000000,
15
+ payload: { kind: "message", ...payload },
16
+ };
17
+ }
18
+
19
+ describe("wechat-pc — group topic naming", () => {
20
+ it("uses the resolved group display name for the topic when groupName is present", () => {
21
+ const adapter = new WeChatPcAdapter();
22
+ const out = adapter.normalize(groupMessageRaw({
23
+ talker: "45498354778@chatroom",
24
+ isGroup: true,
25
+ senderWxid: "wxid_friend",
26
+ groupName: "家庭群",
27
+ text: "晚饭吃什么",
28
+ createdTimeMs: 1780000000000,
29
+ }));
30
+ expect(out.topics).toHaveLength(1);
31
+ // Stable id keyed on the chatroom wxid (identity unchanged)...
32
+ expect(out.topics[0].id).toBe("topic-wechat-group-45498354778@chatroom");
33
+ // ...but the human-readable display name is used, NOT the numeric id.
34
+ expect(out.topics[0].name).toBe("家庭群");
35
+ });
36
+
37
+ it("falls back to the raw numeric id when no group name was resolved", () => {
38
+ const adapter = new WeChatPcAdapter();
39
+ const out = adapter.normalize(groupMessageRaw({
40
+ talker: "45498354778@chatroom",
41
+ isGroup: true,
42
+ senderWxid: "wxid_friend",
43
+ groupName: null,
44
+ text: "hi",
45
+ createdTimeMs: 1780000000000,
46
+ }));
47
+ expect(out.topics).toHaveLength(1);
48
+ expect(out.topics[0].name).toBe("45498354778");
49
+ });
50
+
51
+ it("blank/whitespace group name falls back to the raw id (no empty topic name)", () => {
52
+ const adapter = new WeChatPcAdapter();
53
+ const out = adapter.normalize(groupMessageRaw({
54
+ talker: "12345@chatroom",
55
+ isGroup: true,
56
+ senderWxid: "wxid_x",
57
+ groupName: " ",
58
+ text: "hi",
59
+ createdTimeMs: 1780000000000,
60
+ }));
61
+ expect(out.topics[0].name).toBe("12345");
62
+ });
63
+ });