@chainlesschain/personal-data-hub 0.4.28 → 0.4.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -5
- package/__tests__/adapters/social-douyin-adb-usage-profile.test.js +229 -0
- package/__tests__/adapters/social-douyin-adb-watch-history.test.js +88 -11
- package/__tests__/adapters/social-toutiao-adb-article.test.js +155 -0
- package/__tests__/analysis-skills.test.js +75 -0
- package/__tests__/query-parser.test.js +63 -0
- package/lib/adapters/social-douyin-adb/usage-profile-reader.js +253 -0
- package/lib/adapters/social-douyin-adb/watch-history-reader.js +104 -31
- package/lib/adapters/social-toutiao-adb/article-reader.js +202 -0
- package/lib/analysis-skills/overview.js +24 -4
- package/lib/analysis-skills/spending.js +63 -2
- package/lib/analysis-skills/timeline.js +11 -6
- package/lib/query-parser.js +38 -8
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -16,13 +16,21 @@ middleware.
|
|
|
16
16
|
> Ollama / Volcengine / Anthropic / Gemini / DeepSeek), **CcKgSink**, **CcRagSink**
|
|
17
17
|
> — injected at the desktop/CLI entry so this package stays decoupled (Phase 3.5).
|
|
18
18
|
>
|
|
19
|
-
> **
|
|
19
|
+
> **89 adapters are now live** (no longer "later phases"): Email IMAP,
|
|
20
20
|
> Alipay bill, 9 AI-chat vendors, WeChat / QQ / Weibo / Bilibili / Douyin /
|
|
21
|
-
> Xiaohongshu / Toutiao / Kuaishou social, Telegram / WhatsApp messaging,
|
|
22
|
-
> Taobao / JD / Meituan / Pinduoduo
|
|
23
|
-
>
|
|
21
|
+
> Xiaohongshu / Toutiao / Kuaishou / Douban social, Telegram / WhatsApp messaging,
|
|
22
|
+
> Taobao / JD / Meituan / Pinduoduo / Eleme / Xianyu / Vipshop shopping, Amap /
|
|
23
|
+
> Baidu-map / Tencent-map / Ctrip / 12306 / Didi travel, Kugou / Ximalaya audio,
|
|
24
|
+
> Keep / Joyrun fitness, system-data (contacts / calls / sms / location),
|
|
24
25
|
> and the developer-activity set (git / shell / vscode / browser-history /
|
|
25
|
-
> local-files / win-recent).
|
|
26
|
+
> local-files / win-recent). See `lib/adapters/` for the full list.
|
|
27
|
+
>
|
|
28
|
+
> **On-device root forensics (rooted devices):** beyond cookie/sign-based
|
|
29
|
+
> collection, PDH can pull a logged-in app's local encrypted DB directly via
|
|
30
|
+
> **method B** (key-free `/proc/<pid>/mem` memory scan — engine-agnostic,
|
|
31
|
+
> anti-debug-resistant) or **method C** (frida `sqlcipher_export` online decrypt),
|
|
32
|
+
> plus a SQLite leaf-page **salvager** (`--unaligned`) that recovers plaintext
|
|
33
|
+
> pages from corrupt mem dumps. See `docs/internal/pdh-db-decryption-runbook.md`.
|
|
26
34
|
>
|
|
27
35
|
> **New in v0.4.0 (v5.0.3.99):** adapter **readiness** — split out from the
|
|
28
36
|
> loose `healthCheck` sync gate into a real ready/needs_setup/unavailable
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Douyin usage-profile reader tests (real-device-driven 2026-06-18: the user's
|
|
3
|
+
* exported 1128_feature_engineering.db has FEInternalUserActivityTable = 81
|
|
4
|
+
* rows ≈ 24 days, 175 opens, ~108h, peak 12-17h).
|
|
5
|
+
*
|
|
6
|
+
* Two layers:
|
|
7
|
+
* - pure aggregation via an injected fake Database (no native driver needed);
|
|
8
|
+
* - a real better-sqlite3 db + real LocalVault round-trip proving the
|
|
9
|
+
* hand-built event passes schema validation, is searchable, and re-ingest
|
|
10
|
+
* dedups on the stable originalId.
|
|
11
|
+
*/
|
|
12
|
+
"use strict";
|
|
13
|
+
|
|
14
|
+
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
|
15
|
+
|
|
16
|
+
const fs = require("node:fs");
|
|
17
|
+
const path = require("node:path");
|
|
18
|
+
const os = require("node:os");
|
|
19
|
+
|
|
20
|
+
const { LocalVault } = require("../../lib/vault");
|
|
21
|
+
const { generateKeyHex } = require("../../lib/key-providers");
|
|
22
|
+
const {
|
|
23
|
+
USAGE_TABLE,
|
|
24
|
+
readDouyinUsageProfile,
|
|
25
|
+
summarizeUsageProfile,
|
|
26
|
+
buildUsageProfileEvents,
|
|
27
|
+
usageProfileToVault,
|
|
28
|
+
_internals,
|
|
29
|
+
} = require("../../lib/adapters/social-douyin-adb/usage-profile-reader");
|
|
30
|
+
|
|
31
|
+
// ── pure aggregation with an injected fake Database ───────────────────
|
|
32
|
+
function makeFakeDb(rows, { table = USAGE_TABLE } = {}) {
|
|
33
|
+
const cols = [
|
|
34
|
+
"id",
|
|
35
|
+
"timestamp",
|
|
36
|
+
"open_app_count",
|
|
37
|
+
"total_duration",
|
|
38
|
+
...Array.from({ length: 24 }, (_v, h) => `launch_hour_${h}`),
|
|
39
|
+
];
|
|
40
|
+
return class FakeDb {
|
|
41
|
+
constructor() {}
|
|
42
|
+
prepare(sql) {
|
|
43
|
+
return {
|
|
44
|
+
get: (arg) => {
|
|
45
|
+
if (/sqlite_master/.test(sql)) {
|
|
46
|
+
return arg === table ? { name: table } : undefined;
|
|
47
|
+
}
|
|
48
|
+
return undefined;
|
|
49
|
+
},
|
|
50
|
+
all: () => {
|
|
51
|
+
if (/table_info/.test(sql)) return cols.map((name) => ({ name }));
|
|
52
|
+
if (/FROM "/.test(sql)) return rows;
|
|
53
|
+
return [];
|
|
54
|
+
},
|
|
55
|
+
};
|
|
56
|
+
}
|
|
57
|
+
close() {}
|
|
58
|
+
};
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
function row({ ts, opens = 1, durMs = 0, hours = {} }) {
|
|
62
|
+
const r = { id: 1, timestamp: ts, open_app_count: opens, total_duration: durMs };
|
|
63
|
+
for (let h = 0; h < 24; h++) r[`launch_hour_${h}`] = hours[h] || 0;
|
|
64
|
+
return r;
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
const DAY = 86_400_000;
|
|
68
|
+
|
|
69
|
+
describe("readDouyinUsageProfile (injected fake db)", () => {
|
|
70
|
+
it("aggregates opens, duration, hour histogram, peak hour + bucket, distinct days", () => {
|
|
71
|
+
const base = Math.floor(1781000000000 / 1000); // seconds epoch
|
|
72
|
+
const Db = makeFakeDb([
|
|
73
|
+
row({ ts: base, opens: 2, durMs: 3_600_000, hours: { 13: 3, 9: 1 } }),
|
|
74
|
+
row({ ts: base + 86_400, opens: 1, durMs: 1_800_000, hours: { 14: 2, 20: 1 } }),
|
|
75
|
+
]);
|
|
76
|
+
const p = readDouyinUsageProfile("x.db", { _databaseClass: Db });
|
|
77
|
+
expect(p.sessions).toBe(2);
|
|
78
|
+
expect(p.days).toBe(2);
|
|
79
|
+
expect(p.totalOpens).toBe(3);
|
|
80
|
+
expect(p.totalDurationMs).toBe(5_400_000);
|
|
81
|
+
expect(p.hourHistogram[13]).toBe(3);
|
|
82
|
+
expect(p.hourHistogram[14]).toBe(2);
|
|
83
|
+
expect(p.peakHour).toBe(13); // 3 launches is the single max hour
|
|
84
|
+
expect(p.peakBucket).toBe("12-17h"); // 13+14 = 5 dominates
|
|
85
|
+
expect(p.bucketTotals["12-17h"]).toBe(5);
|
|
86
|
+
expect(p.bucketTotals["18-23h"]).toBe(1);
|
|
87
|
+
expect(p.from).toBe(base * 1000);
|
|
88
|
+
expect(p.to).toBe((base + 86_400) * 1000);
|
|
89
|
+
});
|
|
90
|
+
|
|
91
|
+
it("returns an empty profile when the table is absent", () => {
|
|
92
|
+
const Db = makeFakeDb([], { table: "SomeOtherTable" });
|
|
93
|
+
const p = readDouyinUsageProfile("x.db", { _databaseClass: Db });
|
|
94
|
+
expect(p.sessions).toBe(0);
|
|
95
|
+
expect(p.peakBucket).toBe(null);
|
|
96
|
+
expect(p.hourHistogram).toHaveLength(24);
|
|
97
|
+
expect(p.totalDurationMs).toBe(0);
|
|
98
|
+
});
|
|
99
|
+
|
|
100
|
+
it("counts the same calendar day once even across multiple sessions", () => {
|
|
101
|
+
const base = Math.floor(1781000000000 / 1000);
|
|
102
|
+
const Db = makeFakeDb([
|
|
103
|
+
row({ ts: base, hours: { 10: 1 } }),
|
|
104
|
+
row({ ts: base + 3600, hours: { 11: 1 } }), // same UTC day
|
|
105
|
+
]);
|
|
106
|
+
const p = readDouyinUsageProfile("x.db", { _databaseClass: Db });
|
|
107
|
+
expect(p.days).toBe(1);
|
|
108
|
+
expect(p.sessions).toBe(2);
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
it("toEpochMs treats >1e12 as ms else seconds; rejects junk", () => {
|
|
112
|
+
expect(_internals.toEpochMs(1781000000)).toBe(1781000000000);
|
|
113
|
+
expect(_internals.toEpochMs(1781000000000)).toBe(1781000000000);
|
|
114
|
+
expect(_internals.toEpochMs(0)).toBe(null);
|
|
115
|
+
expect(_internals.toEpochMs("nope")).toBe(null);
|
|
116
|
+
});
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
describe("summarizeUsageProfile + buildUsageProfileEvents", () => {
|
|
120
|
+
it("summary is empty-safe and renders hours + peak", () => {
|
|
121
|
+
expect(summarizeUsageProfile(null)).toMatch(/无数据/);
|
|
122
|
+
expect(summarizeUsageProfile({ sessions: 0 })).toMatch(/无数据/);
|
|
123
|
+
const txt = summarizeUsageProfile({
|
|
124
|
+
sessions: 81,
|
|
125
|
+
days: 24,
|
|
126
|
+
totalOpens: 175,
|
|
127
|
+
totalDurationMs: 388_440_000, // 107.9h
|
|
128
|
+
peakBucket: "12-17h",
|
|
129
|
+
});
|
|
130
|
+
expect(txt).toMatch(/24 天/);
|
|
131
|
+
expect(txt).toMatch(/175 次启动/);
|
|
132
|
+
expect(txt).toMatch(/107\.9 小时/);
|
|
133
|
+
expect(txt).toMatch(/12-17h/);
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
it("builds no events for an empty profile", () => {
|
|
137
|
+
expect(buildUsageProfileEvents({ sessions: 0 }).events).toHaveLength(0);
|
|
138
|
+
expect(buildUsageProfileEvents(null).events).toHaveLength(0);
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
it("builds one app-usage-profile event with stable originalId + histogram in extra", () => {
|
|
142
|
+
const profile = {
|
|
143
|
+
sessions: 81, days: 24, from: 1, to: 1781800000000,
|
|
144
|
+
totalOpens: 175, totalDurationMs: 388_440_000,
|
|
145
|
+
hourHistogram: new Array(24).fill(0), peakHour: 13, peakBucket: "12-17h",
|
|
146
|
+
bucketTotals: { "0-5h": 1, "6-11h": 81, "12-17h": 107, "18-23h": 75 },
|
|
147
|
+
};
|
|
148
|
+
const { events } = buildUsageProfileEvents(profile, { now: 1781900000000 });
|
|
149
|
+
expect(events).toHaveLength(1);
|
|
150
|
+
const e = events[0];
|
|
151
|
+
expect(e.subtype).toBe("other");
|
|
152
|
+
expect(e.source.adapter).toBe("social-douyin");
|
|
153
|
+
expect(e.source.originalId).toBe("social-douyin:usage-profile");
|
|
154
|
+
expect(e.source.capturedBy).toBe("sqlite");
|
|
155
|
+
expect(e.occurredAt).toBe(1781800000000); // profile.to
|
|
156
|
+
expect(e.extra.kind).toBe("app-usage-profile");
|
|
157
|
+
expect(e.extra.peakBucket).toBe("12-17h");
|
|
158
|
+
expect(e.extra.bucketTotals["12-17h"]).toBe(107);
|
|
159
|
+
expect(Array.isArray(e.extra.hourHistogram)).toBe(true);
|
|
160
|
+
});
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
// ── real db + real vault round-trip (schema validation + dedup) ───────
|
|
164
|
+
describe("usageProfileToVault — real sqlite + real vault", () => {
|
|
165
|
+
let dir, dbPath, vdir, vault;
|
|
166
|
+
|
|
167
|
+
beforeAll(() => {
|
|
168
|
+
const Database = require("better-sqlite3-multiple-ciphers");
|
|
169
|
+
dir = fs.mkdtempSync(path.join(os.tmpdir(), "douyin-usage-"));
|
|
170
|
+
dbPath = path.join(dir, "1128_feature_engineering.db");
|
|
171
|
+
const db = new Database(dbPath);
|
|
172
|
+
const hourCols = Array.from({ length: 24 }, (_v, h) => `launch_hour_${h} INTEGER`).join(", ");
|
|
173
|
+
db.exec(
|
|
174
|
+
`CREATE TABLE "${USAGE_TABLE}" (id INTEGER, timestamp INTEGER, ` +
|
|
175
|
+
`start_timestamp_ms INTEGER, end_timestamp_ms INTEGER, ` +
|
|
176
|
+
`open_app_count INTEGER, ${hourCols}, total_duration INTEGER)`,
|
|
177
|
+
);
|
|
178
|
+
const hzero = Array.from({ length: 24 }, () => 0);
|
|
179
|
+
const insCols = ["id", "timestamp", "start_timestamp_ms", "end_timestamp_ms", "open_app_count",
|
|
180
|
+
...Array.from({ length: 24 }, (_v, h) => `launch_hour_${h}`), "total_duration"];
|
|
181
|
+
const ph = insCols.map(() => "?").join(",");
|
|
182
|
+
const ins = db.prepare(`INSERT INTO "${USAGE_TABLE}" (${insCols.join(",")}) VALUES (${ph})`);
|
|
183
|
+
const baseSec = Math.floor(1781000000000 / 1000);
|
|
184
|
+
// two sessions on two different days; 13h is the peak hour
|
|
185
|
+
const h1 = [...hzero]; h1[13] = 3; h1[9] = 1;
|
|
186
|
+
const h2 = [...hzero]; h2[14] = 2;
|
|
187
|
+
ins.run(1, baseSec, baseSec * 1000, baseSec * 1000 + 1000, 2, ...h1, 3_600_000);
|
|
188
|
+
ins.run(2, baseSec + 86_400, 0, 0, 1, ...h2, 1_800_000);
|
|
189
|
+
db.close();
|
|
190
|
+
|
|
191
|
+
vdir = fs.mkdtempSync(path.join(os.tmpdir(), "douyin-usage-vault-"));
|
|
192
|
+
vault = new LocalVault({ path: path.join(vdir, "v.db"), key: generateKeyHex() });
|
|
193
|
+
vault.open();
|
|
194
|
+
});
|
|
195
|
+
|
|
196
|
+
afterAll(() => {
|
|
197
|
+
try { vault.close(); } catch (_e) { /* best-effort */ }
|
|
198
|
+
try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) { /* best-effort */ }
|
|
199
|
+
try { fs.rmSync(vdir, { recursive: true, force: true }); } catch (_e) { /* best-effort */ }
|
|
200
|
+
});
|
|
201
|
+
|
|
202
|
+
it("reads the real table, aggregates, and ingests one valid event", () => {
|
|
203
|
+
const profile = readDouyinUsageProfile(dbPath, {});
|
|
204
|
+
expect(profile.sessions).toBe(2);
|
|
205
|
+
expect(profile.days).toBe(2);
|
|
206
|
+
expect(profile.peakHour).toBe(13);
|
|
207
|
+
expect(profile.peakBucket).toBe("12-17h");
|
|
208
|
+
|
|
209
|
+
const r = usageProfileToVault(vault, dbPath, { now: 1781900000000 });
|
|
210
|
+
expect(r.ingested).toBe(1); // proves the hand-built event passed schema validation
|
|
211
|
+
expect(r.sessions).toBe(2);
|
|
212
|
+
|
|
213
|
+
const events = vault.queryEvents({ limit: 100 }) || [];
|
|
214
|
+
const mine = events.filter(
|
|
215
|
+
(e) => e.extra && e.extra.kind === "app-usage-profile",
|
|
216
|
+
);
|
|
217
|
+
expect(mine.length).toBe(1);
|
|
218
|
+
expect(mine[0].source.adapter).toBe("social-douyin");
|
|
219
|
+
});
|
|
220
|
+
|
|
221
|
+
it("re-ingest dedups on the stable originalId (no duplicate baseline)", () => {
|
|
222
|
+
usageProfileToVault(vault, dbPath, { now: 1781999999999 });
|
|
223
|
+
const events = vault.queryEvents({ limit: 100 }) || [];
|
|
224
|
+
const mine = events.filter(
|
|
225
|
+
(e) => e.extra && e.extra.kind === "app-usage-profile",
|
|
226
|
+
);
|
|
227
|
+
expect(mine.length).toBe(1); // still one — updated, not duplicated
|
|
228
|
+
});
|
|
229
|
+
});
|
|
@@ -7,10 +7,11 @@
|
|
|
7
7
|
*/
|
|
8
8
|
"use strict";
|
|
9
9
|
|
|
10
|
-
import { describe, it, expect, vi } from "vitest";
|
|
10
|
+
import { describe, it, expect, vi, beforeAll, afterAll } from "vitest";
|
|
11
11
|
|
|
12
12
|
const {
|
|
13
13
|
createDouyinWatchExtension,
|
|
14
|
+
watchHistoryToVault,
|
|
14
15
|
VIDEO_RECORD_DB_REMOTE_PATH,
|
|
15
16
|
_internals,
|
|
16
17
|
} = require("../../lib/adapters/social-douyin-adb/watch-history-reader");
|
|
@@ -55,9 +56,9 @@ function makeFakeDb(tablesToRows) {
|
|
|
55
56
|
}
|
|
56
57
|
|
|
57
58
|
describe("readDouyinWatchHistory", () => {
|
|
58
|
-
it("
|
|
59
|
+
it("merges record_0 + record_<uid>, attributes uid to the largest uid table, parses rows → ms", () => {
|
|
59
60
|
const Db = makeFakeDb({
|
|
60
|
-
record_0: [{ aid: "
|
|
61
|
+
record_0: [{ aid: "xrec0", view_time_timestamp: 1, enter_from: "a" }],
|
|
61
62
|
record_92585448288: [
|
|
62
63
|
{ aid: "7480000000000000001", view_time_timestamp: 1717800000, enter_from: "homepage_hot" },
|
|
63
64
|
{ aid: "7480000000000000002", view_time_timestamp: 1717800600, enter_from: "homepage_follow" },
|
|
@@ -65,19 +66,40 @@ describe("readDouyinWatchHistory", () => {
|
|
|
65
66
|
});
|
|
66
67
|
const r = _internals.readDouyinWatchHistory("x.db", { _databaseClass: Db });
|
|
67
68
|
expect(r.uid).toBe("92585448288");
|
|
68
|
-
|
|
69
|
+
// record_0 is no longer dropped: 1 (record_0) + 2 (uid) = 3 merged records.
|
|
70
|
+
expect(r.records).toHaveLength(3);
|
|
71
|
+
// Most-recent first.
|
|
69
72
|
expect(r.records[0]).toEqual({
|
|
70
|
-
awemeId: "
|
|
71
|
-
capturedAt:
|
|
72
|
-
enterFrom: "
|
|
73
|
+
awemeId: "7480000000000000002",
|
|
74
|
+
capturedAt: 1717800600 * 1000,
|
|
75
|
+
enterFrom: "homepage_follow",
|
|
73
76
|
});
|
|
77
|
+
const ids = r.records.map((x) => x.awemeId);
|
|
78
|
+
expect(ids).toContain("xrec0"); // the formerly-lost record_0 row
|
|
79
|
+
const rec0 = r.records.find((x) => x.awemeId === "xrec0");
|
|
80
|
+
expect(rec0).toEqual({ awemeId: "xrec0", capturedAt: 1000, enterFrom: "a" });
|
|
74
81
|
});
|
|
75
82
|
|
|
76
|
-
it("
|
|
77
|
-
const Db = makeFakeDb({
|
|
83
|
+
it("recovers history from record_0 alone (uid:null) — the bulk-in-record_0 device case", () => {
|
|
84
|
+
const Db = makeFakeDb({
|
|
85
|
+
record_0: [
|
|
86
|
+
{ aid: "a1", view_time_timestamp: 1717800000, enter_from: "homepage_hot" },
|
|
87
|
+
{ aid: "a2", view_time_timestamp: 1717800600, enter_from: "homepage_hot" },
|
|
88
|
+
],
|
|
89
|
+
});
|
|
78
90
|
const r = _internals.readDouyinWatchHistory("x.db", { _databaseClass: Db });
|
|
79
|
-
expect(r.uid).toBe(null);
|
|
80
|
-
expect(r.records).
|
|
91
|
+
expect(r.uid).toBe(null); // no logged-in account table → no attribution
|
|
92
|
+
expect(r.records).toHaveLength(2); // but the watch history is still recovered
|
|
93
|
+
expect(r.records.map((x) => x.awemeId).sort()).toEqual(["a1", "a2"]);
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
it("dedups the same (aid, timestamp) appearing in two record_* tables", () => {
|
|
97
|
+
const Db = makeFakeDb({
|
|
98
|
+
record_0: [{ aid: "dup", view_time_timestamp: 1717800000, enter_from: "homepage_hot" }],
|
|
99
|
+
record_111: [{ aid: "dup", view_time_timestamp: 1717800000, enter_from: "homepage_hot" }],
|
|
100
|
+
});
|
|
101
|
+
const r = _internals.readDouyinWatchHistory("x.db", { _databaseClass: Db });
|
|
102
|
+
expect(r.records).toHaveLength(1);
|
|
81
103
|
});
|
|
82
104
|
|
|
83
105
|
it("toEpochMs treats >1e12 as ms, else seconds; rejects junk", () => {
|
|
@@ -190,3 +212,58 @@ describe("createDouyinWatchExtension contract", () => {
|
|
|
190
212
|
await expect(createDouyinWatchExtension()({}, {})).rejects.toThrow(/ctx must provide/);
|
|
191
213
|
});
|
|
192
214
|
});
|
|
215
|
+
|
|
216
|
+
// ── watchHistoryToVault: local-db → canonical BROWSE events → vault ────
|
|
217
|
+
describe("watchHistoryToVault — real sqlite + real vault", () => {
|
|
218
|
+
const fs = require("node:fs");
|
|
219
|
+
const path = require("node:path");
|
|
220
|
+
const os = require("node:os");
|
|
221
|
+
const { LocalVault } = require("../../lib/vault");
|
|
222
|
+
const { generateKeyHex } = require("../../lib/key-providers");
|
|
223
|
+
let dir, dbPath, vdir, vault;
|
|
224
|
+
|
|
225
|
+
beforeAll(() => {
|
|
226
|
+
const Database = require("better-sqlite3-multiple-ciphers");
|
|
227
|
+
dir = fs.mkdtempSync(path.join(os.tmpdir(), "dy-watch-"));
|
|
228
|
+
dbPath = path.join(dir, "video_record.db");
|
|
229
|
+
const db = new Database(dbPath);
|
|
230
|
+
db.exec("CREATE TABLE record_0 (aid TEXT, view_time_timestamp INTEGER, enter_from TEXT)");
|
|
231
|
+
db.exec("CREATE TABLE record_92585448288 (aid TEXT, view_time_timestamp INTEGER, enter_from TEXT)");
|
|
232
|
+
db.prepare("INSERT INTO record_0 VALUES (?,?,?)").run("7644480728574545765", 1781706182375, "homepage_hot");
|
|
233
|
+
db.prepare("INSERT INTO record_92585448288 VALUES (?,?,?)").run("7480000000000000002", 1717800600000, "others_homepage");
|
|
234
|
+
db.close();
|
|
235
|
+
|
|
236
|
+
vdir = fs.mkdtempSync(path.join(os.tmpdir(), "dy-watch-vault-"));
|
|
237
|
+
vault = new LocalVault({ path: path.join(vdir, "v.db"), key: generateKeyHex() });
|
|
238
|
+
vault.open();
|
|
239
|
+
});
|
|
240
|
+
|
|
241
|
+
afterAll(() => {
|
|
242
|
+
try { vault.close(); } catch (_e) { /* best-effort */ }
|
|
243
|
+
try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) { /* best-effort */ }
|
|
244
|
+
try { fs.rmSync(vdir, { recursive: true, force: true }); } catch (_e) { /* best-effort */ }
|
|
245
|
+
});
|
|
246
|
+
|
|
247
|
+
it("merges record_0 + uid table and ingests canonical BROWSE events", () => {
|
|
248
|
+
const r = watchHistoryToVault(vault, dbPath, { now: 1781900000000 });
|
|
249
|
+
expect(r.records).toBe(2); // record_0 row no longer dropped
|
|
250
|
+
expect(r.ingested).toBe(2);
|
|
251
|
+
expect(r.uid).toBe("92585448288");
|
|
252
|
+
const events = vault.queryEvents({ limit: 100 }) || [];
|
|
253
|
+
const browse = events.filter(
|
|
254
|
+
(e) => e.subtype === "browse" && e.source.adapter === "social-douyin",
|
|
255
|
+
);
|
|
256
|
+
expect(browse.length).toBe(2);
|
|
257
|
+
expect(browse.some((e) => e.extra.awemeId === "7644480728574545765")).toBe(true);
|
|
258
|
+
expect(browse.some((e) => e.extra.enterFrom === "homepage_hot")).toBe(true);
|
|
259
|
+
});
|
|
260
|
+
|
|
261
|
+
it("re-ingest dedups on the per-record originalId", () => {
|
|
262
|
+
watchHistoryToVault(vault, dbPath, { now: 1781999999999 });
|
|
263
|
+
const events = vault.queryEvents({ limit: 100 }) || [];
|
|
264
|
+
const browse = events.filter(
|
|
265
|
+
(e) => e.subtype === "browse" && e.source.adapter === "social-douyin",
|
|
266
|
+
);
|
|
267
|
+
expect(browse.length).toBe(2); // still two
|
|
268
|
+
});
|
|
269
|
+
});
|
|
@@ -0,0 +1,155 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Toutiao article reader tests (real-device-driven 2026-06-18: the user's
|
|
3
|
+
* exported news_article.db `article` table = 48 feed-cache rows; title lives in
|
|
4
|
+
* the share_info JSON blob, not a column).
|
|
5
|
+
*
|
|
6
|
+
* Two layers: pure parsing via injected fake Database, + a real better-sqlite3
|
|
7
|
+
* db + real LocalVault round-trip proving the hand-built BROWSE events pass
|
|
8
|
+
* schema validation, are searchable, and re-ingest dedups on the stable
|
|
9
|
+
* originalId.
|
|
10
|
+
*/
|
|
11
|
+
"use strict";
|
|
12
|
+
|
|
13
|
+
import { describe, it, expect, beforeAll, afterAll } from "vitest";
|
|
14
|
+
|
|
15
|
+
const fs = require("node:fs");
|
|
16
|
+
const path = require("node:path");
|
|
17
|
+
const os = require("node:os");
|
|
18
|
+
|
|
19
|
+
const { LocalVault } = require("../../lib/vault");
|
|
20
|
+
const { generateKeyHex } = require("../../lib/key-providers");
|
|
21
|
+
const {
|
|
22
|
+
ARTICLE_TABLE,
|
|
23
|
+
readToutiaoArticles,
|
|
24
|
+
buildArticleEvents,
|
|
25
|
+
articlesToVault,
|
|
26
|
+
_internals,
|
|
27
|
+
} = require("../../lib/adapters/social-toutiao-adb/article-reader");
|
|
28
|
+
|
|
29
|
+
function makeFakeDb(rows, { table = ARTICLE_TABLE } = {}) {
|
|
30
|
+
const cols = [
|
|
31
|
+
"group_id", "item_id", "share_info", "ext_json", "share_url",
|
|
32
|
+
"behot_time", "read_timestamp", "is_user_digg", "is_user_repin",
|
|
33
|
+
];
|
|
34
|
+
return class FakeDb {
|
|
35
|
+
constructor() {}
|
|
36
|
+
prepare(sql) {
|
|
37
|
+
return {
|
|
38
|
+
get: (arg) => (/sqlite_master/.test(sql) ? (arg === table ? { name: table } : undefined) : undefined),
|
|
39
|
+
all: () => {
|
|
40
|
+
if (/table_info/.test(sql)) return cols.map((name) => ({ name }));
|
|
41
|
+
if (/FROM "/.test(sql)) return rows;
|
|
42
|
+
return [];
|
|
43
|
+
},
|
|
44
|
+
};
|
|
45
|
+
}
|
|
46
|
+
close() {}
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
describe("readToutiaoArticles (injected fake db)", () => {
|
|
51
|
+
it("parses title from share_info, strips the brand suffix, drops url tracking query", () => {
|
|
52
|
+
const Db = makeFakeDb([
|
|
53
|
+
{
|
|
54
|
+
group_id: 100, behot_time: 1781700000, read_timestamp: 0, is_user_digg: 1, is_user_repin: 0,
|
|
55
|
+
share_info: JSON.stringify({ title: "5月汽车出口延续快速增长态势 - 今日头条", share_url: "https://m.toutiao.com/g/100/?app=x&category_new=headline" }),
|
|
56
|
+
share_url: "https://m.toutiao.com/g/100/?category_new=headline",
|
|
57
|
+
},
|
|
58
|
+
]);
|
|
59
|
+
const { articles } = readToutiaoArticles("x.db", { _databaseClass: Db });
|
|
60
|
+
expect(articles).toHaveLength(1);
|
|
61
|
+
expect(articles[0].title).toBe("5月汽车出口延续快速增长态势"); // suffix stripped
|
|
62
|
+
expect(articles[0].url).toBe("https://m.toutiao.com/g/100/"); // query dropped
|
|
63
|
+
expect(articles[0].category).toBe("headline");
|
|
64
|
+
expect(articles[0].digg).toBe(true);
|
|
65
|
+
expect(articles[0].behotTime).toBe(1781700000 * 1000);
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
it("falls back to ext_json.title when share_info has none, and skips untitled rows", () => {
|
|
69
|
+
const Db = makeFakeDb([
|
|
70
|
+
{ group_id: 1, ext_json: JSON.stringify({ title: "来自 ext_json 的标题" }), share_info: "{}" },
|
|
71
|
+
{ group_id: 2, share_info: "{}", ext_json: "{}" }, // untitled → dropped
|
|
72
|
+
]);
|
|
73
|
+
const { articles } = readToutiaoArticles("x.db", { _databaseClass: Db });
|
|
74
|
+
expect(articles).toHaveLength(1);
|
|
75
|
+
expect(articles[0].title).toBe("来自 ext_json 的标题");
|
|
76
|
+
});
|
|
77
|
+
|
|
78
|
+
it("returns no articles when the table is absent", () => {
|
|
79
|
+
const Db = makeFakeDb([], { table: "other" });
|
|
80
|
+
expect(readToutiaoArticles("x.db", { _databaseClass: Db }).articles).toEqual([]);
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
it("buildArticleEvents → BROWSE events, social-toutiao source, stable originalId, read flag", () => {
|
|
84
|
+
const { events } = buildArticleEvents(
|
|
85
|
+
[{ groupId: "55", title: "标题", url: "u", category: "headline", behotTime: 2, readTimestamp: 1781700000000, digg: true, repin: false }],
|
|
86
|
+
{ now: 1781800000000 },
|
|
87
|
+
);
|
|
88
|
+
expect(events).toHaveLength(1);
|
|
89
|
+
const e = events[0];
|
|
90
|
+
expect(e.subtype).toBe("browse");
|
|
91
|
+
expect(e.source.adapter).toBe("social-toutiao");
|
|
92
|
+
expect(e.source.originalId).toBe("social-toutiao:article:55");
|
|
93
|
+
expect(e.occurredAt).toBe(1781700000000); // read_timestamp wins over behot
|
|
94
|
+
expect(e.extra.kind).toBe("article");
|
|
95
|
+
expect(e.extra.read).toBe(true);
|
|
96
|
+
expect(e.extra.digg).toBe(true);
|
|
97
|
+
});
|
|
98
|
+
|
|
99
|
+
it("extractCategory / extractUrl helpers", () => {
|
|
100
|
+
expect(_internals.extractCategory({ share_url: "x?a=1&category_new=my_tabs_digg&b=2" })).toBe("my_tabs_digg");
|
|
101
|
+
expect(_internals.extractUrl({ share_info: JSON.stringify({ share_url: "https://h/g/1/?t=1" }) })).toBe("https://h/g/1/");
|
|
102
|
+
});
|
|
103
|
+
});
|
|
104
|
+
|
|
105
|
+
describe("articlesToVault — real sqlite + real vault", () => {
|
|
106
|
+
let dir, dbPath, vdir, vault;
|
|
107
|
+
|
|
108
|
+
beforeAll(() => {
|
|
109
|
+
const Database = require("better-sqlite3-multiple-ciphers");
|
|
110
|
+
dir = fs.mkdtempSync(path.join(os.tmpdir(), "tt-article-"));
|
|
111
|
+
dbPath = path.join(dir, "news_article.db");
|
|
112
|
+
const db = new Database(dbPath);
|
|
113
|
+
db.exec(
|
|
114
|
+
"CREATE TABLE article (group_id INTEGER, item_id INTEGER, share_info TEXT, ext_json TEXT, " +
|
|
115
|
+
"share_url TEXT, behot_time INTEGER, read_timestamp INTEGER, is_user_digg INTEGER, is_user_repin INTEGER)",
|
|
116
|
+
);
|
|
117
|
+
const ins = db.prepare(
|
|
118
|
+
"INSERT INTO article (group_id, share_info, share_url, behot_time, read_timestamp, is_user_digg, is_user_repin) VALUES (?,?,?,?,?,?,?)",
|
|
119
|
+
);
|
|
120
|
+
ins.run(101, JSON.stringify({ title: "新华视点丨三峡水运新通道 - 今日头条", share_url: "https://m.toutiao.com/g/101/?x=1&category_new=headline" }), "https://m.toutiao.com/g/101/?category_new=headline", 1781700000, 0, 0, 0);
|
|
121
|
+
ins.run(102, JSON.stringify({ title: "5月汽车出口延续快速增长态势 - 今日头条", share_url: "https://m.toutiao.com/g/102/" }), "https://m.toutiao.com/g/102/?category_new=my_tabs_digg", 1781700100, 1781700200, 1, 0);
|
|
122
|
+
ins.run(103, "{}", "https://m.toutiao.com/g/103/", 1781700300, 0, 0, 0); // untitled → not ingested
|
|
123
|
+
db.close();
|
|
124
|
+
|
|
125
|
+
vdir = fs.mkdtempSync(path.join(os.tmpdir(), "tt-article-vault-"));
|
|
126
|
+
vault = new LocalVault({ path: path.join(vdir, "v.db"), key: generateKeyHex() });
|
|
127
|
+
vault.open();
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
afterAll(() => {
|
|
131
|
+
try { vault.close(); } catch (_e) { /* best-effort */ }
|
|
132
|
+
try { fs.rmSync(dir, { recursive: true, force: true }); } catch (_e) { /* best-effort */ }
|
|
133
|
+
try { fs.rmSync(vdir, { recursive: true, force: true }); } catch (_e) { /* best-effort */ }
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
it("reads titled articles and ingests valid BROWSE events", () => {
|
|
137
|
+
const r = articlesToVault(vault, dbPath, { now: 1781900000000 });
|
|
138
|
+
expect(r.articles).toBe(2); // the untitled row is skipped
|
|
139
|
+
expect(r.ingested).toBe(2); // both passed schema validation
|
|
140
|
+
expect(r.digg).toBe(1);
|
|
141
|
+
expect(r.read).toBe(1);
|
|
142
|
+
|
|
143
|
+
const events = vault.queryEvents({ limit: 100 }) || [];
|
|
144
|
+
const mine = events.filter((e) => e.extra && e.extra.kind === "article");
|
|
145
|
+
expect(mine.length).toBe(2);
|
|
146
|
+
expect(mine.every((e) => e.source.adapter === "social-toutiao")).toBe(true);
|
|
147
|
+
});
|
|
148
|
+
|
|
149
|
+
it("re-ingest dedups on the stable per-article originalId", () => {
|
|
150
|
+
articlesToVault(vault, dbPath, { now: 1781999999999 });
|
|
151
|
+
const events = vault.queryEvents({ limit: 100 }) || [];
|
|
152
|
+
const mine = events.filter((e) => e.extra && e.extra.kind === "article");
|
|
153
|
+
expect(mine.length).toBe(2); // still two — updated, not duplicated
|
|
154
|
+
});
|
|
155
|
+
});
|
|
@@ -151,6 +151,43 @@ describe("SpendingSkill", () => {
|
|
|
151
151
|
expect(r.summary.currency).toBe("CNY");
|
|
152
152
|
});
|
|
153
153
|
|
|
154
|
+
it("headline totals come from uncapped sumEventAmount, not the 5000-capped row fetch", async () => {
|
|
155
|
+
// queryEvents caps at 5000 rows per subtype; a >5000-payment user would
|
|
156
|
+
// have totalSpend silently undercounted. Fake a vault where the row fetch
|
|
157
|
+
// returns only a sample but sumEventAmount reports the true sum.
|
|
158
|
+
const fakeVault = {
|
|
159
|
+
queryEvents: ({ subtype }) =>
|
|
160
|
+
subtype === "payment"
|
|
161
|
+
? [
|
|
162
|
+
{ id: "p1", subtype: "payment", occurredAt: ts(2026, 5, 1), content: { amount: { value: 10, direction: "out", currency: "CNY" } } },
|
|
163
|
+
{ id: "p2", subtype: "payment", occurredAt: ts(2026, 5, 2), content: { amount: { value: 20, direction: "out", currency: "CNY" } } },
|
|
164
|
+
]
|
|
165
|
+
: [],
|
|
166
|
+
sumEventAmount: ({ subtype }) =>
|
|
167
|
+
subtype === "payment"
|
|
168
|
+
? { total: 88000, currency: "CNY", count: 5200, byDirection: { out: 88000, in: 1234 }, byCurrency: {} }
|
|
169
|
+
: { total: 0, currency: "CNY", count: 0, byDirection: { out: 0, in: 0 }, byCurrency: {} },
|
|
170
|
+
};
|
|
171
|
+
const r = await new SpendingSkill({ vault: fakeVault }).run({ commentary: false });
|
|
172
|
+
expect(r.summary.totalSpend).toBe(88000); // true sum, not the 30 from 2 sampled rows
|
|
173
|
+
expect(r.summary.totalIncome).toBe(1234);
|
|
174
|
+
expect(r.summary.eventCount).toBe(5200);
|
|
175
|
+
expect(r.summary.netFlow).toBe(Math.round((1234 - 88000) * 100) / 100);
|
|
176
|
+
});
|
|
177
|
+
|
|
178
|
+
it("with a merchant/person/direction filter it falls back to the row sample (SQL can't express it)", async () => {
|
|
179
|
+
const fakeVault = {
|
|
180
|
+
queryEvents: ({ subtype }) =>
|
|
181
|
+
subtype === "payment"
|
|
182
|
+
? [{ id: "p1", subtype: "payment", occurredAt: ts(2026, 5, 1), content: { amount: { value: 30, direction: "out", currency: "CNY" } }, extra: { counterparty: "美团" } }]
|
|
183
|
+
: [],
|
|
184
|
+
// would be used by the accurate path — must NOT be when a filter is active
|
|
185
|
+
sumEventAmount: () => { throw new Error("sumEventAmount must not be called when a row-only filter is set"); },
|
|
186
|
+
};
|
|
187
|
+
const r = await new SpendingSkill({ vault: fakeVault }).run({ commentary: false, direction: "out" });
|
|
188
|
+
expect(r.summary.totalSpend).toBe(30); // from the row sample, not SQL
|
|
189
|
+
});
|
|
190
|
+
|
|
154
191
|
it("breakdown by merchant ranks top spenders", async () => {
|
|
155
192
|
setupAlipayPayments();
|
|
156
193
|
const skill = new SpendingSkill({ vault: rig.vault });
|
|
@@ -580,12 +617,22 @@ describe("TimelineSkill", () => {
|
|
|
580
617
|
ingestedAt: Date.now(), source: defaultSource("system-data-android"),
|
|
581
618
|
extra: { kind: "contact-snapshot" },
|
|
582
619
|
});
|
|
620
|
+
// Aggregate-baseline event (douyin app-usage-profile) — a single rolling
|
|
621
|
+
// summary, not a discrete activity, so it must be filtered from the timeline.
|
|
622
|
+
rig.vault.putEvent({
|
|
623
|
+
id: "event-douyin-usage", type: "event", subtype: "other",
|
|
624
|
+
occurredAt: ts(2026, 6, 1), actor: "person-self",
|
|
625
|
+
content: { title: "抖音使用画像:24天/108h" },
|
|
626
|
+
ingestedAt: Date.now(), source: defaultSource("social-douyin"),
|
|
627
|
+
extra: { kind: "app-usage-profile" },
|
|
628
|
+
});
|
|
583
629
|
const skill = new TimelineSkill({ vault: rig.vault });
|
|
584
630
|
const r = await skill.run({ since: ts(2026, 4, 1) });
|
|
585
631
|
const ids = r.entries.map((e) => e.id);
|
|
586
632
|
expect(ids).toContain("act-1");
|
|
587
633
|
expect(ids).not.toContain("event-android-app-com.x");
|
|
588
634
|
expect(ids).not.toContain("event-android-contact-y");
|
|
635
|
+
expect(ids).not.toContain("event-douyin-usage");
|
|
589
636
|
expect(r.summary.totalEvents).toBe(1);
|
|
590
637
|
});
|
|
591
638
|
});
|
|
@@ -645,6 +692,34 @@ describe("OverviewSkill — cross-app unified snapshot", () => {
|
|
|
645
692
|
expect(r.summary.appsActive).toBe(4); // alipay-bill, shopping-taobao, wechat, social-douyin
|
|
646
693
|
});
|
|
647
694
|
|
|
695
|
+
it("byApp/byType/total use uncapped facetCounts, not the row-capped fetch", async () => {
|
|
696
|
+
// queryEvents hard-caps at 10k rows; on a big vault one dominant app crowds
|
|
697
|
+
// out the rest, so deriving byApp from the row fetch undercounts (real bug:
|
|
698
|
+
// social-douyin showed 10 instead of 232). Fake a vault where the capped
|
|
699
|
+
// row fetch and the SQL GROUP BY disagree, and assert overview trusts SQL.
|
|
700
|
+
const fakeVault = {
|
|
701
|
+
facetCounts: () => ({
|
|
702
|
+
byAdapter: { "social-douyin": 232, "wechat-pc": 100000 },
|
|
703
|
+
bySubtype: { browse: 232, message: 100000 },
|
|
704
|
+
byCategory: {},
|
|
705
|
+
total: 100232,
|
|
706
|
+
mode: "like",
|
|
707
|
+
shortQuery: false,
|
|
708
|
+
}),
|
|
709
|
+
// simulates the cap: only wechat rows survived the recent-10k window
|
|
710
|
+
queryEvents: () => [
|
|
711
|
+
{ id: "w1", subtype: "message", occurredAt: ts(2026, 6, 1), actor: "person-self", source: { adapter: "wechat-pc" }, content: {} },
|
|
712
|
+
],
|
|
713
|
+
};
|
|
714
|
+
const r = await new OverviewSkill({ vault: fakeVault }).run({ commentary: false });
|
|
715
|
+
const dy = r.byApp.find((a) => a.app === "social-douyin");
|
|
716
|
+
expect(dy && dy.count).toBe(232); // would be absent/0 if derived from the row fetch
|
|
717
|
+
expect(r.byApp[0].app).toBe("wechat-pc"); // 100000 sorts first
|
|
718
|
+
expect(r.summary.totalEvents).toBe(100232);
|
|
719
|
+
expect(r.summary.appsActive).toBe(2);
|
|
720
|
+
expect(r.byType.find((t) => t.type === "browse").count).toBe(232);
|
|
721
|
+
});
|
|
722
|
+
|
|
648
723
|
it("counts 4 distinct apps + sums cross-app spend + top contact merged", async () => {
|
|
649
724
|
const { vault } = rig;
|
|
650
725
|
makePerson(vault, "p-friend", ["小明"], {}, { adapter: "wechat" });
|