@chainlesschain/personal-data-hub 0.4.27 → 0.4.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -151,6 +151,43 @@ describe("SpendingSkill", () => {
151
151
  expect(r.summary.currency).toBe("CNY");
152
152
  });
153
153
 
154
+ it("headline totals come from uncapped sumEventAmount, not the 5000-capped row fetch", async () => {
155
+ // queryEvents caps at 5000 rows per subtype; a >5000-payment user would
156
+ // have totalSpend silently undercounted. Fake a vault where the row fetch
157
+ // returns only a sample but sumEventAmount reports the true sum.
158
+ const fakeVault = {
159
+ queryEvents: ({ subtype }) =>
160
+ subtype === "payment"
161
+ ? [
162
+ { id: "p1", subtype: "payment", occurredAt: ts(2026, 5, 1), content: { amount: { value: 10, direction: "out", currency: "CNY" } } },
163
+ { id: "p2", subtype: "payment", occurredAt: ts(2026, 5, 2), content: { amount: { value: 20, direction: "out", currency: "CNY" } } },
164
+ ]
165
+ : [],
166
+ sumEventAmount: ({ subtype }) =>
167
+ subtype === "payment"
168
+ ? { total: 88000, currency: "CNY", count: 5200, byDirection: { out: 88000, in: 1234 }, byCurrency: {} }
169
+ : { total: 0, currency: "CNY", count: 0, byDirection: { out: 0, in: 0 }, byCurrency: {} },
170
+ };
171
+ const r = await new SpendingSkill({ vault: fakeVault }).run({ commentary: false });
172
+ expect(r.summary.totalSpend).toBe(88000); // true sum, not the 30 from 2 sampled rows
173
+ expect(r.summary.totalIncome).toBe(1234);
174
+ expect(r.summary.eventCount).toBe(5200);
175
+ expect(r.summary.netFlow).toBe(Math.round((1234 - 88000) * 100) / 100);
176
+ });
177
+
178
+ it("with a merchant/person/direction filter it falls back to the row sample (SQL can't express it)", async () => {
179
+ const fakeVault = {
180
+ queryEvents: ({ subtype }) =>
181
+ subtype === "payment"
182
+ ? [{ id: "p1", subtype: "payment", occurredAt: ts(2026, 5, 1), content: { amount: { value: 30, direction: "out", currency: "CNY" } }, extra: { counterparty: "美团" } }]
183
+ : [],
184
+ // would be used by the accurate path — must NOT be when a filter is active
185
+ sumEventAmount: () => { throw new Error("sumEventAmount must not be called when a row-only filter is set"); },
186
+ };
187
+ const r = await new SpendingSkill({ vault: fakeVault }).run({ commentary: false, direction: "out" });
188
+ expect(r.summary.totalSpend).toBe(30); // from the row sample, not SQL
189
+ });
190
+
154
191
  it("breakdown by merchant ranks top spenders", async () => {
155
192
  setupAlipayPayments();
156
193
  const skill = new SpendingSkill({ vault: rig.vault });
@@ -498,6 +535,32 @@ describe("InterestsSkill", () => {
498
535
  expect(r.topTopics[0].name).toBe("Travel");
499
536
  expect(r.llmInterests).toBeNull();
500
537
  });
538
+
539
+ it("drops unresolved numeric group-id topics (e.g. WeChat chatroom ids) from the profile", async () => {
540
+ // Real interest topic
541
+ rig.vault.putTopic({
542
+ id: "topic-doubao", type: "topic", name: "豆包",
543
+ derivedFromEvents: ["e1"],
544
+ ingestedAt: Date.now(), source: defaultSource("test"),
545
+ });
546
+ // Unresolved group-chat topics named by raw numeric chatroom id — noise.
547
+ rig.vault.putTopic({
548
+ id: "topic-g1", type: "topic", name: "45498354778",
549
+ derivedFromEvents: [],
550
+ ingestedAt: Date.now() + 1, source: defaultSource("test"),
551
+ });
552
+ rig.vault.putTopic({
553
+ id: "topic-g2", type: "topic", name: "54346634535",
554
+ derivedFromEvents: [],
555
+ ingestedAt: Date.now() + 2, source: defaultSource("test"),
556
+ });
557
+ const skill = new InterestsSkill({ vault: rig.vault });
558
+ const r = await skill.run({});
559
+ const names = r.topTopics.map((t) => t.name);
560
+ expect(names).toContain("豆包");
561
+ expect(names).not.toContain("45498354778");
562
+ expect(names).not.toContain("54346634535");
563
+ });
501
564
  });
502
565
 
503
566
  // ─── TimelineSkill ──────────────────────────────────────────────────────
@@ -534,6 +597,44 @@ describe("TimelineSkill", () => {
534
597
  const r = await skill.run({ since: ts(2026, 4, 1) });
535
598
  expect(r.llm_narrative).toBe("你这周点了一次外卖。");
536
599
  });
600
+
601
+ it("excludes inventory-snapshot events (installed-app / contact roster) from the narrative", async () => {
602
+ // Real activity event (extra has no `kind` → must be kept)
603
+ makePayment(rig.vault, { id: "act-1", occurredAt: ts(2026, 5, 1), counterpartyName: "美团", amount: 10, adapter: "alipay-bill", title: "外卖" });
604
+ // Inventory-snapshot events stamped at a LATER (collection) time — these
605
+ // would dominate a DESC time query but must be filtered out.
606
+ rig.vault.putEvent({
607
+ id: "event-android-app-com.x", type: "event", subtype: "other",
608
+ occurredAt: ts(2026, 6, 1), actor: "person-self",
609
+ content: { title: "应用:X" },
610
+ ingestedAt: Date.now(), source: defaultSource("system-data-android"),
611
+ extra: { kind: "app-snapshot", packageName: "com.x" },
612
+ });
613
+ rig.vault.putEvent({
614
+ id: "event-android-contact-y", type: "event", subtype: "other",
615
+ occurredAt: ts(2026, 6, 1), actor: "person-self",
616
+ content: { title: "联系人:Y" },
617
+ ingestedAt: Date.now(), source: defaultSource("system-data-android"),
618
+ extra: { kind: "contact-snapshot" },
619
+ });
620
+ // Aggregate-baseline event (douyin app-usage-profile) — a single rolling
621
+ // summary, not a discrete activity, so it must be filtered from the timeline.
622
+ rig.vault.putEvent({
623
+ id: "event-douyin-usage", type: "event", subtype: "other",
624
+ occurredAt: ts(2026, 6, 1), actor: "person-self",
625
+ content: { title: "抖音使用画像:24天/108h" },
626
+ ingestedAt: Date.now(), source: defaultSource("social-douyin"),
627
+ extra: { kind: "app-usage-profile" },
628
+ });
629
+ const skill = new TimelineSkill({ vault: rig.vault });
630
+ const r = await skill.run({ since: ts(2026, 4, 1) });
631
+ const ids = r.entries.map((e) => e.id);
632
+ expect(ids).toContain("act-1");
633
+ expect(ids).not.toContain("event-android-app-com.x");
634
+ expect(ids).not.toContain("event-android-contact-y");
635
+ expect(ids).not.toContain("event-douyin-usage");
636
+ expect(r.summary.totalEvents).toBe(1);
637
+ });
537
638
  });
538
639
 
539
640
  // ─── runAnalysisSkill dispatcher ─────────────────────────────────────────
@@ -591,6 +692,34 @@ describe("OverviewSkill — cross-app unified snapshot", () => {
591
692
  expect(r.summary.appsActive).toBe(4); // alipay-bill, shopping-taobao, wechat, social-douyin
592
693
  });
593
694
 
695
+ it("byApp/byType/total use uncapped facetCounts, not the row-capped fetch", async () => {
696
+ // queryEvents hard-caps at 10k rows; on a big vault one dominant app crowds
697
+ // out the rest, so deriving byApp from the row fetch undercounts (real bug:
698
+ // social-douyin showed 10 instead of 232). Fake a vault where the capped
699
+ // row fetch and the SQL GROUP BY disagree, and assert overview trusts SQL.
700
+ const fakeVault = {
701
+ facetCounts: () => ({
702
+ byAdapter: { "social-douyin": 232, "wechat-pc": 100000 },
703
+ bySubtype: { browse: 232, message: 100000 },
704
+ byCategory: {},
705
+ total: 100232,
706
+ mode: "like",
707
+ shortQuery: false,
708
+ }),
709
+ // simulates the cap: only wechat rows survived the recent-10k window
710
+ queryEvents: () => [
711
+ { id: "w1", subtype: "message", occurredAt: ts(2026, 6, 1), actor: "person-self", source: { adapter: "wechat-pc" }, content: {} },
712
+ ],
713
+ };
714
+ const r = await new OverviewSkill({ vault: fakeVault }).run({ commentary: false });
715
+ const dy = r.byApp.find((a) => a.app === "social-douyin");
716
+ expect(dy && dy.count).toBe(232); // would be absent/0 if derived from the row fetch
717
+ expect(r.byApp[0].app).toBe("wechat-pc"); // 100000 sorts first
718
+ expect(r.summary.totalEvents).toBe(100232);
719
+ expect(r.summary.appsActive).toBe(2);
720
+ expect(r.byType.find((t) => t.type === "browse").count).toBe(232);
721
+ });
722
+
594
723
  it("counts 4 distinct apps + sums cross-app spend + top contact merged", async () => {
595
724
  const { vault } = rig;
596
725
  makePerson(vault, "p-friend", ["小明"], {}, { adapter: "wechat" });
@@ -69,6 +69,27 @@ describe("parseTimeWindow", () => {
69
69
  expect(months.since).toBeLessThan(NOW);
70
70
  });
71
71
 
72
+ it("最近 N 个月 does NOT month-overflow on a month-end day (regression)", () => {
73
+ // Naive setMonth(getMonth()-1) on Mar 31 lands on "Feb 31" → Mar 3, silently
74
+ // dropping all of February from the window. since must land in February.
75
+ const mar31 = new Date(2026, 2, 31, 12, 0, 0).getTime();
76
+ const since = parseTimeWindow("最近1个月", mar31).since;
77
+ const d = new Date(since);
78
+ expect(d.getFullYear()).toBe(2026);
79
+ expect(d.getMonth()).toBe(1); // February, NOT still March
80
+ expect(d.getDate()).toBe(28); // clamped to Feb's last day
81
+
82
+ // May 31 −1mo → April 30 (April has 30 days), not May 1.
83
+ const may31 = new Date(2026, 4, 31, 12, 0, 0).getTime();
84
+ const aprSince = new Date(parseTimeWindow("最近1个月", may31).since);
85
+ expect(aprSince.getMonth()).toBe(3); // April
86
+ expect(aprSince.getDate()).toBe(30);
87
+
88
+ // mid-month is unaffected: Mar 15 −1mo → Feb 15.
89
+ const mar15 = new Date(2026, 2, 15, 12, 0, 0).getTime();
90
+ expect(new Date(parseTimeWindow("最近1个月", mar15).since).getDate()).toBe(15);
91
+ });
92
+
72
93
  it("YYYY 年 M 月 → that calendar month", () => {
73
94
  const w = parseTimeWindow("2024 年 7 月在淘宝下过几单", NOW);
74
95
  expect(w.since).toBe(new Date(2024, 6, 1).getTime());
@@ -95,6 +116,17 @@ describe("parseFilters", () => {
95
116
  expect(parseFilters("我朋友圈发了啥").subtype).toBe("post");
96
117
  });
97
118
 
119
+ it("bare 收到 does not steal non-income subtypes (regression)", () => {
120
+ // 收到 ("receive") used to match income before message → "收到多少消息"
121
+ // was mis-classified as income.
122
+ expect(parseFilters("我收到多少消息").subtype).toBe("message");
123
+ expect(parseFilters("收到的快递").subtype).toBeUndefined();
124
+ expect(parseFilters("收到转账了吗").subtype).toBe("transfer"); // still transfer
125
+ // genuine income keywords still classify
126
+ expect(parseFilters("这个月工资多少").subtype).toBe("income");
127
+ expect(parseFilters("进账多少").subtype).toBe("income");
128
+ });
129
+
98
130
  it("identifies adapter via keywords (Chinese + English)", () => {
99
131
  expect(parseFilters("淘宝今年下了多少单").adapter).toBe("taobao");
100
132
  expect(parseFilters("支付宝账单").adapter).toBe("alipay-bill");
@@ -114,9 +146,40 @@ describe("parseIntent", () => {
114
146
  expect(parseIntent("我今年开销加起来")).toBe("sum-amount");
115
147
  });
116
148
 
149
+ it("sum-amount for spending questions WITHOUT an explicit 总共/合计", () => {
150
+ // Regression: these very common phrasings previously fell through to
151
+ // intent=list (→ engine returned a row sample instead of the authoritative
152
+ // sumEventAmount total).
153
+ expect(parseIntent("我这个月花了多少钱")).toBe("sum-amount");
154
+ expect(parseIntent("上个月在淘宝花了多少钱")).toBe("sum-amount");
155
+ expect(parseIntent("这个月消费多少")).toBe("sum-amount");
156
+ expect(parseIntent("花了多少")).toBe("sum-amount");
157
+ });
158
+
159
+ it("sum-amount for INCOME-side questions (收入/赚/到账)", () => {
160
+ // Regression: income amount words were missing → "总共收入多少" even
161
+ // mis-returned "count". Both with and without 总共.
162
+ expect(parseIntent("这个月收入多少")).toBe("sum-amount");
163
+ expect(parseIntent("我这个月赚了多少")).toBe("sum-amount");
164
+ expect(parseIntent("上个月到账多少")).toBe("sum-amount");
165
+ expect(parseIntent("总共收入多少")).toBe("sum-amount");
166
+ });
167
+
168
+ it("count for 多少X / 几X measure-word symmetry (多少条/多少单 were missed)", () => {
169
+ expect(parseIntent("我有多少条朋友圈")).toBe("count");
170
+ expect(parseIntent("下了多少单")).toBe("count");
171
+ expect(parseIntent("发了多少条微博")).toBe("count");
172
+ expect(parseIntent("多少笔交易")).toBe("count");
173
+ expect(parseIntent("几部电影")).toBe("count");
174
+ });
175
+
117
176
  it("count when 'how many' phrasing", () => {
118
177
  expect(parseIntent("最近多少次跟妈妈聊过")).toBe("count");
119
178
  expect(parseIntent("我下了几单")).toBe("count");
179
+ // the new sum-amount rule must NOT steal a count question that also
180
+ // mentions spending ("how many TIMES did I spend").
181
+ expect(parseIntent("消费了多少次")).toBe("count");
182
+ expect(parseIntent("花了多少次钱")).toBe("count");
120
183
  });
121
184
 
122
185
  it("latest when 'recent / latest'", () => {
@@ -0,0 +1,253 @@
1
+ /**
2
+ * Douyin on-device usage-profile reader — recovers the user's app-usage
3
+ * baseline (active hours / session count / total time-on-app) from the local
4
+ * `1128_feature_engineering.db` table `FEInternalUserActivityTable`, a plaintext
5
+ * SQLite table the app keeps for its own client-side feature store.
6
+ *
7
+ * Why this exists (real-device 2026-06-18, user's exported plaintext DB):
8
+ * - `FEInternalUserActivityTable` rows are per-session aggregates:
9
+ * { timestamp(sec), start/end_timestamp_ms, open_app_count,
10
+ * launch_hour_0..23, total_duration(ms) }
11
+ * - 81 rows spanning ~31 days = "how the user uses Douyin": ~175 opens,
12
+ * ~108 hours total, peak 12–17h. This behavioral baseline is exactly what a
13
+ * personal-AI should know, and it's plaintext (no signing/encryption).
14
+ *
15
+ * This module is the testable core (reader + pure summarizer + vault-event
16
+ * builder). The device pull/collector wiring (mirroring watch-history-reader's
17
+ * pullVideoRecordDbViaSu) is a follow-up; the remote db sub-path must be
18
+ * confirmed on a device first.
19
+ *
20
+ * Authorization: only on your own device/account.
21
+ */
22
+ "use strict";
23
+
24
+ const { newId } = require("../../ids");
25
+ const {
26
+ _internals: { loadDatabaseClass },
27
+ } = require("../social-bilibili-adb/chromium-cookies-reader");
28
+
29
/** Name of the SQLite table that holds the per-session usage aggregates. */
const USAGE_TABLE = "FEInternalUserActivityTable";

/** Written to `source.adapterVersion` of the generated baseline event. */
const PROFILE_VERSION = "usage-profile-0.1";

/**
 * Coarse time-of-day buckets used to report the peak usage period.
 * `from`/`to` are inclusive launch hours; together the buckets cover 0-23.
 *
 * The list is exported, so both the array AND every bucket descriptor are
 * frozen: a shallow freeze alone would still let a consumer rewrite a bucket's
 * `from`/`to`/`label` and silently skew every later aggregation.
 */
const HOUR_BUCKETS = Object.freeze(
  [
    { label: "0-5h", from: 0, to: 5 },
    { label: "6-11h", from: 6, to: 11 },
    { label: "12-17h", from: 12, to: 17 },
    { label: "18-23h", from: 18, to: 23 },
  ].map((bucket) => Object.freeze(bucket)),
);
37
+
38
/**
 * Normalize an epoch timestamp of unknown unit to integer milliseconds.
 *
 * Heuristic: a value above 1e12 is taken to be milliseconds already; any other
 * positive value is treated as seconds and scaled by 1000. The result is
 * floored.
 *
 * @param {*} v raw timestamp (anything `Number()` can coerce)
 * @returns {number|null} epoch milliseconds, or null when the input does not
 *   coerce to a finite, strictly positive number
 */
function toEpochMs(v) {
  const numeric = Number(v);
  const usable = Number.isFinite(numeric) && numeric > 0;
  if (!usable) {
    return null;
  }
  const millis = numeric > 1e12 ? numeric : numeric * 1000;
  return Math.floor(millis);
}
44
+
45
/**
 * Open a feature-engineering SQLite file read-only and fold every row of
 * `FEInternalUserActivityTable` into one usage profile.
 *
 * The database class comes from `opts._databaseClass` when supplied (test
 * injection), otherwise from `loadDatabaseClass()`. Columns that the table
 * does not have are simply skipped, so their totals stay at zero. The
 * connection is always closed, and a failure to close is ignored.
 *
 * @param {string} dbPath path handed to the Database constructor
 * @param {object} [opts]
 * @param {Function} [opts._databaseClass] Database class override
 * @returns {{
 *   sessions: number, days: number, from: number|null, to: number|null,
 *   totalOpens: number, totalDurationMs: number,
 *   hourHistogram: number[], peakHour: number|null,
 *   peakBucket: string|null, bucketTotals: Record<string,number>
 * }} the aggregated profile; an all-zero profile when the table is absent
 */
function readDouyinUsageProfile(dbPath, opts = {}) {
  const Database = opts._databaseClass || loadDatabaseClass();
  const db = new Database(dbPath, { readonly: true });
  try {
    const tableRow = db
      .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name=?")
      .get(USAGE_TABLE);
    if (!tableRow) {
      return emptyProfile();
    }

    // Discover which of the expected columns this build of the table has.
    const columnInfo = db.prepare(`PRAGMA table_info("${USAGE_TABLE}")`).all();
    const columnNames = new Set(columnInfo.map((column) => column.name));
    const hourColumns = Array.from({ length: 24 }, (_unused, hour) => {
      const candidate = `launch_hour_${hour}`;
      return columnNames.has(candidate) ? candidate : null;
    });
    const readsOpens = columnNames.has("open_app_count");
    const readsDuration = columnNames.has("total_duration");
    const readsTimestamp = columnNames.has("timestamp");

    const sessionRows = db.prepare(`SELECT * FROM "${USAGE_TABLE}"`).all();
    const hourHistogram = new Array(24).fill(0);
    const distinctDays = new Set();
    let totalOpens = 0;
    let totalDurationMs = 0;
    let from = null;
    let to = null;

    for (const row of sessionRows) {
      // Non-numeric / missing cells count as zero.
      if (readsOpens) {
        totalOpens += Number(row.open_app_count) || 0;
      }
      if (readsDuration) {
        totalDurationMs += Number(row.total_duration) || 0;
      }
      hourColumns.forEach((column, hour) => {
        if (column) {
          hourHistogram[hour] += Number(row[column]) || 0;
        }
      });

      if (!readsTimestamp) continue;
      const stampMs = toEpochMs(row.timestamp);
      if (stampMs == null) continue;
      from = from == null ? stampMs : Math.min(from, stampMs);
      to = to == null ? stampMs : Math.max(to, stampMs);
      // Day index is derived from UTC epoch days, not the local calendar —
      // accepted as good enough for a coarse "distinct days" count.
      distinctDays.add(Math.floor(stampMs / 86_400_000));
    }

    // Busiest single hour; the earliest hour wins a tie, and an all-zero
    // histogram yields no peak at all.
    let peakHour = null;
    let peakHourLaunches = -1;
    hourHistogram.forEach((launches, hour) => {
      if (launches > peakHourLaunches) {
        peakHourLaunches = launches;
        peakHour = hour;
      }
    });
    if (peakHourLaunches <= 0) {
      peakHour = null;
    }

    // Same selection rule, applied to the coarse time-of-day buckets.
    const bucketTotals = {};
    let peakBucket = null;
    let peakBucketLaunches = -1;
    for (const { label, from: firstHour, to: lastHour } of HOUR_BUCKETS) {
      const launches = hourHistogram
        .slice(firstHour, lastHour + 1)
        .reduce((acc, value) => acc + value, 0);
      bucketTotals[label] = launches;
      if (launches > peakBucketLaunches) {
        peakBucketLaunches = launches;
        peakBucket = label;
      }
    }
    if (peakBucketLaunches <= 0) {
      peakBucket = null;
    }

    return {
      sessions: sessionRows.length,
      days: distinctDays.size,
      from,
      to,
      totalOpens,
      totalDurationMs,
      hourHistogram,
      peakHour,
      peakBucket,
      bucketTotals,
    };
  } finally {
    try {
      db.close();
    } catch (_closeError) {
      /* closing is best-effort; the profile (or the real error) matters more */
    }
  }
}
143
+
144
/**
 * Build the all-zero usage profile returned when there is nothing to read.
 * It has the same shape as a populated profile: every bucket in
 * `HOUR_BUCKETS` is present with a total of 0, the histogram has 24 zeroed
 * slots, and the range / peak fields are null.
 *
 * @returns {object} a fresh profile object (never shared between calls)
 */
function emptyProfile() {
  const zeroedBuckets = Object.fromEntries(
    HOUR_BUCKETS.map(({ label }) => [label, 0]),
  );
  return {
    sessions: 0,
    days: 0,
    from: null,
    to: null,
    totalOpens: 0,
    totalDurationMs: 0,
    hourHistogram: Array.from({ length: 24 }, () => 0),
    peakHour: null,
    peakBucket: null,
    bucketTotals: zeroedBuckets,
  };
}
160
+
161
/**
 * Render a usage profile as a single human-readable (Chinese) sentence.
 * Pure: reads only the fields of `profile`.
 *
 * Total duration is shown in hours rounded to one decimal place; the peak
 * time-of-day bucket is appended only when the profile has one.
 *
 * @param {object|null|undefined} profile profile as produced by the reader
 * @returns {string} the summary, or a fixed "no data" line when the profile
 *   is missing or has zero sessions
 */
function summarizeUsageProfile(profile) {
  const hasData = Boolean(profile) && profile.sessions !== 0;
  if (!hasData) {
    return "抖音使用画像:无数据";
  }
  const { days, sessions, totalOpens, totalDurationMs, peakBucket } = profile;
  const tenthsOfHours = Math.round((totalDurationMs / 3_600_000) * 10);
  const hours = tenthsOfHours / 10;
  const peakSuffix = peakBucket ? `,高峰时段 ${peakBucket}` : "";
  return [
    `抖音使用画像:${days} 天内 ${sessions} 个会话、`,
    `${totalOpens} 次启动、累计约 ${hours} 小时`,
    peakSuffix,
  ].join("");
}
171
+
172
/**
 * Wrap a usage profile in a single "app-usage baseline" vault event.
 *
 * The event is stamped at the profile's latest timestamp (`profile.to`), or
 * at `now` when the profile has none. `source.originalId` is a fixed string
 * for this adapter, while `id` is freshly generated on every call.
 * NOTE(review): the fixed originalId is presumably what lets a re-ingest
 * update the existing event instead of duplicating it — confirm against the
 * vault's putBatch semantics.
 *
 * `extra.kind = "app-usage-profile"` tags the event as an aggregate baseline
 * so that consumers can tell it apart from discrete activity events.
 *
 * @param {object|null|undefined} profile profile as produced by the reader
 * @param {object} [opts]
 * @param {number} [opts.now] clock override (epoch ms); defaults to Date.now()
 * @returns {{events: object[]}} one event, or none when the profile is
 *   missing or has zero sessions
 */
function buildUsageProfileEvents(profile, opts = {}) {
  const nothingToReport = !profile || profile.sessions === 0;
  if (nothingToReport) {
    return { events: [] };
  }

  const now = Number.isFinite(opts.now) ? opts.now : Date.now();
  const occurredAt = Number.isFinite(profile.to) ? profile.to : now;
  const summaryLine = summarizeUsageProfile(profile);

  const source = {
    adapter: "social-douyin",
    adapterVersion: PROFILE_VERSION,
    originalId: "social-douyin:usage-profile",
    capturedAt: occurredAt,
    capturedBy: "sqlite",
  };
  // The raw aggregates travel with the event so consumers need not re-parse
  // the summary text.
  const extra = {
    platform: "douyin",
    kind: "app-usage-profile",
    days: profile.days,
    sessions: profile.sessions,
    totalOpens: profile.totalOpens,
    totalDurationMs: profile.totalDurationMs,
    hourHistogram: profile.hourHistogram,
    peakHour: profile.peakHour,
    peakBucket: profile.peakBucket,
    bucketTotals: profile.bucketTotals,
    rangeFrom: profile.from,
    rangeTo: profile.to,
  };

  const baselineEvent = {
    id: newId(),
    type: "event",
    subtype: "other",
    occurredAt,
    actor: "person-self",
    content: { title: summaryLine, text: summaryLine },
    ingestedAt: now,
    source,
    extra,
  };
  return { events: [baselineEvent] };
}
217
+
218
/**
 * Read a feature-engineering database, turn it into the usage-profile
 * baseline event and hand that event to the vault in one batch.
 * `vault.putBatch` is not called at all when the profile yields no event.
 *
 * @param {object} vault LocalVault (must expose putBatch)
 * @param {string} dbPath path to 1128_feature_engineering.db
 * @param {object} [opts] forwarded to both the reader and the event builder
 * @returns {{ingested: number, sessions: number, days: number, summary: string}}
 *   `ingested` is the `events` count reported by putBatch (0 when absent)
 * @throws {TypeError} when `vault` lacks putBatch or `dbPath` is not a
 *   non-empty string
 */
function usageProfileToVault(vault, dbPath, opts = {}) {
  const canWrite = Boolean(vault) && typeof vault.putBatch === "function";
  if (!canWrite) {
    throw new TypeError("usageProfileToVault: vault with putBatch required");
  }
  const hasPath = typeof dbPath === "string" && dbPath !== "";
  if (!hasPath) {
    throw new TypeError("usageProfileToVault: dbPath required");
  }

  const profile = readDouyinUsageProfile(dbPath, opts);
  const { events } = buildUsageProfileEvents(profile, opts);

  let ingested = 0;
  if (events.length) {
    const outcome = vault.putBatch({ events });
    ingested = outcome.events || 0;
  }

  return {
    ingested,
    sessions: profile.sessions,
    days: profile.days,
    summary: summarizeUsageProfile(profile),
  };
}
244
+
245
+ module.exports = {
246
+ USAGE_TABLE,
247
+ HOUR_BUCKETS,
248
+ readDouyinUsageProfile,
249
+ summarizeUsageProfile,
250
+ buildUsageProfileEvents,
251
+ usageProfileToVault,
252
+ _internals: { toEpochMs, emptyProfile },
253
+ };
@@ -29,6 +29,7 @@ const crypto = require("node:crypto");
29
29
  const {
30
30
  _internals: { loadDatabaseClass },
31
31
  } = require("../social-bilibili-adb/chromium-cookies-reader");
32
+ const { DouyinAdapter } = require("../social-douyin");
32
33
 
33
34
  const DOUYIN_PACKAGE = "com.ss.android.ugc.aweme";
34
35
  const VIDEO_RECORD_DB_REMOTE_PATH =
@@ -88,9 +89,18 @@ async function pullVideoRecordDbViaSu(adb, serial, opts = {}) {
88
89
 
89
90
  /**
90
91
  * Read watch records from video_record.db. Tables are named `record_<uid>`
91
- * (per-account) plus an anonymous `record_0`. Picks the numeric-uid table with
92
- * the most rows (the logged-in account); records carry aid + view timestamp +
93
- * enter_from surface.
92
+ * (per-account) plus a default `record_0`. We MERGE every `record_*` table
93
+ * (record_0 included) and dedup by (awemeId, capturedAt), because the watch
94
+ * history is split across tables and which one holds the bulk varies by device:
95
+ *
96
+ * - real-device 2026-06-11 (5lhyaqu8lbwstc6x): record_<uid> = 900 rows.
97
+ * - real-device 2026-06-18: record_0 = 223 rows vs record_<uid> = 9 — the
98
+ * anonymous/default bucket held 96% of the history.
99
+ *
100
+ * The earlier "skip record_0, pick the largest uid table" logic silently
101
+ * dropped the record_0 rows and lost most of the history on the 2nd device.
102
+ * Attribution `uid` is still the largest non-zero `record_<uid>` table (the
103
+ * logged-in account), or null when only record_0 exists.
94
104
  *
95
105
  * @returns {{uid: string|null, records: Array<{awemeId,capturedAt,enterFrom}>}}
96
106
  */
@@ -102,43 +112,57 @@ function readDouyinWatchHistory(dbPath, opts = {}) {
102
112
  const tables = db
103
113
  .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'record\\_%' ESCAPE '\\'")
104
114
  .all()
105
- .map((t) => t.name);
106
- // Candidate uid tables: record_<digits>, uid != 0. Pick the largest.
107
- let best = null;
115
+ .map((t) => t.name)
116
+ .filter((name) => /^record_\d+$/.test(name));
117
+ if (tables.length === 0) return { uid: null, records: [] };
118
+
119
+ let bestUid = null; // largest non-zero record_<uid> table → attribution
120
+ const merged = new Map(); // dedupKey → record (first-seen wins)
108
121
  for (const name of tables) {
109
122
  const m = /^record_(\d+)$/.exec(name);
110
- if (!m || m[1] === "0") continue;
111
123
  let count = 0;
112
124
  try {
113
125
  count = db.prepare(`SELECT COUNT(*) c FROM "${name}"`).get().c;
114
126
  } catch (_e) {
115
127
  continue;
116
128
  }
117
- if (!best || count > best.count) best = { name, uid: m[1], count };
118
- }
119
- if (!best) return { uid: null, records: [] };
120
- const cols = new Set(
121
- db.prepare(`PRAGMA table_info("${best.name}")`).all().map((c) => c.name),
122
- );
123
- const hasEnter = cols.has("enter_from");
124
- const hasTs = cols.has("view_time_timestamp");
125
- const rows = db
126
- .prepare(
127
- `SELECT aid${hasTs ? ", view_time_timestamp" : ""}${hasEnter ? ", enter_from" : ""} ` +
128
- `FROM "${best.name}"${hasTs ? " ORDER BY view_time_timestamp DESC" : ""} LIMIT ${limit}`,
129
- )
130
- .all();
131
- const records = [];
132
- for (const r of rows) {
133
- const awemeId = r.aid != null ? String(r.aid) : null;
134
- if (!awemeId) continue;
135
- records.push({
136
- awemeId,
137
- capturedAt: hasTs ? toEpochMs(r.view_time_timestamp) : null,
138
- enterFrom: hasEnter ? r.enter_from || null : null,
139
- });
129
+ if (m && m[1] !== "0" && (!bestUid || count > bestUid.count)) {
130
+ bestUid = { uid: m[1], count };
131
+ }
132
+ const cols = new Set(
133
+ db.prepare(`PRAGMA table_info("${name}")`).all().map((c) => c.name),
134
+ );
135
+ const hasEnter = cols.has("enter_from");
136
+ const hasTs = cols.has("view_time_timestamp");
137
+ let rows;
138
+ try {
139
+ rows = db
140
+ .prepare(
141
+ `SELECT aid${hasTs ? ", view_time_timestamp" : ""}${hasEnter ? ", enter_from" : ""} ` +
142
+ `FROM "${name}"${hasTs ? " ORDER BY view_time_timestamp DESC" : ""} LIMIT ${limit}`,
143
+ )
144
+ .all();
145
+ } catch (_e) {
146
+ continue;
147
+ }
148
+ for (const r of rows) {
149
+ const awemeId = r.aid != null ? String(r.aid) : null;
150
+ if (!awemeId) continue;
151
+ const capturedAt = hasTs ? toEpochMs(r.view_time_timestamp) : null;
152
+ const key = `${awemeId}@${capturedAt == null ? "" : capturedAt}`;
153
+ if (merged.has(key)) continue;
154
+ merged.set(key, {
155
+ awemeId,
156
+ capturedAt,
157
+ enterFrom: hasEnter ? r.enter_from || null : null,
158
+ });
159
+ }
140
160
  }
141
- return { uid: best.uid, records };
161
+ // Most-recent first (null timestamps sink to the end), then cap.
162
+ const records = Array.from(merged.values())
163
+ .sort((a, b) => (b.capturedAt || 0) - (a.capturedAt || 0))
164
+ .slice(0, limit);
165
+ return { uid: bestUid ? bestUid.uid : null, records };
142
166
  } finally {
143
167
  try {
144
168
  db.close();
@@ -148,6 +172,53 @@ function readDouyinWatchHistory(dbPath, opts = {}) {
148
172
  }
149
173
  }
150
174
 
175
/**
 * Read watch records from a local video_record.db and convert them into
 * canonical BROWSE events by running each record through
 * `DouyinAdapter.normalize` (the same normalizer the device-bridge collector
 * path uses). Nothing is written here; the caller decides what to do with the
 * returned events.
 *
 * Each record gets an `originalId` derived only from the record itself
 * (aweme id + its own view timestamp). A record WITHOUT a usable timestamp is
 * still given `now` as its event time, but that fallback is deliberately kept
 * out of the id: otherwise the id would change on every run and a re-ingest
 * would create a duplicate instead of matching the earlier event.
 *
 * @param {string} dbPath path to video_record.db
 * @param {object} [opts] also forwarded to readDouyinWatchHistory
 * @param {object} [opts._adapter] normalizer override (test injection)
 * @param {number} [opts.now] clock override (epoch ms); defaults to Date.now()
 * @returns {{events: object[], records: number, uid: string|null}} the
 *   normalized events, the number of records read, and the attributed uid
 */
function buildWatchHistoryEvents(dbPath, opts = {}) {
  const { uid, records } = readDouyinWatchHistory(dbPath, opts);
  const adapter = opts._adapter || new DouyinAdapter();
  const now = Number.isFinite(opts.now) ? opts.now : Date.now();
  const events = [];
  for (const r of records) {
    if (!r.awemeId) continue;
    const hasTimestamp = Number.isFinite(r.capturedAt) && r.capturedAt > 0;
    const occurredAt = hasTimestamp ? r.capturedAt : now;
    // Stable across runs: never embed the `now` fallback in the identifier.
    const idSuffix = hasTimestamp ? String(r.capturedAt) : "untimed";
    const batch = adapter.normalize({
      adapter: "social-douyin",
      kind: "history",
      originalId: `social-douyin:history:${r.awemeId}:${idSuffix}`,
      capturedAt: occurredAt,
      payload: {
        kind: "history",
        awemeId: r.awemeId,
        capturedAt: occurredAt,
        enterFrom: r.enterFrom,
      },
    });
    for (const ev of batch.events) events.push(ev);
  }
  return { events, records: records.length, uid };
}
209
+
210
/**
 * Read watch records from a local video_record.db, convert them to events
 * with buildWatchHistoryEvents and hand them to the vault in one batch.
 * `vault.putBatch` is not called at all when no events were produced.
 *
 * @param {object} vault LocalVault (must expose putBatch)
 * @param {string} dbPath path to video_record.db
 * @param {object} [opts] forwarded to buildWatchHistoryEvents
 * @returns {{ingested: number, records: number, uid: string|null}}
 *   `ingested` is the `events` count reported by putBatch (0 when absent)
 * @throws {TypeError} when `vault` lacks putBatch or `dbPath` is not a
 *   non-empty string
 */
function watchHistoryToVault(vault, dbPath, opts = {}) {
  const canWrite = Boolean(vault) && typeof vault.putBatch === "function";
  if (!canWrite) {
    throw new TypeError("watchHistoryToVault: vault with putBatch required");
  }
  const hasPath = typeof dbPath === "string" && dbPath !== "";
  if (!hasPath) {
    throw new TypeError("watchHistoryToVault: dbPath required");
  }

  const built = buildWatchHistoryEvents(dbPath, opts);
  let ingested = 0;
  if (built.events.length) {
    const outcome = vault.putBatch({ events: built.events });
    ingested = outcome.events || 0;
  }
  return { ingested, records: built.records, uid: built.uid };
}
221
+
151
222
  /** Bridge handler factory: `bridge.invoke("douyin.watch-history")` → {uid, records}. */
152
223
  function createDouyinWatchExtension(factoryOpts = {}) {
153
224
  const timeoutMs = factoryOpts.timeoutMs || 60_000;
@@ -178,6 +249,8 @@ function createDouyinWatchExtension(factoryOpts = {}) {
178
249
 
179
250
  module.exports = {
180
251
  createDouyinWatchExtension,
252
+ buildWatchHistoryEvents,
253
+ watchHistoryToVault,
181
254
  VIDEO_RECORD_DB_REMOTE_PATH,
182
255
  DOUYIN_PACKAGE,
183
256
  _internals: {