@chainlesschain/personal-data-hub 0.4.28 → 0.4.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +13 -5
- package/__tests__/adapters/social-douyin-adb-usage-profile.test.js +229 -0
- package/__tests__/adapters/social-douyin-adb-watch-history.test.js +88 -11
- package/__tests__/adapters/social-toutiao-adb-article.test.js +155 -0
- package/__tests__/analysis-skills.test.js +75 -0
- package/__tests__/query-parser.test.js +63 -0
- package/lib/adapters/social-douyin-adb/usage-profile-reader.js +253 -0
- package/lib/adapters/social-douyin-adb/watch-history-reader.js +104 -31
- package/lib/adapters/social-toutiao-adb/article-reader.js +202 -0
- package/lib/analysis-skills/overview.js +24 -4
- package/lib/analysis-skills/spending.js +63 -2
- package/lib/analysis-skills/timeline.js +11 -6
- package/lib/query-parser.js +38 -8
- package/package.json +1 -1
|
@@ -69,6 +69,27 @@ describe("parseTimeWindow", () => {
|
|
|
69
69
|
expect(months.since).toBeLessThan(NOW);
|
|
70
70
|
});
|
|
71
71
|
|
|
72
|
+
it("最近 N 个月 does NOT month-overflow on a month-end day (regression)", () => {
|
|
73
|
+
// Naive setMonth(getMonth()-1) on Mar 31 lands on "Feb 31" → Mar 3, silently
|
|
74
|
+
// dropping all of February from the window. since must land in February.
|
|
75
|
+
const mar31 = new Date(2026, 2, 31, 12, 0, 0).getTime();
|
|
76
|
+
const since = parseTimeWindow("最近1个月", mar31).since;
|
|
77
|
+
const d = new Date(since);
|
|
78
|
+
expect(d.getFullYear()).toBe(2026);
|
|
79
|
+
expect(d.getMonth()).toBe(1); // February, NOT still March
|
|
80
|
+
expect(d.getDate()).toBe(28); // clamped to Feb's last day
|
|
81
|
+
|
|
82
|
+
// May 31 −1mo → April 30 (April has 30 days), not May 1.
|
|
83
|
+
const may31 = new Date(2026, 4, 31, 12, 0, 0).getTime();
|
|
84
|
+
const aprSince = new Date(parseTimeWindow("最近1个月", may31).since);
|
|
85
|
+
expect(aprSince.getMonth()).toBe(3); // April
|
|
86
|
+
expect(aprSince.getDate()).toBe(30);
|
|
87
|
+
|
|
88
|
+
// mid-month is unaffected: Mar 15 −1mo → Feb 15.
|
|
89
|
+
const mar15 = new Date(2026, 2, 15, 12, 0, 0).getTime();
|
|
90
|
+
expect(new Date(parseTimeWindow("最近1个月", mar15).since).getDate()).toBe(15);
|
|
91
|
+
});
|
|
92
|
+
|
|
72
93
|
it("YYYY 年 M 月 → that calendar month", () => {
|
|
73
94
|
const w = parseTimeWindow("2024 年 7 月在淘宝下过几单", NOW);
|
|
74
95
|
expect(w.since).toBe(new Date(2024, 6, 1).getTime());
|
|
@@ -95,6 +116,17 @@ describe("parseFilters", () => {
|
|
|
95
116
|
expect(parseFilters("我朋友圈发了啥").subtype).toBe("post");
|
|
96
117
|
});
|
|
97
118
|
|
|
119
|
+
it("bare 收到 does not steal non-income subtypes (regression)", () => {
|
|
120
|
+
// 收到 ("receive") used to match income before message → "收到多少消息"
|
|
121
|
+
// was mis-classified as income.
|
|
122
|
+
expect(parseFilters("我收到多少消息").subtype).toBe("message");
|
|
123
|
+
expect(parseFilters("收到的快递").subtype).toBeUndefined();
|
|
124
|
+
expect(parseFilters("收到转账了吗").subtype).toBe("transfer"); // still transfer
|
|
125
|
+
// genuine income keywords still classify
|
|
126
|
+
expect(parseFilters("这个月工资多少").subtype).toBe("income");
|
|
127
|
+
expect(parseFilters("进账多少").subtype).toBe("income");
|
|
128
|
+
});
|
|
129
|
+
|
|
98
130
|
it("identifies adapter via keywords (Chinese + English)", () => {
|
|
99
131
|
expect(parseFilters("淘宝今年下了多少单").adapter).toBe("taobao");
|
|
100
132
|
expect(parseFilters("支付宝账单").adapter).toBe("alipay-bill");
|
|
@@ -114,9 +146,40 @@ describe("parseIntent", () => {
|
|
|
114
146
|
expect(parseIntent("我今年开销加起来")).toBe("sum-amount");
|
|
115
147
|
});
|
|
116
148
|
|
|
149
|
+
it("sum-amount for spending questions WITHOUT an explicit 总共/合计", () => {
|
|
150
|
+
// Regression: these very common phrasings previously fell through to
|
|
151
|
+
// intent=list (→ engine returned a row sample instead of the authoritative
|
|
152
|
+
// sumEventAmount total).
|
|
153
|
+
expect(parseIntent("我这个月花了多少钱")).toBe("sum-amount");
|
|
154
|
+
expect(parseIntent("上个月在淘宝花了多少钱")).toBe("sum-amount");
|
|
155
|
+
expect(parseIntent("这个月消费多少")).toBe("sum-amount");
|
|
156
|
+
expect(parseIntent("花了多少")).toBe("sum-amount");
|
|
157
|
+
});
|
|
158
|
+
|
|
159
|
+
it("sum-amount for INCOME-side questions (收入/赚/到账)", () => {
|
|
160
|
+
// Regression: income amount words were missing → "总共收入多少" even
|
|
161
|
+
// mis-returned "count". Both with and without 总共.
|
|
162
|
+
expect(parseIntent("这个月收入多少")).toBe("sum-amount");
|
|
163
|
+
expect(parseIntent("我这个月赚了多少")).toBe("sum-amount");
|
|
164
|
+
expect(parseIntent("上个月到账多少")).toBe("sum-amount");
|
|
165
|
+
expect(parseIntent("总共收入多少")).toBe("sum-amount");
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
it("count for 多少X / 几X measure-word symmetry (多少条/多少单 were missed)", () => {
|
|
169
|
+
expect(parseIntent("我有多少条朋友圈")).toBe("count");
|
|
170
|
+
expect(parseIntent("下了多少单")).toBe("count");
|
|
171
|
+
expect(parseIntent("发了多少条微博")).toBe("count");
|
|
172
|
+
expect(parseIntent("多少笔交易")).toBe("count");
|
|
173
|
+
expect(parseIntent("几部电影")).toBe("count");
|
|
174
|
+
});
|
|
175
|
+
|
|
117
176
|
it("count when 'how many' phrasing", () => {
|
|
118
177
|
expect(parseIntent("最近多少次跟妈妈聊过")).toBe("count");
|
|
119
178
|
expect(parseIntent("我下了几单")).toBe("count");
|
|
179
|
+
// the new sum-amount rule must NOT steal a count question that also
|
|
180
|
+
// mentions spending ("how many TIMES did I spend").
|
|
181
|
+
expect(parseIntent("消费了多少次")).toBe("count");
|
|
182
|
+
expect(parseIntent("花了多少次钱")).toBe("count");
|
|
120
183
|
});
|
|
121
184
|
|
|
122
185
|
it("latest when 'recent / latest'", () => {
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Douyin on-device usage-profile reader — recovers the user's app-usage
|
|
3
|
+
* baseline (active hours / session count / total time-on-app) from the local
|
|
4
|
+
* `1128_feature_engineering.db` table `FEInternalUserActivityTable`, a plaintext
|
|
5
|
+
* SQLite table the app keeps for its own client-side feature store.
|
|
6
|
+
*
|
|
7
|
+
* Why this exists (real-device 2026-06-18, user's exported plaintext DB):
|
|
8
|
+
* - `FEInternalUserActivityTable` rows are per-session aggregates:
|
|
9
|
+
* { timestamp(sec), start/end_timestamp_ms, open_app_count,
|
|
10
|
+
* launch_hour_0..23, total_duration(ms) }
|
|
11
|
+
* - 81 rows spanning ~31 days = "how the user uses Douyin": ~175 opens,
|
|
12
|
+
* ~108 hours total, peak 12–17h. This behavioral baseline is exactly what a
|
|
13
|
+
* personal-AI should know, and it's plaintext (no signing/encryption).
|
|
14
|
+
*
|
|
15
|
+
* This module is the testable core (reader + pure summarizer + vault-event
|
|
16
|
+
* builder). The device pull/collector wiring (mirroring watch-history-reader's
|
|
17
|
+
* pullVideoRecordDbViaSu) is a follow-up; the remote db sub-path must be
|
|
18
|
+
* confirmed on a device first.
|
|
19
|
+
*
|
|
20
|
+
* Authorization: only on your own device/account.
|
|
21
|
+
*/
|
|
22
|
+
"use strict";
|
|
23
|
+
|
|
24
|
+
const { newId } = require("../../ids");
|
|
25
|
+
const {
|
|
26
|
+
_internals: { loadDatabaseClass },
|
|
27
|
+
} = require("../social-bilibili-adb/chromium-cookies-reader");
|
|
28
|
+
|
|
29
|
+
const USAGE_TABLE = "FEInternalUserActivityTable";
|
|
30
|
+
const PROFILE_VERSION = "usage-profile-0.1";
|
|
31
|
+
const HOUR_BUCKETS = Object.freeze([
|
|
32
|
+
{ label: "0-5h", from: 0, to: 5 },
|
|
33
|
+
{ label: "6-11h", from: 6, to: 11 },
|
|
34
|
+
{ label: "12-17h", from: 12, to: 17 },
|
|
35
|
+
{ label: "18-23h", from: 18, to: 23 },
|
|
36
|
+
]);
|
|
37
|
+
|
|
38
|
+
/**
 * Normalise an epoch value given in seconds or milliseconds to milliseconds.
 *
 * Heuristic: anything strictly greater than 1e12 is taken to already be in
 * milliseconds; everything else is treated as seconds and scaled by 1000.
 *
 * @param {*} v raw value (number or numeric string)
 * @returns {number|null} whole milliseconds, or null when `v` is not a
 *   finite, strictly positive number
 */
function toEpochMs(v) {
  const value = Number(v);
  const usable = Number.isFinite(value) && value > 0;
  if (!usable) {
    return null;
  }
  const alreadyMs = value > 1e12;
  const asMs = alreadyMs ? value : value * 1000;
  return Math.floor(asMs);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Open the SQLite file at `dbPath` read-only and fold every row of
 * FEInternalUserActivityTable into a single usage profile.
 *
 * The table's columns are probed with PRAGMA table_info first; a column that
 * is absent simply contributes nothing (zero totals / null range) rather than
 * raising. When the table itself is absent the result is `emptyProfile()`.
 *
 * Distinct days are counted on UTC day boundaries (epoch ms / 86_400_000),
 * which is only a coarse stand-in for local calendar days.
 *
 * @param {string} dbPath path handed to the Database constructor
 * @param {object} [opts]
 * @param {Function} [opts._databaseClass] injected Database class; when
 *   absent, loadDatabaseClass() supplies one
 * @returns {{
 *   sessions: number, days: number, from: number|null, to: number|null,
 *   totalOpens: number, totalDurationMs: number,
 *   hourHistogram: number[], peakHour: number|null,
 *   peakBucket: string|null, bucketTotals: Record<string,number>
 * }}
 */
function readDouyinUsageProfile(dbPath, opts = {}) {
  const DatabaseCtor = opts._databaseClass || loadDatabaseClass();
  const db = new DatabaseCtor(dbPath, { readonly: true });

  // Index of the largest strictly-positive value (earliest index wins ties),
  // or null when nothing is above zero.
  const firstPositivePeak = (values) => {
    let bestIndex = null;
    let bestValue = 0;
    values.forEach((value, index) => {
      if (value > bestValue) {
        bestValue = value;
        bestIndex = index;
      }
    });
    return bestIndex;
  };

  try {
    const tableRow = db
      .prepare("SELECT name FROM sqlite_master WHERE type='table' AND name=?")
      .get(USAGE_TABLE);
    if (!tableRow) {
      return emptyProfile();
    }

    const columnNames = new Set();
    for (const column of db.prepare(`PRAGMA table_info("${USAGE_TABLE}")`).all()) {
      columnNames.add(column.name);
    }
    // hourColumns[h] is the column name for hour h, or null when it is missing.
    const hourColumns = Array.from({ length: 24 }, (_unused, hour) => {
      const columnName = `launch_hour_${hour}`;
      return columnNames.has(columnName) ? columnName : null;
    });
    const tracksOpens = columnNames.has("open_app_count");
    const tracksDuration = columnNames.has("total_duration");
    const tracksTimestamp = columnNames.has("timestamp");

    const sessionRows = db.prepare(`SELECT * FROM "${USAGE_TABLE}"`).all();
    const hourHistogram = new Array(24).fill(0);
    const dayKeys = new Set();
    let totalOpens = 0;
    let totalDurationMs = 0;
    let from = null;
    let to = null;

    for (const row of sessionRows) {
      if (tracksOpens) {
        totalOpens += Number(row.open_app_count) || 0;
      }
      if (tracksDuration) {
        totalDurationMs += Number(row.total_duration) || 0;
      }
      hourColumns.forEach((columnName, hour) => {
        if (columnName) {
          hourHistogram[hour] += Number(row[columnName]) || 0;
        }
      });

      const stampMs = tracksTimestamp ? toEpochMs(row.timestamp) : null;
      if (stampMs == null) {
        continue;
      }
      from = from == null ? stampMs : Math.min(from, stampMs);
      to = to == null ? stampMs : Math.max(to, stampMs);
      dayKeys.add(Math.floor(stampMs / 86_400_000));
    }

    const peakHour = firstPositivePeak(hourHistogram);

    // Sum the histogram over each bucket's inclusive [from, to] hour range.
    const bucketSums = HOUR_BUCKETS.map((bucket) =>
      hourHistogram
        .slice(bucket.from, bucket.to + 1)
        .reduce((running, launches) => running + launches, 0),
    );
    const bucketTotals = {};
    HOUR_BUCKETS.forEach((bucket, index) => {
      bucketTotals[bucket.label] = bucketSums[index];
    });
    const peakBucketIndex = firstPositivePeak(bucketSums);
    const peakBucket =
      peakBucketIndex == null ? null : HOUR_BUCKETS[peakBucketIndex].label;

    return {
      sessions: sessionRows.length,
      days: dayKeys.size,
      from,
      to,
      totalOpens,
      totalDurationMs,
      hourHistogram,
      peakHour,
      peakBucket,
      bucketTotals,
    };
  } finally {
    try {
      db.close();
    } catch (_closeError) {
      /* best-effort: a failing close must not mask the result or the real error */
    }
  }
}
|
|
143
|
+
|
|
144
|
+
/**
 * Build the all-zero usage profile returned when the usage table is absent.
 * Every label in HOUR_BUCKETS is present in `bucketTotals` with a total of 0.
 *
 * @returns {object} a fresh profile object (new arrays/objects on each call)
 */
function emptyProfile() {
  const zeroedBuckets = Object.fromEntries(
    HOUR_BUCKETS.map(({ label }) => [label, 0]),
  );
  const zeroedHours = new Array(24).fill(0);
  return {
    sessions: 0,
    days: 0,
    from: null,
    to: null,
    totalOpens: 0,
    totalDurationMs: 0,
    hourHistogram: zeroedHours,
    peakHour: null,
    peakBucket: null,
    bucketTotals: zeroedBuckets,
  };
}
|
|
160
|
+
|
|
161
|
+
/**
 * Render a usage profile as a single human-readable line (pure function).
 *
 * Total duration is shown in hours rounded to one decimal place; the
 * peak-period suffix is appended only when `profile.peakBucket` is set.
 *
 * @param {object|null|undefined} profile profile as produced by the reader
 * @returns {string} the summary, or a fixed "no data" line when `profile` is
 *   falsy or has zero sessions
 */
function summarizeUsageProfile(profile) {
  const hasSessions = Boolean(profile) && profile.sessions !== 0;
  if (!hasSessions) {
    return "抖音使用画像:无数据";
  }
  const tenthsOfHours = Math.round((profile.totalDurationMs / 3_600_000) * 10);
  const hours = tenthsOfHours / 10;
  let peakSuffix = "";
  if (profile.peakBucket) {
    peakSuffix = `,高峰时段 ${profile.peakBucket}`;
  }
  return [
    `抖音使用画像:${profile.days} 天内 ${profile.sessions} 个会话、`,
    `${profile.totalOpens} 次启动、累计约 ${hours} 小时${peakSuffix}`,
  ].join("");
}
|
|
171
|
+
|
|
172
|
+
/**
 * Wrap a usage profile in a single "app-usage baseline" vault event.
 *
 * The event's `source.originalId` is a fixed string, so every build targets
 * the same logical record; whether the vault then updates in place depends on
 * the vault's own dedup rules, which are not visible here. The event is
 * tagged `extra.kind = "app-usage-profile"` so consumers can tell it apart
 * from ordinary timeline activity.
 *
 * `occurredAt` is the profile's latest timestamp (`profile.to`) when finite,
 * otherwise the ingest time.
 *
 * @param {object|null|undefined} profile usage profile
 * @param {object} [opts]
 * @param {number} [opts.now] ingest time in ms; defaults to Date.now()
 * @returns {{events: object[]}} one event, or none when `profile` is falsy or
 *   has zero sessions
 */
function buildUsageProfileEvents(profile, opts = {}) {
  const hasSessions = Boolean(profile) && profile.sessions !== 0;
  if (!hasSessions) {
    return { events: [] };
  }

  const ingestedAt = Number.isFinite(opts.now) ? opts.now : Date.now();
  const anchorMs = Number.isFinite(profile.to) ? profile.to : ingestedAt;
  const summary = summarizeUsageProfile(profile);
  const id = newId();

  const source = {
    adapter: "social-douyin",
    adapterVersion: PROFILE_VERSION,
    originalId: "social-douyin:usage-profile",
    capturedAt: anchorMs,
    capturedBy: "sqlite",
  };

  const extra = {
    platform: "douyin",
    kind: "app-usage-profile",
    days: profile.days,
    sessions: profile.sessions,
    totalOpens: profile.totalOpens,
    totalDurationMs: profile.totalDurationMs,
    hourHistogram: profile.hourHistogram,
    peakHour: profile.peakHour,
    peakBucket: profile.peakBucket,
    bucketTotals: profile.bucketTotals,
    rangeFrom: profile.from,
    rangeTo: profile.to,
  };

  return {
    events: [
      {
        id,
        type: "event",
        subtype: "other",
        occurredAt: anchorMs,
        actor: "person-self",
        content: { title: summary, text: summary },
        ingestedAt,
        source,
        extra,
      },
    ],
  };
}
|
|
217
|
+
|
|
218
|
+
/**
 * Read the usage profile from a feature-engineering db and hand the resulting
 * baseline event (if any) to `vault.putBatch`.
 *
 * @param {object} vault object exposing a `putBatch` function
 * @param {string} dbPath path to 1128_feature_engineering.db
 * @param {object} [opts] forwarded unchanged to readDouyinUsageProfile and
 *   buildUsageProfileEvents
 * @returns {{ingested: number, sessions: number, days: number, summary: string}}
 *   `ingested` is the `events` value reported by putBatch (0 when nothing was
 *   written)
 * @throws {TypeError} when `vault` lacks putBatch or `dbPath` is not a
 *   non-empty string
 */
function usageProfileToVault(vault, dbPath, opts = {}) {
  const canWrite = Boolean(vault) && typeof vault.putBatch === "function";
  if (!canWrite) {
    throw new TypeError("usageProfileToVault: vault with putBatch required");
  }
  const hasPath = typeof dbPath === "string" && dbPath.length > 0;
  if (!hasPath) {
    throw new TypeError("usageProfileToVault: dbPath required");
  }

  const profile = readDouyinUsageProfile(dbPath, opts);
  const { events } = buildUsageProfileEvents(profile, opts);

  // Skip the vault call entirely when there is nothing to write.
  let outcome = { events: 0 };
  if (events.length) {
    outcome = vault.putBatch({ events });
  }

  const { sessions, days } = profile;
  return {
    ingested: outcome.events || 0,
    sessions,
    days,
    summary: summarizeUsageProfile(profile),
  };
}
|
|
244
|
+
|
|
245
|
+
module.exports = {
|
|
246
|
+
USAGE_TABLE,
|
|
247
|
+
HOUR_BUCKETS,
|
|
248
|
+
readDouyinUsageProfile,
|
|
249
|
+
summarizeUsageProfile,
|
|
250
|
+
buildUsageProfileEvents,
|
|
251
|
+
usageProfileToVault,
|
|
252
|
+
_internals: { toEpochMs, emptyProfile },
|
|
253
|
+
};
|
|
@@ -29,6 +29,7 @@ const crypto = require("node:crypto");
|
|
|
29
29
|
const {
|
|
30
30
|
_internals: { loadDatabaseClass },
|
|
31
31
|
} = require("../social-bilibili-adb/chromium-cookies-reader");
|
|
32
|
+
const { DouyinAdapter } = require("../social-douyin");
|
|
32
33
|
|
|
33
34
|
const DOUYIN_PACKAGE = "com.ss.android.ugc.aweme";
|
|
34
35
|
const VIDEO_RECORD_DB_REMOTE_PATH =
|
|
@@ -88,9 +89,18 @@ async function pullVideoRecordDbViaSu(adb, serial, opts = {}) {
|
|
|
88
89
|
|
|
89
90
|
/**
|
|
90
91
|
* Read watch records from video_record.db. Tables are named `record_<uid>`
|
|
91
|
-
* (per-account) plus
|
|
92
|
-
*
|
|
93
|
-
*
|
|
92
|
+
* (per-account) plus a default `record_0`. We MERGE every `record_*` table
|
|
93
|
+
* (record_0 included) and dedup by (awemeId, capturedAt), because the watch
|
|
94
|
+
* history is split across tables and which one holds the bulk varies by device:
|
|
95
|
+
*
|
|
96
|
+
* - real-device 2026-06-11 (5lhyaqu8lbwstc6x): record_<uid> = 900 rows.
|
|
97
|
+
* - real-device 2026-06-18: record_0 = 223 rows vs record_<uid> = 9 — the
|
|
98
|
+
* anonymous/default bucket held 96% of the history.
|
|
99
|
+
*
|
|
100
|
+
* The earlier "skip record_0, pick the largest uid table" logic silently
|
|
101
|
+
* dropped the record_0 rows and lost most of the history on the 2nd device.
|
|
102
|
+
* Attribution `uid` is still the largest non-zero `record_<uid>` table (the
|
|
103
|
+
* logged-in account), or null when only record_0 exists.
|
|
94
104
|
*
|
|
95
105
|
* @returns {{uid: string|null, records: Array<{awemeId,capturedAt,enterFrom}>}}
|
|
96
106
|
*/
|
|
@@ -102,43 +112,57 @@ function readDouyinWatchHistory(dbPath, opts = {}) {
|
|
|
102
112
|
const tables = db
|
|
103
113
|
.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name LIKE 'record\\_%' ESCAPE '\\'")
|
|
104
114
|
.all()
|
|
105
|
-
.map((t) => t.name)
|
|
106
|
-
|
|
107
|
-
|
|
115
|
+
.map((t) => t.name)
|
|
116
|
+
.filter((name) => /^record_\d+$/.test(name));
|
|
117
|
+
if (tables.length === 0) return { uid: null, records: [] };
|
|
118
|
+
|
|
119
|
+
let bestUid = null; // largest non-zero record_<uid> table → attribution
|
|
120
|
+
const merged = new Map(); // dedupKey → record (first-seen wins)
|
|
108
121
|
for (const name of tables) {
|
|
109
122
|
const m = /^record_(\d+)$/.exec(name);
|
|
110
|
-
if (!m || m[1] === "0") continue;
|
|
111
123
|
let count = 0;
|
|
112
124
|
try {
|
|
113
125
|
count = db.prepare(`SELECT COUNT(*) c FROM "${name}"`).get().c;
|
|
114
126
|
} catch (_e) {
|
|
115
127
|
continue;
|
|
116
128
|
}
|
|
117
|
-
if (!
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
129
|
+
if (m && m[1] !== "0" && (!bestUid || count > bestUid.count)) {
|
|
130
|
+
bestUid = { uid: m[1], count };
|
|
131
|
+
}
|
|
132
|
+
const cols = new Set(
|
|
133
|
+
db.prepare(`PRAGMA table_info("${name}")`).all().map((c) => c.name),
|
|
134
|
+
);
|
|
135
|
+
const hasEnter = cols.has("enter_from");
|
|
136
|
+
const hasTs = cols.has("view_time_timestamp");
|
|
137
|
+
let rows;
|
|
138
|
+
try {
|
|
139
|
+
rows = db
|
|
140
|
+
.prepare(
|
|
141
|
+
`SELECT aid${hasTs ? ", view_time_timestamp" : ""}${hasEnter ? ", enter_from" : ""} ` +
|
|
142
|
+
`FROM "${name}"${hasTs ? " ORDER BY view_time_timestamp DESC" : ""} LIMIT ${limit}`,
|
|
143
|
+
)
|
|
144
|
+
.all();
|
|
145
|
+
} catch (_e) {
|
|
146
|
+
continue;
|
|
147
|
+
}
|
|
148
|
+
for (const r of rows) {
|
|
149
|
+
const awemeId = r.aid != null ? String(r.aid) : null;
|
|
150
|
+
if (!awemeId) continue;
|
|
151
|
+
const capturedAt = hasTs ? toEpochMs(r.view_time_timestamp) : null;
|
|
152
|
+
const key = `${awemeId}@${capturedAt == null ? "" : capturedAt}`;
|
|
153
|
+
if (merged.has(key)) continue;
|
|
154
|
+
merged.set(key, {
|
|
155
|
+
awemeId,
|
|
156
|
+
capturedAt,
|
|
157
|
+
enterFrom: hasEnter ? r.enter_from || null : null,
|
|
158
|
+
});
|
|
159
|
+
}
|
|
140
160
|
}
|
|
141
|
-
|
|
161
|
+
// Most-recent first (null timestamps sink to the end), then cap.
|
|
162
|
+
const records = Array.from(merged.values())
|
|
163
|
+
.sort((a, b) => (b.capturedAt || 0) - (a.capturedAt || 0))
|
|
164
|
+
.slice(0, limit);
|
|
165
|
+
return { uid: bestUid ? bestUid.uid : null, records };
|
|
142
166
|
} finally {
|
|
143
167
|
try {
|
|
144
168
|
db.close();
|
|
@@ -148,6 +172,53 @@ function readDouyinWatchHistory(dbPath, opts = {}) {
|
|
|
148
172
|
}
|
|
149
173
|
}
|
|
150
174
|
|
|
175
|
+
/**
 * Read watch records from a local video_record.db and turn them into vault
 * events by passing each record through the adapter's `normalize` (so the
 * events take the same shape as the adapter produces elsewhere). Nothing is
 * written here; callers hand the returned events to the vault themselves.
 *
 * Each record gets an originalId of the form
 * `social-douyin:history:<awemeId>:<timestampMs>`. A record with no usable
 * timestamp uses the literal suffix `0` instead of the ingest time: the
 * ingest time differs on every run, which would mint a new originalId per
 * ingest and defeat re-ingest de-duplication. (`0` can never collide with a
 * real timestamp because only strictly positive timestamps are accepted.)
 * Such a record still gets `occurredAt`/`capturedAt` set to the ingest time,
 * since the event needs some time value.
 *
 * @param {string} dbPath path to video_record.db
 * @param {object} [opts] forwarded to readDouyinWatchHistory
 * @param {object} [opts._adapter] object exposing `normalize(raw)`; defaults
 *   to a new DouyinAdapter
 * @param {number} [opts.now] ingest time in ms; defaults to Date.now()
 * @returns {{events: object[], records: number, uid: string|null}} `records`
 *   is the number of records read, including any skipped for lacking an
 *   awemeId
 */
function buildWatchHistoryEvents(dbPath, opts = {}) {
  const { uid, records } = readDouyinWatchHistory(dbPath, opts);
  const adapter = opts._adapter || new DouyinAdapter();
  const now = Number.isFinite(opts.now) ? opts.now : Date.now();
  const events = [];
  for (const r of records) {
    if (!r.awemeId) continue;
    const hasTimestamp = Number.isFinite(r.capturedAt) && r.capturedAt > 0;
    const occurredAt = hasTimestamp ? r.capturedAt : now;
    // Keep the id independent of `now` so re-ingesting the same record maps
    // to the same originalId.
    const stableStamp = hasTimestamp ? r.capturedAt : 0;
    const batch = adapter.normalize({
      adapter: "social-douyin",
      kind: "history",
      originalId: `social-douyin:history:${r.awemeId}:${stableStamp}`,
      capturedAt: occurredAt,
      payload: {
        kind: "history",
        awemeId: r.awemeId,
        capturedAt: occurredAt,
        enterFrom: r.enterFrom,
      },
    });
    for (const ev of batch.events) events.push(ev);
  }
  return { events, records: records.length, uid };
}
|
|
209
|
+
|
|
210
|
+
/**
 * Build watch-history events from a local video_record.db and write them to
 * the vault in one `putBatch` call (skipped when there are no events).
 *
 * @param {object} vault object exposing a `putBatch` function
 * @param {string} dbPath path to video_record.db
 * @param {object} [opts] forwarded unchanged to buildWatchHistoryEvents
 * @returns {{ingested: number, records: number, uid: string|null}} `ingested`
 *   is the `events` value reported by putBatch (0 when nothing was written)
 * @throws {TypeError} when `vault` lacks putBatch or `dbPath` is not a
 *   non-empty string
 */
function watchHistoryToVault(vault, dbPath, opts = {}) {
  const canWrite = Boolean(vault) && typeof vault.putBatch === "function";
  if (!canWrite) {
    throw new TypeError("watchHistoryToVault: vault with putBatch required");
  }
  const hasPath = typeof dbPath === "string" && dbPath.length > 0;
  if (!hasPath) {
    throw new TypeError("watchHistoryToVault: dbPath required");
  }

  const built = buildWatchHistoryEvents(dbPath, opts);
  let outcome = { events: 0 };
  if (built.events.length) {
    outcome = vault.putBatch({ events: built.events });
  }
  return {
    ingested: outcome.events || 0,
    records: built.records,
    uid: built.uid,
  };
}
|
|
221
|
+
|
|
151
222
|
/** Bridge handler factory: `bridge.invoke("douyin.watch-history")` → {uid, records}. */
|
|
152
223
|
function createDouyinWatchExtension(factoryOpts = {}) {
|
|
153
224
|
const timeoutMs = factoryOpts.timeoutMs || 60_000;
|
|
@@ -178,6 +249,8 @@ function createDouyinWatchExtension(factoryOpts = {}) {
|
|
|
178
249
|
|
|
179
250
|
module.exports = {
|
|
180
251
|
createDouyinWatchExtension,
|
|
252
|
+
buildWatchHistoryEvents,
|
|
253
|
+
watchHistoryToVault,
|
|
181
254
|
VIDEO_RECORD_DB_REMOTE_PATH,
|
|
182
255
|
DOUYIN_PACKAGE,
|
|
183
256
|
_internals: {
|