@chainlesschain/personal-data-hub 0.4.4 → 0.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. package/__tests__/adapters/edu-huawei-learning-live.test.js +198 -0
  2. package/__tests__/adapters/edu-zuoyebang-live.test.js +226 -0
  3. package/__tests__/adapters/family-23-collectors-scaffold.test.js +5 -1
  4. package/__tests__/adapters/finance-alipay-live.test.js +258 -0
  5. package/__tests__/adapters/game-genshin-live.test.js +238 -0
  6. package/__tests__/adapters/game-genshin-scaffold.test.js +4 -3
  7. package/__tests__/adapters/game-honor-of-kings-live.test.js +230 -0
  8. package/__tests__/adapters/netease-music-live.test.js +244 -0
  9. package/__tests__/adapters/social-douyin-adb-aweme-detail.test.js +165 -0
  10. package/__tests__/adapters/social-douyin-adb-watch-history.test.js +192 -0
  11. package/__tests__/adapters/social-toutiao-adb-account-reader.test.js +135 -0
  12. package/__tests__/adapters/social-toutiao-adb-api-client.test.js +89 -0
  13. package/__tests__/adapters/social-toutiao-adb-collector.test.js +95 -2
  14. package/__tests__/adapters/social-toutiao-adb-cookies-extension.test.js +30 -0
  15. package/__tests__/adapters/social-xiaohongshu-adb-cookies-extension.test.js +0 -0
  16. package/lib/adapters/_live-json-helpers.js +50 -0
  17. package/lib/adapters/edu-huawei-learning/api-client.js +178 -5
  18. package/lib/adapters/edu-huawei-learning/index.js +83 -9
  19. package/lib/adapters/edu-zuoyebang/api-client.js +181 -6
  20. package/lib/adapters/edu-zuoyebang/index.js +83 -9
  21. package/lib/adapters/finance-alipay/api-client.js +268 -6
  22. package/lib/adapters/finance-alipay/index.js +85 -9
  23. package/lib/adapters/game-genshin/api-client.js +207 -6
  24. package/lib/adapters/game-genshin/index.js +90 -9
  25. package/lib/adapters/game-honor-of-kings/api-client.js +235 -12
  26. package/lib/adapters/game-honor-of-kings/index.js +80 -9
  27. package/lib/adapters/netease-music/api-client.js +284 -0
  28. package/lib/adapters/netease-music/index.js +85 -9
  29. package/lib/adapters/social-douyin/index.js +2 -0
  30. package/lib/adapters/social-douyin-adb/aweme-detail-client.js +119 -0
  31. package/lib/adapters/social-douyin-adb/collector.js +114 -0
  32. package/lib/adapters/social-douyin-adb/index.js +18 -1
  33. package/lib/adapters/social-douyin-adb/watch-history-reader.js +188 -0
  34. package/lib/adapters/social-toutiao-adb/account-reader.js +179 -0
  35. package/lib/adapters/social-toutiao-adb/api-client.js +41 -17
  36. package/lib/adapters/social-toutiao-adb/collector.js +55 -19
  37. package/lib/adapters/social-toutiao-adb/cookies-extension.js +21 -1
  38. package/lib/adapters/social-toutiao-adb/index.js +6 -0
  39. package/lib/adapters/social-xiaohongshu-adb/cookies-extension.js +19 -1
  40. package/lib/index.js +1 -1
  41. package/package.json +1 -1
@@ -0,0 +1,284 @@
1
+ /**
2
+ * NeteaseMusicApiClient — 网易云音乐 live cookie web-API fetcher.
3
+ *
4
+ * 走标准 weapi 加密(AES-128-CBC 双层 + textbook-RSA encSecKey)。这套加密常量
5
+ * (presetKey / iv / RSA pubKey+modulus)自 2015 起未变,远比米哈游 DS salt 稳定,
6
+ * 故可放心钉死。登录态仅需 cookie 里的 `MUSIC_U`。
7
+ *
8
+ * 拉取(只读):
9
+ * - /weapi/w/nuser/account/get → uid + nickname(cookie-only)
10
+ * - /weapi/v1/play/record → 听歌排行(weekData / allData,含 playCount)
11
+ * - /weapi/user/playlist → 用户歌单(id / name / trackCount / creator)
12
+ * 输出事件形状对齐 snapshot(play / playlist),故 adapter.normalize 不变。
13
+ * favorite(喜欢的歌)需额外解 likelist+歌曲详情,留 snapshot 模式,live 暂不出。
14
+ *
15
+ * 加密的随机 secKey 与网络 fetch 都经 opts 注入,可确定性单测。
16
+ */
17
+ "use strict";
18
+
19
+ const crypto = require("node:crypto");
20
+
21
+ const DEFAULT_BASE_URL = "https://music.163.com";
22
+
23
+ // ─── weapi crypto constants (stable since 2015) ────────────────────────
24
+ const PRESET_KEY = "0CoJUm6Qyw8W8jud";
25
+ const AES_IV = "0102030405060708";
26
+ const RSA_PUB_KEY = "010001";
27
+ const RSA_MODULUS =
28
+ "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b7251" +
29
+ "52b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ec" +
30
+ "bda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d81" +
31
+ "3cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7";
32
// Symbols the random 16-character weapi secKey is drawn from: all ten digits
// plus lower- and upper-case ASCII letters (62 symbols). The digit run used to
// skip "8", which silently shrank the key space.
const SECKEY_ALPHABET =
  "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
34
+
35
+ const BROWSER_UA =
36
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
37
+ "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
38
+
39
/**
 * Encrypt a UTF-8 string with AES-128-CBC using the module's fixed IV.
 *
 * @param {string} text plaintext to encrypt
 * @param {string} key  key material, interpreted as UTF-8 bytes
 * @returns {string} the ciphertext, base64-encoded
 */
function aesEncrypt(text, key) {
  const keyBytes = Buffer.from(key, "utf8");
  const ivBytes = Buffer.from(AES_IV, "utf8");
  const cipher = crypto.createCipheriv("aes-128-cbc", keyBytes, ivBytes);
  // Collect the raw ciphertext first, then base64-encode it in one go.
  const encrypted = Buffer.concat([cipher.update(text, "utf8"), cipher.final()]);
  return encrypted.toString("base64");
}
44
+
45
/**
 * Modular exponentiation over BigInt using square-and-multiply.
 *
 * @param {bigint} base value to raise
 * @param {bigint} exp  non-negative exponent
 * @param {bigint} m    modulus (a zero modulus raises the engine's RangeError)
 * @returns {bigint} base^exp mod m
 * @throws {RangeError} when `exp` is negative
 */
function modpow(base, exp, m) {
  if (exp < 0n) {
    throw new RangeError("modpow: exponent must be non-negative");
  }
  // Start from 1 mod m so that a modulus of 1 yields 0 even when exp is 0.
  let result = 1n % m;
  base %= m;
  while (exp > 0n) {
    if (exp & 1n) result = (result * base) % m;
    exp >>= 1n;
    base = (base * base) % m;
  }
  return result;
}
56
+
57
/**
 * Unpadded ("textbook") RSA over the UTF-8 bytes of a short string.
 *
 * @param {string} text       message; its UTF-8 bytes are read as one big-endian integer
 * @param {string} pubKeyHex  public exponent as a hex string (no 0x prefix)
 * @param {string} modulusHex modulus as a hex string (no 0x prefix)
 * @returns {string} the result in lowercase hex, left-padded with zeros to 256 characters
 */
function rsaEncrypt(text, pubKeyHex, modulusHex) {
  const fromHex = (digits) => BigInt(`0x${digits}`);
  const messageHex = Buffer.from(text, "utf8").toString("hex");
  // An empty message has no hex digits; treat it as the integer zero.
  const message = fromHex(messageHex === "" ? "0" : messageHex);
  const exponent = fromHex(pubKeyHex);
  const modulus = fromHex(modulusHex);
  const cipherHex = modpow(message, exponent, modulus).toString(16);
  return cipherHex.padStart(256, "0");
}
65
+
66
/**
 * Build the weapi form fields for a payload object.
 *
 * The payload is serialised to JSON and AES-encrypted twice (first with the
 * preset key, then with `secKey`); `secKey` itself is reversed and
 * RSA-encrypted so it can travel alongside the ciphertext.
 *
 * @param {object} payloadObj request payload
 * @param {string} secKey     16-character key (injectable so tests are deterministic)
 * @returns {{params: string, encSecKey: string}} form fields for the POST body
 */
function weapiEncrypt(payloadObj, secKey) {
  const innerLayer = aesEncrypt(JSON.stringify(payloadObj), PRESET_KEY);
  const params = aesEncrypt(innerLayer, secKey);
  const reversedKey = secKey.split("").reduceRight((acc, ch) => acc + ch, "");
  return {
    params,
    encSecKey: rsaEncrypt(reversedKey, RSA_PUB_KEY, RSA_MODULUS),
  };
}
77
+
78
/**
 * Read-only client for the NetEase Cloud Music weapi web endpoints,
 * authenticated solely by the caller-supplied cookie string.
 *
 * Every request method resolves to `null` on failure instead of throwing and
 * records the reason in `lastError`. Codes set by this class:
 *   -1 empty cookie, -2 no fetch implementation, -3 body is not a JSON object,
 *   -4 network failure (including a failed body read), -7 account response
 *   without a user id. Positive codes are HTTP statuses or API `code` values.
 */
class NeteaseMusicApiClient {
  /**
   * @param {object} [opts]
   * @param {string} [opts.baseUrl] API origin; trailing slashes are stripped
   * @param {Function} [opts.fetch] fetch implementation (defaults to globalThis.fetch when present)
   * @param {Function} [opts.rand] random source returning a number in [0, 1) (defaults to Math.random)
   * @param {string} [opts.secKey] fixed secKey so the weapi envelope is deterministic in tests
   */
  constructor(opts = {}) {
    this.baseUrl = (opts.baseUrl || DEFAULT_BASE_URL).replace(/\/+$/, "");
    this._fetch =
      opts.fetch || (typeof globalThis.fetch === "function" ? globalThis.fetch : null);
    this._rand = opts.rand || Math.random;
    // Test seam: force a fixed secKey so weapi output is deterministic.
    this._secKey = opts.secKey || null;
    this._lastErrorCode = 0;
    this._lastErrorMsg = "";
  }

  /** Record the most recent failure. */
  _setLastError(code, msg) {
    this._lastErrorCode = code;
    this._lastErrorMsg = msg;
  }

  /** Reset the failure state after a successful call. */
  _clearLastError() {
    this._lastErrorCode = 0;
    this._lastErrorMsg = "";
  }

  /** @returns {{code: number, message: string}} the most recent failure (code 0 when none). */
  get lastError() {
    return { code: this._lastErrorCode, message: this._lastErrorMsg };
  }

  /**
   * Produce the 16-character secKey for one request: the injected fixed key
   * when present, otherwise 16 symbols drawn from SECKEY_ALPHABET.
   * @returns {string}
   */
  _genSecKey() {
    if (this._secKey) return this._secKey;
    const size = SECKEY_ALPHABET.length;
    let s = "";
    for (let i = 0; i < 16; i += 1) {
      const raw = Math.floor(this._rand() * size);
      // Clamp so an injected rand() that returns 1 (or a non-finite value)
      // cannot index past the alphabet and append "undefined" to the key.
      const idx = Number.isFinite(raw) ? Math.min(Math.max(raw, 0), size - 1) : 0;
      s += SECKEY_ALPHABET[idx];
    }
    return s;
  }

  /**
   * POST a weapi endpoint. Returns parsed JSON on success (code 200), null on
   * transport / API error (sets lastError).
   * @param {string} path e.g. "/weapi/user/playlist"
   * @param {object} payload
   * @param {string} cookie
   * @returns {Promise<object|null>}
   */
  async _post(path, payload, cookie) {
    if (typeof this._fetch !== "function") {
      this._setLastError(-2, "NeteaseMusicApiClient: fetch not available — pass opts.fetch or run on Node 18+");
      return null;
    }
    const { params, encSecKey } = weapiEncrypt(payload, this._genSecKey());
    const body = `params=${encodeURIComponent(params)}&encSecKey=${encodeURIComponent(encSecKey)}`;
    let resp;
    let txt;
    try {
      resp = await this._fetch(`${this.baseUrl}${path}`, {
        method: "POST",
        headers: {
          "Content-Type": "application/x-www-form-urlencoded",
          "User-Agent": BROWSER_UA,
          Referer: "https://music.163.com/",
          Cookie: cookie,
        },
        body,
      });
      // Reading the body can fail mid-stream; keep it inside the try so the
      // caller still gets null + lastError rather than a rejected promise.
      txt = await resp.text();
    } catch (e) {
      this._setLastError(-4, "network: " + (e && e.message ? e.message : String(e)));
      return null;
    }
    if (!resp.ok) {
      this._setLastError(resp.status, `HTTP ${resp.status}`);
      return null;
    }
    let obj;
    try {
      obj = JSON.parse(txt);
    } catch (e) {
      this._setLastError(-3, "parse: " + (e && e.message ? e.message : String(e)));
      return null;
    }
    // Valid JSON that is not an object (null, a number, a string) has no
    // fields to inspect; report it as a parse-level failure.
    if (obj === null || typeof obj !== "object") {
      this._setLastError(-3, "parse: response is not a JSON object");
      return null;
    }
    // A response without a numeric `code` is treated as success.
    const code = typeof obj.code === "number" ? obj.code : 200;
    if (code !== 200) {
      this._setLastError(code, (obj.message || obj.msg || `code ${code}`).toString());
      return null;
    }
    this._clearLastError();
    return obj;
  }

  /**
   * Resolve the logged-in account for a cookie.
   * @param {string} cookie
   * @returns {Promise<{uid: string, nickname: (string|null)}|null>}
   */
  async getAccount(cookie) {
    const obj = await this._post("/weapi/w/nuser/account/get", {}, cookie);
    if (obj === null) return null;
    const profile = obj.profile && typeof obj.profile === "object" ? obj.profile : null;
    const account = obj.account && typeof obj.account === "object" ? obj.account : null;
    // Prefer profile.userId, fall back to account.id.
    const uid =
      (profile && profile.userId != null && String(profile.userId)) ||
      (account && account.id != null && String(account.id)) ||
      null;
    if (!uid) {
      this._setLastError(-7, "未登录或 cookie 失效(account.get 无 userId)");
      return null;
    }
    return { uid, nickname: (profile && profile.nickname) || null };
  }

  /**
   * Listening ranking. `type` 1 requests the last week, 0 the all-time list.
   * Uses `weekData` when it is a non-empty array, otherwise `allData`.
   * Malformed (non-object) rows become placeholder entries instead of throwing.
   * @param {string} cookie
   * @param {string} uid
   * @param {number} [type=1]
   * @returns {Promise<Array<{songId,song,artist,album,playCount}>|null>}
   */
  async getPlayRecord(cookie, uid, type = 1) {
    const obj = await this._post("/weapi/v1/play/record", { uid, type }, cookie);
    if (obj === null) return null;
    const rows = Array.isArray(obj.weekData) && obj.weekData.length > 0
      ? obj.weekData
      : Array.isArray(obj.allData)
        ? obj.allData
        : [];
    return rows.map((r) => {
      const row = r && typeof r === "object" ? r : {};
      const song = row.song ? row.song : {};
      const artist = Array.isArray(song.ar)
        ? song.ar.map((a) => a && a.name).filter(Boolean).join(" / ")
        : "";
      return {
        songId: song.id != null ? String(song.id) : null,
        song: song.name || "(未知歌曲)",
        artist,
        album: song.al && song.al.name ? song.al.name : null,
        playCount: Number.isFinite(row.playCount) ? row.playCount : null,
      };
    });
  }

  /**
   * The user's playlists. Malformed (non-object) entries become placeholder
   * entries instead of throwing.
   * @param {string} cookie
   * @param {string} uid
   * @param {number} [limit=100]
   * @returns {Promise<Array<{playlistId,name,trackCount,creator}>|null>}
   */
  async getUserPlaylists(cookie, uid, limit = 100) {
    const obj = await this._post(
      "/weapi/user/playlist",
      { uid, limit, offset: 0, includeVideo: true },
      cookie,
    );
    if (obj === null) return null;
    const list = Array.isArray(obj.playlist) ? obj.playlist : [];
    return list.map((p) => {
      const entry = p && typeof p === "object" ? p : {};
      return {
        playlistId: entry.id != null ? String(entry.id) : null,
        name: entry.name || "(未命名歌单)",
        trackCount: Number.isFinite(entry.trackCount) ? entry.trackCount : null,
        creator: entry.creator && entry.creator.nickname ? entry.creator.nickname : null,
      };
    });
  }

  /**
   * High-level: build snapshot-shaped events (play + playlist) for a cookie.
   * Either kind can be disabled with `opts.include.play === false` or
   * `opts.include.playlist === false`. Any failing sub-request aborts the
   * whole snapshot and leaves its reason in `lastError`.
   * @param {string} cookie
   * @param {{include?: object, recordType?: number, playlistLimit?: number}} [opts]
   * @returns {Promise<{account, events}|null>}
   */
  async fetchSnapshot(cookie, opts = {}) {
    if (typeof cookie !== "string" || cookie.length === 0) {
      this._setLastError(-1, "cookie 为空");
      return null;
    }
    const account = await this.getAccount(cookie);
    if (account === null) return null; // lastError set
    const events = [];
    const include = opts.include || {};

    if (include.play !== false) {
      const plays = await this.getPlayRecord(cookie, account.uid, opts.recordType != null ? opts.recordType : 1);
      if (plays === null) return null;
      for (const r of plays) {
        events.push({
          kind: "play",
          id: r.songId ? `play-${r.songId}` : null,
          songId: r.songId,
          song: r.song,
          artist: r.artist,
          album: r.album,
          playCount: r.playCount,
        });
      }
    }

    if (include.playlist !== false) {
      const lists = await this.getUserPlaylists(cookie, account.uid, opts.playlistLimit || 100);
      if (lists === null) return null;
      for (const p of lists) {
        events.push({
          kind: "playlist",
          id: p.playlistId ? `playlist-${p.playlistId}` : null,
          playlistId: p.playlistId,
          name: p.name,
          trackCount: p.trackCount,
          creator: p.creator,
        });
      }
    }

    this._clearLastError();
    return { account: { uid: account.uid, nickname: account.nickname }, events };
  }
}
276
+
277
+ module.exports = {
278
+ NeteaseMusicApiClient,
279
+ // Exported for tests.
280
+ weapiEncrypt,
281
+ aesEncrypt,
282
+ rsaEncrypt,
283
+ modpow,
284
+ };
@@ -1,13 +1,16 @@
1
1
  "use strict";
2
2
 
3
3
  /**
4
- * 网易云音乐 (NetEase Cloud Music) adapter — snapshot mode.
4
+ * 网易云音乐 (NetEase Cloud Music) adapter — snapshot + live cookie modes.
5
5
  *
6
- * Mirrors the social snapshot adapters (bilibili/douyin): a device-side
7
- * collector (Android in-app, or a desktop helper hitting the cookie web API
8
- * `/user/record` `/user/playlist`) writes a snapshot JSON; this adapter
9
- * ingests it. Schema is OUR contract, so normalize is fully testable and the
10
- * vault path is reliable regardless of how the bytes were captured.
6
+ * 两路互补:
7
+ * - snapshot 模式(inputPath):device-side collector(Android in-app)写
8
+ * 的快照 JSON;schema 是 OUR contract,normalize 全可测、vault 路径稳定。
9
+ * - **live 模式(cookie,v0.2 接通)**:[NeteaseMusicApiClient.fetchSnapshot]
10
+ * 经标准 weapi 加密拉 `/weapi/v1/play/record`(听歌排行)+ `/weapi/user/playlist`
11
+ * (歌单),输出形状对齐 snapshot 故 normalize 不变。favorite(喜欢的歌)
12
+ * 需额外解 likelist+歌曲详情,留 snapshot 模式,live 暂不出。
13
+ * Schema 是 OUR contract,无论字节怎么采到 normalize 都一致。
11
14
  *
12
15
  * Snapshot schema (schemaVersion 1):
13
16
  * {
@@ -35,7 +38,7 @@ const {
35
38
  } = require("../../constants");
36
39
 
37
40
  const NAME = "netease-music";
38
- const VERSION = "0.1.0";
41
+ const VERSION = "0.2.0";
39
42
  const SNAPSHOT_SCHEMA_VERSION = 1;
40
43
 
41
44
  const KIND_PLAY = "play";
@@ -63,10 +66,15 @@ function stableOriginalId(kind, id) {
63
66
  class NeteaseMusicAdapter {
64
67
  constructor(opts = {}) {
65
68
  this._dataPath = opts.inputPath || null;
69
+ this._cookie = opts.cookie || null;
70
+ // Test seam: override how the live client is built per-sync (inject fetch).
71
+ this._apiClientFactory =
72
+ typeof opts.apiClientFactory === "function" ? opts.apiClientFactory : null;
66
73
  this.name = NAME;
67
74
  this.version = VERSION;
68
75
  this.capabilities = [
69
76
  "sync:snapshot",
77
+ "sync:cookie",
70
78
  "parse:netease-play",
71
79
  "parse:netease-favorite",
72
80
  "parse:netease-playlist",
@@ -102,7 +110,17 @@ class NeteaseMusicAdapter {
102
110
  }
103
111
  return { ok: true, mode: "snapshot-file" };
104
112
  }
105
- return { ok: false, reason: "NO_INPUT", message: "netease-music.authenticate: needs opts.inputPath" };
113
+ const cookie = (ctx && ctx.cookie) || this._cookie;
114
+ if (cookie) {
115
+ return /MUSIC_U=/.test(cookie)
116
+ ? { ok: true, mode: "cookie" }
117
+ : { ok: false, reason: "INVALID_COOKIE", message: "netease-music.authenticate: cookie 缺 MUSIC_U(未登录)" };
118
+ }
119
+ return {
120
+ ok: false,
121
+ reason: "NO_INPUT",
122
+ message: "netease-music.authenticate: needs opts.inputPath (snapshot) or opts.cookie (live weapi)",
123
+ };
106
124
  }
107
125
 
108
126
  async healthCheck() {
@@ -111,7 +129,16 @@ class NeteaseMusicAdapter {
111
129
 
112
130
  async *sync(opts = {}) {
113
131
  const inputPath = opts.inputPath || this._dataPath;
114
- if (!inputPath) throw new Error("netease-music.sync: needs opts.inputPath (snapshot JSON)");
132
+ if (!inputPath) {
133
+ const cookie = opts.cookie || this._cookie;
134
+ if (cookie) {
135
+ yield* this._syncViaCookie({ ...opts, cookie });
136
+ return;
137
+ }
138
+ throw new Error(
139
+ "netease-music.sync: needs opts.inputPath (snapshot JSON) or opts.cookie (live weapi fetch)",
140
+ );
141
+ }
115
142
  if (!this._deps.fs.existsSync(inputPath)) return;
116
143
  const snapshot = JSON.parse(this._deps.fs.readFileSync(inputPath, "utf-8"));
117
144
  if (!snapshot || snapshot.schemaVersion !== SNAPSHOT_SCHEMA_VERSION) {
@@ -144,6 +171,55 @@ class NeteaseMusicAdapter {
144
171
  }
145
172
  }
146
173
 
174
+ async *_syncViaCookie(opts) {
175
+ const client = this._apiClientFactory
176
+ ? this._apiClientFactory(opts)
177
+ : new (require("./api-client").NeteaseMusicApiClient)({
178
+ fetch: opts.fetch,
179
+ rand: opts.rand,
180
+ secKey: opts.secKey,
181
+ baseUrl: opts.baseUrl,
182
+ });
183
+ const emit = (phase, extra) => {
184
+ if (typeof opts.onProgress === "function") {
185
+ try {
186
+ opts.onProgress({ phase, adapter: NAME, ...extra });
187
+ } catch (_e) {
188
+ /* progress callback errors are best-effort */
189
+ }
190
+ }
191
+ };
192
+ const result = await client.fetchSnapshot(opts.cookie, {
193
+ include: opts.include || {},
194
+ recordType: opts.recordType,
195
+ playlistLimit: opts.playlistLimit,
196
+ });
197
+ if (result === null) {
198
+ const e = client.lastError;
199
+ throw new Error(`netease-music.sync (live): ${e.message || "fetch failed"} (code ${e.code})`);
200
+ }
201
+ const account = result.account || null;
202
+ emit("fetched", { count: result.events.length });
203
+ const capturedAt = Date.now();
204
+ const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : Infinity;
205
+ const include = opts.include || {};
206
+ let emitted = 0;
207
+ for (const ev of result.events) {
208
+ if (emitted >= limit) return;
209
+ if (!ev || !VALID_KINDS.includes(ev.kind)) continue;
210
+ if (include[ev.kind] === false) continue;
211
+ const id = (typeof ev.id === "string" && ev.id) || ev.songId || ev.playlistId || null;
212
+ yield {
213
+ adapter: NAME,
214
+ kind: ev.kind,
215
+ originalId: stableOriginalId(ev.kind, id),
216
+ capturedAt,
217
+ payload: { ...ev, capturedAt, account },
218
+ };
219
+ emitted += 1;
220
+ }
221
+ }
222
+
147
223
  normalize(raw) {
148
224
  if (!raw || !raw.payload) throw new Error("NeteaseMusicAdapter.normalize: payload missing");
149
225
  const kind = raw.kind || raw.payload.kind;
@@ -537,6 +537,8 @@ function normalizeHistory(p, raw, ingestedAt) {
537
537
  awemeId,
538
538
  author,
539
539
  duration,
540
+ // Source surface from the local video_record.db (homepage_hot / etc.).
541
+ enterFrom: row.enterFrom || row.enter_from || p.enterFrom || null,
540
542
  },
541
543
  }],
542
544
  persons: [], places: [], items: [], topics: [],
@@ -0,0 +1,119 @@
1
+ /**
2
+ * AwemeDetailClient — resolves Douyin aweme (video) ids to human-readable
3
+ * metadata (desc / author / duration) so watch-history events show WHAT was
4
+ * watched, not just an id.
5
+ *
6
+ * Real-device finding 2026-06-11: the web detail endpoint
7
+ * https://www.douyin.com/aweme/v1/web/aweme/detail/?aweme_id=<id>
8
+ * &device_platform=webapp&aid=6383&channel=channel_pc_web
9
+ * returns HTTP 200 + full `aweme_detail` JSON (desc, author.nickname, duration,
10
+ * create_time) with **just a browser UA + Referer — no X-Bogus / cookie / msToken**
11
+ * for this guest request shape. So title resolution is a plain HTTP client, not a
12
+ * sign-bridge. (If Douyin later enforces signing here, this becomes the seam to
13
+ * route through a DouyinSignBridge — same pattern as toutiao/xhs.)
14
+ *
15
+ * Rate-friendly: dedups ids, caps per run, sleeps between calls, fails soft per
16
+ * id (an unresolved id just keeps "(no title)" — never aborts the sync).
17
+ */
18
+ "use strict";
19
+
20
+ const DEFAULT_BASE_URL = "https://www.douyin.com";
21
+ const BROWSER_HEADERS = Object.freeze({
22
+ "User-Agent":
23
+ "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
24
+ "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
25
+ Referer: "https://www.douyin.com/",
26
+ "Accept-Language": "zh-CN,zh;q=0.9",
27
+ });
28
+
29
/**
 * Plain HTTP client that resolves Douyin aweme (video) ids to display
 * metadata via the web detail endpoint.
 *
 * Failures never throw: `fetchDetail` resolves to `null` and records the
 * reason in `lastErrorCode` / `lastErrorMessage`. Codes set by this class:
 *   -1 empty id, -2 no fetch implementation, -3 body is not a JSON object,
 *   -4 network failure (including a failed body read), -5 response without
 *   `aweme_detail`. Positive codes are HTTP statuses or API `status_code`s.
 */
class AwemeDetailClient {
  /**
   * @param {object} [opts]
   * @param {string} [opts.baseUrl] endpoint origin; trailing slashes are stripped
   * @param {Function} [opts.fetch] fetch implementation (defaults to globalThis.fetch when present)
   * @param {Function} [opts.sleep] async delay `(ms) => Promise`, injectable for tests
   * @param {number} [opts.delayMs=200] pause between consecutive requests in resolveMany
   */
  constructor(opts = {}) {
    this.baseUrl = (opts.baseUrl || DEFAULT_BASE_URL).replace(/\/+$/, "");
    this._fetch =
      opts.fetch || (typeof globalThis.fetch === "function" ? globalThis.fetch : null);
    this._sleep =
      opts.sleep || ((ms) => new Promise((r) => setTimeout(r, ms)));
    this.delayMs = Number.isFinite(opts.delayMs) ? opts.delayMs : 200;
    this.lastErrorCode = 0;
    this.lastErrorMessage = null;
  }

  /** Record the outcome of the most recent request (code 0 / null message on success). */
  _setErr(code, msg) {
    this.lastErrorCode = code;
    this.lastErrorMessage = msg;
  }

  /**
   * Resolve one aweme id → {awemeId, desc, author, durationMs, createTime} or
   * null on any error (sets lastError).
   * @param {string|number} aid aweme id
   * @returns {Promise<object|null>}
   */
  async fetchDetail(aid) {
    if (typeof this._fetch !== "function") {
      this._setErr(-2, "AwemeDetailClient: fetch not available — pass opts.fetch or run on Node 18+");
      return null;
    }
    // Without this guard a missing id would be sent as the string "undefined".
    if (aid == null || String(aid) === "") {
      this._setErr(-1, "AwemeDetailClient: aweme id is empty");
      return null;
    }
    const url =
      `${this.baseUrl}/aweme/v1/web/aweme/detail/?aweme_id=${encodeURIComponent(String(aid))}` +
      `&device_platform=webapp&aid=6383&channel=channel_pc_web`;
    let resp;
    let body;
    try {
      resp = await this._fetch(url, { method: "GET", headers: { ...BROWSER_HEADERS } });
      // The body read can fail mid-stream; keep it inside the try so the
      // "null on any error" contract holds and resolveMany stays fail-soft.
      body = await resp.text();
    } catch (e) {
      this._setErr(-4, "network: " + (e && e.message ? e.message : String(e)));
      return null;
    }
    if (!resp.ok) {
      this._setErr(resp.status, `HTTP ${resp.status}`);
      return null;
    }
    let obj;
    try {
      obj = JSON.parse(body);
    } catch (e) {
      this._setErr(-3, "parse: " + (e && e.message ? e.message : String(e)));
      return null;
    }
    // Valid JSON that is not an object (e.g. `null`) has no fields to read.
    if (obj === null || typeof obj !== "object") {
      this._setErr(-3, "parse: response is not a JSON object");
      return null;
    }
    // A response without a numeric status_code is treated as success.
    const code = typeof obj.status_code === "number" ? obj.status_code : 0;
    if (code !== 0) {
      this._setErr(code, (obj.status_msg || `status_code=${code}`).toString());
      return null;
    }
    const d = obj.aweme_detail;
    if (!d || typeof d !== "object") {
      this._setErr(-5, "no aweme_detail (deleted/private video?)");
      return null;
    }
    this._setErr(0, null);
    return {
      awemeId: String(aid),
      desc: d.desc || null,
      author: (d.author && d.author.nickname) || null,
      durationMs: Number.isFinite(d.duration) ? d.duration : null,
      createTime: Number.isFinite(d.create_time) ? d.create_time : null,
    };
  }

  /**
   * Resolve many ids → Map<aid, detail>, keyed by the id as a string.
   * Dedups, attempts at most `limit` ids (failed attempts count towards the
   * cap), and sleeps `delayMs` between consecutive calls — never after the
   * last one. Per-id failures are skipped (not in the map).
   * @param {Array<string|number>} aids
   * @param {{limit?: number}} [opts]
   * @returns {Promise<Map<string, object>>}
   */
  async resolveMany(aids, opts = {}) {
    const uniq = [...new Set((aids || []).map(String))];
    const cap = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : uniq.length;
    const targets = uniq.slice(0, cap);
    const out = new Map();
    for (let i = 0; i < targets.length; i += 1) {
      if (i > 0 && this.delayMs > 0) await this._sleep(this.delayMs);
      const aid = targets[i];
      const d = await this.fetchDetail(aid);
      if (d) out.set(aid, d);
    }
    return out;
  }
}
118
+
119
+ module.exports = { AwemeDetailClient, BROWSER_HEADERS };
@@ -159,7 +159,121 @@ async function collectAndSync(bridge, registry, opts = {}) {
159
159
  };
160
160
  }
161
161
 
162
+ // ── Watch-history (video_record.db) path ─────────────────────────────────
163
+ // Distinct from the IM-db path above: pulls the plaintext video_record.db and
164
+ // emits `history` events (KIND_HISTORY → BROWSE) the social-douyin adapter
165
+ // already normalizes. No X-Bogus, no SQLCipher — the durable "what/when the
166
+ // user watched" signal. See watch-history-reader.js.
167
+ const DOUYIN_SNAPSHOT_SCHEMA_VERSION = 1;
168
+
169
/**
 * Pull watch-history records through the bridge, optionally enrich them with
 * titles, and stage them as a snapshot JSON file.
 *
 * @param {{invoke: Function}} bridge exposes `invoke(method, params)`; called
 *   with "douyin.watch-history" and must resolve to `{ records: [...] }`
 * @param {object} [opts]
 * @param {Function} [opts.now] clock returning epoch ms (defaults to Date.now)
 * @param {number} [opts.limit] max records requested from the bridge (default 2000)
 * @param {string} [opts.uid] fallback account id when the bridge reports none
 * @param {string} [opts.displayName] account display name stored in the snapshot
 * @param {boolean} [opts.resolveTitles] enrich events with title/author/duration
 * @param {number} [opts.titleLimit] max ids to resolve (default 60)
 * @param {number} [opts.titleDelayMs] pause between detail requests
 * @param {Function} [opts.fetch] fetch implementation for the detail client
 * @param {object} [opts._detailClient] test seam: object exposing `resolveMany(ids, {limit})`
 * @param {string} [opts.stagingDir] directory handed to writeSnapshotJson
 * @returns {Promise<{snapshotPath, uid, eventCounts, titlesResolved}>}
 * @throws {TypeError} when the bridge has no `invoke` function
 * @throws {Error} when the bridge payload has no `records` array
 */
async function collectWatchHistory(bridge, opts = {}) {
  if (!bridge || typeof bridge.invoke !== "function") {
    throw new TypeError(
      "DouyinAdbCollector.collectWatchHistory: bridge must expose invoke(method, params)",
    );
  }
  const now = opts.now || Date.now;
  const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : 2000;
  const res = await bridge.invoke("douyin.watch-history", { limit });
  if (!res || !Array.isArray(res.records)) {
    throw new Error(
      "DouyinAdbCollector.collectWatchHistory: bridge.invoke('douyin.watch-history') returned malformed payload",
    );
  }
  const uid = res.uid || opts.uid || null;
  const events = [];
  for (const r of res.records) {
    // Records without an aweme id cannot be identified; drop them.
    if (!r || !r.awemeId) continue;
    events.push({
      kind: "history",
      id: `history-${r.awemeId}-${r.capturedAt || ""}`,
      capturedAt: r.capturedAt || now(),
      awemeId: r.awemeId,
      enterFrom: r.enterFrom || null,
    });
  }

  // Optional title enrichment: resolve aweme ids → desc/author/duration via the
  // web detail endpoint (plain HTTP, no signing) so events show WHAT was watched.
  // Capped + dedup'd + fail-soft; an unresolved id is simply left without a title.
  let titlesResolved = 0;
  if (opts.resolveTitles && events.length > 0) {
    const client =
      opts._detailClient ||
      new (require("./aweme-detail-client").AwemeDetailClient)({
        fetch: opts.fetch,
        delayMs: opts.titleDelayMs,
      });
    const titles = await client.resolveMany(
      events.map((e) => e.awemeId),
      { limit: Number.isInteger(opts.titleLimit) && opts.titleLimit > 0 ? opts.titleLimit : 60 },
    );
    for (const e of events) {
      // The detail client keys its map by the stringified id, so a numeric
      // awemeId from the bridge must be stringified before the lookup.
      const t = titles.get(String(e.awemeId));
      if (t) {
        e.title = t.desc;
        e.author = t.author;
        e.duration = t.durationMs;
        titlesResolved += 1;
      }
    }
  }

  const snapshot = {
    schemaVersion: DOUYIN_SNAPSHOT_SCHEMA_VERSION,
    snapshottedAt: now(),
    account: {
      ...(uid ? { shortId: String(uid) } : {}),
      displayName: opts.displayName,
    },
    events,
  };
  const snapshotPath = writeSnapshotJson(snapshot, { dir: opts.stagingDir });
  return {
    snapshotPath,
    uid,
    eventCounts: { history: events.length, total: events.length },
    titlesResolved,
  };
}
241
+
242
/**
 * Collect watch history into a staged snapshot, feed it to the
 * "social-douyin" adapter through the registry, then remove the staged file.
 *
 * Cleanup always runs, even when the sync throws; a cleanup failure is
 * reported as `douyin.cleanupFailed` rather than raised.
 *
 * @param {object} bridge   passed through to collectWatchHistory
 * @param {{syncAdapter: Function}} registry exposes `syncAdapter(name, options)`
 * @param {object} [opts]   passed through to collectWatchHistory
 * @returns {Promise<object>} the sync report's fields plus a `douyin` summary
 * @throws {TypeError} when the registry has no `syncAdapter` function
 */
async function collectWatchHistoryAndSync(bridge, registry, opts = {}) {
  const canSync = Boolean(registry) && typeof registry.syncAdapter === "function";
  if (!canSync) {
    throw new TypeError(
      "DouyinAdbCollector.collectWatchHistoryAndSync: registry must expose syncAdapter(name, options)",
    );
  }

  const collected = await collectWatchHistory(bridge, opts);
  const stagedPath = collected.snapshotPath;

  let cleanupFailed = false;
  let report = null;
  try {
    report = await registry.syncAdapter("social-douyin", { inputPath: stagedPath });
  } finally {
    // Best-effort removal of the staged snapshot; never masks a sync error.
    try {
      cleanupSnapshotJson(stagedPath);
    } catch (_e) {
      cleanupFailed = true;
    }
  }

  const douyin = {
    uid: collected.uid,
    eventCounts: collected.eventCounts,
    titlesResolved: collected.titlesResolved || 0,
    mode: "watch-history",
    cleanupFailed,
  };
  return { ...report, douyin };
}
273
+
162
274
  module.exports = {
163
275
  collect,
164
276
  collectAndSync,
277
+ collectWatchHistory,
278
+ collectWatchHistoryAndSync,
165
279
  };