@chainlesschain/personal-data-hub 0.4.4 → 0.4.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/__tests__/adapters/edu-huawei-learning-live.test.js +198 -0
  2. package/__tests__/adapters/edu-zuoyebang-live.test.js +226 -0
  3. package/__tests__/adapters/family-23-collectors-scaffold.test.js +5 -1
  4. package/__tests__/adapters/finance-alipay-live.test.js +258 -0
  5. package/__tests__/adapters/game-genshin-live.test.js +238 -0
  6. package/__tests__/adapters/game-genshin-scaffold.test.js +4 -3
  7. package/__tests__/adapters/game-honor-of-kings-live.test.js +230 -0
  8. package/__tests__/adapters/netease-music-live.test.js +244 -0
  9. package/__tests__/adapters/social-douyin-adb-aweme-detail.test.js +165 -0
  10. package/__tests__/adapters/social-douyin-adb-watch-history.test.js +192 -0
  11. package/__tests__/adapters/social-toutiao-adb-account-reader.test.js +135 -0
  12. package/__tests__/adapters/social-toutiao-adb-api-client.test.js +89 -0
  13. package/__tests__/adapters/social-toutiao-adb-collector.test.js +95 -2
  14. package/__tests__/adapters/social-toutiao-adb-cookies-extension.test.js +30 -0
  15. package/__tests__/adapters/social-xiaohongshu-adb-cookies-extension.test.js +0 -0
  16. package/__tests__/shopping-pinduoduo-snapshot.test.js +182 -0
  17. package/lib/adapters/_live-json-helpers.js +50 -0
  18. package/lib/adapters/edu-huawei-learning/api-client.js +178 -5
  19. package/lib/adapters/edu-huawei-learning/index.js +83 -9
  20. package/lib/adapters/edu-zuoyebang/api-client.js +181 -6
  21. package/lib/adapters/edu-zuoyebang/index.js +83 -9
  22. package/lib/adapters/finance-alipay/api-client.js +268 -6
  23. package/lib/adapters/finance-alipay/index.js +85 -9
  24. package/lib/adapters/game-genshin/api-client.js +207 -6
  25. package/lib/adapters/game-genshin/index.js +90 -9
  26. package/lib/adapters/game-honor-of-kings/api-client.js +235 -12
  27. package/lib/adapters/game-honor-of-kings/index.js +80 -9
  28. package/lib/adapters/netease-music/api-client.js +284 -0
  29. package/lib/adapters/netease-music/index.js +85 -9
  30. package/lib/adapters/shopping-pinduoduo/index.js +241 -33
  31. package/lib/adapters/social-douyin/index.js +2 -0
  32. package/lib/adapters/social-douyin-adb/aweme-detail-client.js +119 -0
  33. package/lib/adapters/social-douyin-adb/collector.js +114 -0
  34. package/lib/adapters/social-douyin-adb/index.js +18 -1
  35. package/lib/adapters/social-douyin-adb/watch-history-reader.js +188 -0
  36. package/lib/adapters/social-toutiao-adb/account-reader.js +179 -0
  37. package/lib/adapters/social-toutiao-adb/api-client.js +41 -17
  38. package/lib/adapters/social-toutiao-adb/collector.js +55 -19
  39. package/lib/adapters/social-toutiao-adb/cookies-extension.js +21 -1
  40. package/lib/adapters/social-toutiao-adb/index.js +6 -0
  41. package/lib/adapters/social-xiaohongshu-adb/cookies-extension.js +19 -1
  42. package/lib/index.js +1 -1
  43. package/package.json +1 -1
@@ -1,29 +1,32 @@
1
1
  /**
2
- * §2.4c 购物三联 v0.2 — Pinduoduo (拼多多) adapter, snapshot-only.
2
+ * §2.4c 购物三联 — Pinduoduo (拼多多) adapter, dual-mode (snapshot + cookie-api).
3
3
  *
4
- * Mirror of shopping-jd / shopping-meituan snapshot-mode pattern, **but
5
- * without a cookie-mode fallback** because:
4
+ * v0.3 brings 拼多多 to parity with shopping-taobao / shopping-jd /
5
+ * shopping-meituan by adding a cookie-api fetch path alongside the existing
6
+ * snapshot ingest. As with the other shopping adapters the actual HTTP call is
7
+ * delegated to an injected `fetchFn` (the Android in-APK cc uses OkHttp; the
8
+ * desktop hub uses an Electron WebView net request) so this module stays a
9
+ * pure-Node parser + orchestrator.
6
10
  *
7
- * 1. mobile.yangkeduo.com web endpoint `/proxy/api/galerie/transaction/
8
- * transaction_list` requires `anti_token` signing computed by client-side
9
- * JS (similar to 抖音 X-Bogus). No pure-Node implementation survives
10
- * pinduoduo's monthly anti_token rotation.
11
- * 2. Pinduoduo Android app has no built-in "export orders" feature, so
12
- * there's no SAF source-format to parse directly either.
11
+ * 1. snapshot mode (opts.inputPath): ingest a snapshot JSON produced by a
12
+ * browser extension / hand-roll (stateless account OPTIONAL).
13
13
  *
14
- * v0.2 deliverable = **scaffold + snapshot-mode JSON ingest**. User-facing
15
- * paths for producing the snapshot JSON:
14
+ * 2. cookie-api mode (opts.account.cookies): fetch
15
+ * `mobile.yangkeduo.com/proxy/api/galerie/transaction/transaction_list`
16
+ * via the injected `fetchFn`, paginating with the `pageNumber` cursor and
17
+ * stopping at the `sinceWatermark`. account.uid REQUIRED in this mode.
16
18
  *
17
- * a) Browser extension (planned v0.3) that scrapes yangkeduo.com order
18
- * pages while logged in and exports JSON matching this schema.
19
- * b) Manual hand-roll (rare; for testing).
19
+ * ── anti_token signing seam ──────────────────────────────────────────────
20
+ * Pinduoduo's transaction_list requires an `anti_token` (a.k.a.
21
+ * `anti-content`) computed by client-side JS analogous to 抖音 X-Bogus.
22
+ * No pure-Node implementation survives pinduoduo's anti_token rotation, so
23
+ * the signing itself is injected via `opts.signProvider` (or constructor
24
+ * `signProvider`). On Android the in-APK WebView JS VM produces the token;
25
+ * in tests a stub returns a fixed value. When no signProvider is configured
26
+ * the request is still issued with `antiToken: null` — best-effort, the
27
+ * endpoint may 403, which surfaces as zero events rather than a crash.
20
28
  *
21
- * UI surface: pinduoduo card appears alongside alipay/taobao/jd/meituan in
22
- * 推文 §"支付与购物" 大类, with an explicit "v0.2 待用户导出 — 需 web
23
- * extension 或手抄" banner so user knows the limitation.
24
- *
25
- * Snapshot schema (mirrors PinduoduoLocalCollector.SNAPSHOT_SCHEMA_VERSION
26
- * once the Kotlin collector lands in v0.3+):
29
+ * Snapshot schema (mirrors PinduoduoLocalCollector.SNAPSHOT_SCHEMA_VERSION):
27
30
  *
28
31
  * {
29
32
  * "schemaVersion": 1,
@@ -45,33 +48,46 @@
45
48
  * ]
46
49
  * }
47
50
  *
48
- * Future v0.3: HTML parsing (`Save As Webpage` from `mobile.yangkeduo.com/
51
+ * Future v0.4: HTML parsing (`Save As Webpage` from `mobile.yangkeduo.com/
49
52
  * users/orders.html` — pinduoduo's order list endpoint).
50
53
  */
51
54
 
52
55
  "use strict";
53
56
 
54
57
  const fs = require("node:fs");
55
- const { normalizeOrderRecord } = require("../shopping-base");
58
+ const { normalizeOrderRecord, CookieAuth } = require("../shopping-base");
56
59
 
57
60
  const NAME = "shopping-pinduoduo";
58
- const VERSION = "0.1.0";
61
+ const VERSION = "0.2.0";
59
62
  const SNAPSHOT_SCHEMA_VERSION = 1;
60
63
 
61
64
  const KIND_ORDER = "order";
62
65
  const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_ORDER]);
63
66
 
67
+ const PINDUODUO_ORDERS_URL =
68
+ "https://mobile.yangkeduo.com/proxy/api/galerie/transaction/transaction_list";
69
+
64
70
  class PinduoduoAdapter {
65
71
  constructor(opts = {}) {
66
- // §2.4c v0.2: account is OPTIONAL — snapshot mode is stateless. There's
67
- // no cookie mode at all (anti_token signing path deferred to v0.3+).
72
+ // §2.4c: account is OPTIONAL — snapshot mode is stateless. Cookie-api mode
73
+ // activates only when account.cookies is supplied; account.uid is then
74
+ // required (checked at sync time).
68
75
  this.account = opts.account || null;
76
+ this._cookieAuth =
77
+ opts.account && opts.account.cookies
78
+ ? new CookieAuth({ platform: "pinduoduo", cookies: opts.account.cookies })
79
+ : null;
80
+ this._fetchFn = typeof opts.fetchFn === "function" ? opts.fetchFn : defaultFetch;
81
+ // anti_token signing seam — see file header. Async fn({ url, query,
82
+ // cookies }) → string|null. When absent, requests carry antiToken: null.
83
+ this._signProvider =
84
+ typeof opts.signProvider === "function" ? opts.signProvider : null;
69
85
 
70
86
  this.name = NAME;
71
87
  this.version = VERSION;
72
- this.capabilities = ["sync:snapshot", "parse:pinduoduo-orders"];
73
- this.extractMode = "user-export";
74
- this.rateLimits = {};
88
+ this.capabilities = ["sync:snapshot", "sync:cookie-api", "parse:pinduoduo-orders"];
89
+ this.extractMode = "web-api";
90
+ this.rateLimits = { perMinute: 8, perDay: 200 };
75
91
  this.dataDisclosure = {
76
92
  fields: [
77
93
  "pinduoduo:order_sn / mall_name / goods_list / order_amount / address",
@@ -99,15 +115,33 @@ class PinduoduoAdapter {
99
115
  }
100
116
  return { ok: true, mode: "snapshot-file" };
101
117
  }
118
+ if (this._cookieAuth) {
119
+ const ok = await this._cookieAuth.validate();
120
+ if (!ok) return { ok: false, reason: "INVALID_COOKIE", error: "cookies missing" };
121
+ if (!this.account || !this.account.uid) {
122
+ return {
123
+ ok: false,
124
+ reason: "NO_ACCOUNT_UID",
125
+ message: "cookie-api mode requires account.uid",
126
+ };
127
+ }
128
+ return { ok: true, account: this.account.uid, mode: "cookie" };
129
+ }
102
130
  return {
103
131
  ok: false,
104
132
  reason: "NO_INPUT",
105
133
  message:
106
- "PinduoduoAdapter.authenticate: needs opts.inputPath (snapshot mode no cookie mode in v0.2)",
134
+ "PinduoduoAdapter.authenticate: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode anti_token signing via signProvider)",
107
135
  };
108
136
  }
109
137
 
110
138
  async healthCheck() {
139
+ if (this._cookieAuth) {
140
+ const r = await this.authenticate();
141
+ return r.ok
142
+ ? { ok: true, lastChecked: Date.now() }
143
+ : { ok: false, reason: r.reason, error: r.error };
144
+ }
111
145
  return { ok: true, lastChecked: Date.now() };
112
146
  }
113
147
 
@@ -116,21 +150,25 @@ class PinduoduoAdapter {
116
150
  yield* this._syncViaSnapshot(opts);
117
151
  return;
118
152
  }
153
+ if (this._cookieAuth) {
154
+ yield* this._syncViaCookie(opts);
155
+ return;
156
+ }
119
157
  throw new Error(
120
- "PinduoduoAdapter.sync: needs opts.inputPath (snapshot mode; no cookie/api mode in v0.2 because pinduoduo's web API requires anti_token JS-VM signing)",
158
+ "PinduoduoAdapter.sync: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode; pinduoduo's web API requires anti_token signing supplied via opts.signProvider)",
121
159
  );
122
160
  }
123
161
 
124
162
  async *_syncViaSnapshot(opts) {
125
163
  const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
126
164
  // v0.2 explicit JSON-only. HTML parsing (SAF-exported webpage from
127
- // yangkeduo.com order list) is future v0.3 work.
165
+ // yangkeduo.com order list) is future v0.4 work.
128
166
  let snapshot;
129
167
  try {
130
168
  snapshot = JSON.parse(raw);
131
169
  } catch (err) {
132
170
  throw new Error(
133
- `shopping-pinduoduo.sync: snapshot must be JSON (v0.3 will add HTML parsing). Got parse error: ${err.message}`,
171
+ `shopping-pinduoduo.sync: snapshot must be JSON (v0.4 will add HTML parsing). Got parse error: ${err.message}`,
134
172
  );
135
173
  }
136
174
  if (
@@ -184,11 +222,77 @@ class PinduoduoAdapter {
184
222
  }
185
223
  }
186
224
 
225
+ async *_syncViaCookie(opts = {}) {
226
+ if (!this.account || !this.account.uid) {
227
+ throw new Error(
228
+ "PinduoduoAdapter._syncViaCookie: account.uid required (set via new PinduoduoAdapter({ account: { uid, cookies } }))",
229
+ );
230
+ }
231
+ if (!(await this._cookieAuth.validate())) return;
232
+ const sinceMs =
233
+ opts.sinceWatermark != null
234
+ ? parseInt(String(opts.sinceWatermark), 10) || 0
235
+ : Date.now() - 365 * 24 * 3600_000; // default last year
236
+ const pageSize = Number.isFinite(opts.pageSize) ? opts.pageSize : 10;
237
+ const include = opts.include || {};
238
+ if (include[KIND_ORDER] === false) return;
239
+
240
+ let pageNumber = 1;
241
+ while (true) {
242
+ const query = { pageNumber, pageSize, ts: Date.now() };
243
+ // anti_token signing seam — best-effort. null when no signProvider.
244
+ let antiToken = null;
245
+ if (this._signProvider) {
246
+ antiToken = await this._signProvider({
247
+ url: PINDUODUO_ORDERS_URL,
248
+ query,
249
+ cookies: this._cookieAuth.toHeader(),
250
+ });
251
+ }
252
+ const resp = await this._fetchFn({
253
+ url: PINDUODUO_ORDERS_URL,
254
+ cookies: this._cookieAuth.toHeader(),
255
+ antiToken,
256
+ query,
257
+ });
258
+ const orders = extractOrders(resp);
259
+ if (!orders.length) break;
260
+ let pageHasNew = false;
261
+ let reachedWatermark = false;
262
+ for (const raw of orders) {
263
+ const rec = orderToRecord(raw);
264
+ if (!rec) continue;
265
+ if (rec.placedAt && rec.placedAt < sinceMs) {
266
+ reachedWatermark = true; // everything from here on is older
267
+ break;
268
+ }
269
+ pageHasNew = true;
270
+ yield {
271
+ adapter: NAME,
272
+ originalId: rec.orderId,
273
+ capturedAt: rec.paidAt || rec.placedAt || Date.now(),
274
+ payload: { record: rec },
275
+ };
276
+ }
277
+ // Stop once we've crossed the watermark, drained the page, or the page
278
+ // came back short (last page).
279
+ if (reachedWatermark || !pageHasNew || orders.length < pageSize) break;
280
+ pageNumber += 1;
281
+ }
282
+ }
283
+
187
284
  normalize(raw) {
188
285
  if (!raw || !raw.payload) {
189
286
  throw new Error("PinduoduoAdapter.normalize: payload missing");
190
287
  }
191
- // Snapshot-mode only payload carries fields directly on the event.
288
+ // Cookie-api mode wraps a normalized record under payload.record; snapshot
289
+ // mode carries the raw event fields directly on the payload.
290
+ if (raw.payload.record) {
291
+ return normalizeOrderRecord(raw.payload.record, {
292
+ adapterName: NAME,
293
+ adapterVersion: VERSION,
294
+ });
295
+ }
192
296
  const rec = snapshotEventToRecord(raw.payload);
193
297
  return normalizeOrderRecord(rec, {
194
298
  adapterName: NAME,
@@ -208,6 +312,104 @@ function stableOriginalId(kind, id) {
208
312
  return `pinduoduo:${kind}:${safe}`;
209
313
  }
210
314
 
315
/**
 * Pull the order array out of a transaction_list response.
 *
 * Accepted shapes, checked in this order:
 *   - a bare array (a fetchFn that pre-flattens all the way down),
 *   - `{ orders }`, `{ order_list }`, `{ list }`,
 *   - `{ result: { order_list } }`, `{ result: { list } }`.
 *
 * @param {*} resp value returned by the injected fetchFn.
 * @returns {Array} the order array, or `[]` when no known shape matches.
 */
function extractOrders(resp) {
  // A bare array is an object too, so test it first; otherwise it would fall
  // through every keyed lookup below and be reported as "no orders".
  if (Array.isArray(resp)) return resp;
  if (!resp || typeof resp !== "object") return [];

  for (const key of ["orders", "order_list", "list"]) {
    if (Array.isArray(resp[key])) return resp[key];
  }
  if (resp.result) {
    for (const key of ["order_list", "list"]) {
      if (Array.isArray(resp.result[key])) return resp.result[key];
    }
  }
  return [];
}
329
+
330
/**
 * Map one pinduoduo transaction_list order object → vendor-neutral OrderRecord.
 *
 * Amounts are passed through `centsToYuan`, timestamps through `parseTime`, and
 * the status through `mapStatus(pickStatusText(o))`. Field names are
 * best-effort across endpoint versions (camelCase + snake_case fallbacks).
 *
 * @param {object} o raw order object from the endpoint.
 * @returns {object|null} the record, or `null` when `o` is not an object or
 *   carries no order id.
 */
function orderToRecord(o) {
  if (!o || typeof o !== "object") return null;
  const orderId = o.order_sn || o.orderSn || o.orderId || o.id;
  if (!orderId) return null;
  const merchant = o.mall_name || o.mallName || o.merchantName || o.shop_name || "拼多多";

  const items = [];
  const rawItems = o.goods_list || o.order_goods || o.goodsList || o.items || [];
  for (const it of Array.isArray(rawItems) ? rawItems : []) {
    if (!it) continue;
    // A non-numeric quantity would parse to NaN; fall back to 1, the same
    // default that is used when the field is absent.
    const parsedQuantity = parseInt(it.goods_number || it.goods_count || it.quantity || 1, 10);
    items.push({
      name: it.goods_name || it.goodsName || it.name || it.skuName,
      quantity: Number.isNaN(parsedQuantity) ? 1 : parsedQuantity,
      unitPrice: centsToYuan(it.goods_price || it.goodsPrice || it.unitPrice || 0),
      sku: it.sku_id || it.skuId || it.goods_id || it.sku || null,
    });
  }

  return {
    vendorId: "pinduoduo",
    orderId: String(orderId),
    placedAt: parseTime(o.order_time || o.create_at || o.createAt || o.order_create_at),
    paidAt: parseTime(o.pay_time || o.payTime || o.group_order_pay_time),
    status: mapStatus(pickStatusText(o)),
    merchantName: merchant,
    totalAmount: {
      value: centsToYuan(o.order_amount || o.orderAmount || o.pay_amount || o.total_amount || 0),
      currency: "CNY",
    },
    items,
    recipient: o.receive_name || o.receiver || o.recipient || null,
    shippingAddress: o.address || o.receive_address || o.shippingAddress || null,
    trackingNumber: o.tracking_number || o.waybill_no || o.trackingNumber || null,
    extras: { capturedBy: "cookie-api", platform: "pinduoduo" },
  };
}
371
+
372
/**
 * Best-effort text for the numeric `order_status` codes, used only when the
 * order carries no human-readable status field.
 */
const PDD_STATUS_CODE_TEXT = new Map([
  [1, "待付款"],
  [2, "待发货"],
  [3, "已发货"],
  [4, "已完成"],
  [5, "已关闭"],
  [6, "已关闭"],
]);

/**
 * Choose the status string handed to `mapStatus` for one order.
 *
 * Human-readable text is preferred over the numeric `order_status` code so a
 * keyword match can work on it. Every text key is tried in both snake_case and
 * camelCase, including `status_desc` / `statusDesc`.
 *
 * @param {object} o raw order object.
 * @returns {string} status text; for an unknown code, `String(o.status)` when
 *   `o.status` is set, otherwise `""`.
 */
function pickStatusText(o) {
  const text =
    o.order_status_prompt ||
    o.orderStatusPrompt ||
    o.status_prompt ||
    o.statusPrompt ||
    o.status_desc ||
    o.statusDesc ||
    null;
  if (text) return text;

  // Fall back to the numeric order_status code (best-effort PDD mapping).
  const code = o.order_status != null ? o.order_status : o.orderStatus;
  const mapped = PDD_STATUS_CODE_TEXT.get(Number(code));
  if (mapped !== undefined) return mapped;
  return o.status != null ? String(o.status) : "";
}
403
+
404
/**
 * Convert an amount to 元.
 *
 * Integers are treated as 分 and divided by 100. A value that is already 元 is
 * returned unchanged: either a non-integer number, or a string written with a
 * decimal point (so "12.00" is 12 元, not 0.12 元).
 *
 * @param {number|string|null|undefined} v raw amount.
 * @returns {number} amount in 元; 0 when `v` is not a finite number.
 */
function centsToYuan(v) {
  const n = Number(v);
  if (!Number.isFinite(n)) return 0;
  // A string with an explicit decimal point is 元 even when its numeric value
  // happens to be a whole number ("12.00").
  if (typeof v === "string" && v.includes(".")) return n;
  // Snapshot/test inputs may already be 元 as a fractional number.
  if (!Number.isInteger(n)) return n;
  return n / 100;
}
412
+
211
413
  function snapshotEventToRecord(ev) {
212
414
  const items = [];
213
415
  const rawItems = Array.isArray(ev.items) ? ev.items : [];
@@ -266,8 +468,14 @@ function mapStatus(s) {
266
468
  return "placed";
267
469
  }
268
470
 
471
/**
 * Placeholder fetchFn used when the caller injects none: it always rejects, so
 * a missing `fetchFn` surfaces as an error on the first request.
 *
 * @param {object} _opts ignored.
 * @returns {Promise<never>} a promise that always rejects.
 */
function defaultFetch(_opts) {
  return Promise.reject(new Error("PinduoduoAdapter: no fetchFn configured"));
}
474
+
269
475
  module.exports = {
270
476
  PinduoduoAdapter,
477
+ orderToRecord,
478
+ extractOrders,
271
479
  NAME,
272
480
  VERSION,
273
481
  SNAPSHOT_SCHEMA_VERSION,
@@ -537,6 +537,8 @@ function normalizeHistory(p, raw, ingestedAt) {
537
537
  awemeId,
538
538
  author,
539
539
  duration,
540
+ // Source surface from the local video_record.db (homepage_hot / etc.).
541
+ enterFrom: row.enterFrom || row.enter_from || p.enterFrom || null,
540
542
  },
541
543
  }],
542
544
  persons: [], places: [], items: [], topics: [],
@@ -0,0 +1,119 @@
1
+ /**
2
+ * AwemeDetailClient — resolves Douyin aweme (video) ids to human-readable
3
+ * metadata (desc / author / duration) so watch-history events show WHAT was
4
+ * watched, not just an id.
5
+ *
6
+ * Real-device finding 2026-06-11: the web detail endpoint
7
+ * https://www.douyin.com/aweme/v1/web/aweme/detail/?aweme_id=<id>
8
+ * &device_platform=webapp&aid=6383&channel=channel_pc_web
9
+ * returns HTTP 200 + full `aweme_detail` JSON (desc, author.nickname, duration,
10
+ * create_time) with **just a browser UA + Referer — no X-Bogus / cookie / msToken**
11
+ * for this guest request shape. So title resolution is a plain HTTP client, not a
12
+ * sign-bridge. (If Douyin later enforces signing here, this becomes the seam to
13
+ * route through a DouyinSignBridge — same pattern as toutiao/xhs.)
14
+ *
15
+ * Rate-friendly: dedups ids, caps per run, sleeps between calls, fails soft per
16
+ * id (an unresolved id just keeps "(no title)" — never aborts the sync).
17
+ */
18
+ "use strict";
19
+
20
/** Origin used when the caller does not pass `opts.baseUrl`. */
const DEFAULT_BASE_URL = "https://www.douyin.com";

/** Browser-like headers sent with every detail request. */
const BROWSER_HEADERS = Object.freeze({
  "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  Referer: "https://www.douyin.com/",
  "Accept-Language": "zh-CN,zh;q=0.9",
});

/**
 * Plain-HTTP client for the Douyin web aweme-detail endpoint.
 *
 * Failures never throw out of `fetchDetail`; they return `null` and are
 * recorded in `lastErrorCode` / `lastErrorMessage`:
 *   -2 fetch unavailable · -3 body is not a JSON object · -4 network or
 *   body-read failure · -5 response has no `aweme_detail` · any other non-zero
 *   value is the HTTP status or the endpoint's own `status_code`.
 */
class AwemeDetailClient {
  /**
   * @param {object} [opts]
   * @param {string} [opts.baseUrl] origin to call; trailing slashes are removed.
   * @param {Function} [opts.fetch] fetch implementation; defaults to the global one.
   * @param {Function} [opts.sleep] `(ms) => Promise` used between calls.
   * @param {number} [opts.delayMs=200] pause between calls in `resolveMany`.
   */
  constructor(opts = {}) {
    this.baseUrl = (opts.baseUrl || DEFAULT_BASE_URL).replace(/\/+$/, "");
    // Wrap the global fetch instead of storing it directly: it is later called
    // as `this._fetch(...)`, and some runtimes reject a fetch invoked with a
    // foreign `this`.
    this._fetch =
      opts.fetch ||
      (typeof globalThis.fetch === "function"
        ? (url, init) => globalThis.fetch(url, init)
        : null);
    this._sleep =
      opts.sleep || ((ms) => new Promise((r) => setTimeout(r, ms)));
    this.delayMs = Number.isFinite(opts.delayMs) ? opts.delayMs : 200;
    this.lastErrorCode = 0;
    this.lastErrorMessage = null;
  }

  /** Record the outcome of the most recent call (code 0 / null = success). */
  _setErr(code, msg) {
    this.lastErrorCode = code;
    this.lastErrorMessage = msg;
  }

  /**
   * Resolve one aweme id → {awemeId, desc, author, durationMs, createTime} or
   * null on any error (sets lastError).
   *
   * @param {string|number} aid aweme id.
   * @returns {Promise<object|null>}
   */
  async fetchDetail(aid) {
    if (typeof this._fetch !== "function") {
      this._setErr(-2, "AwemeDetailClient: fetch not available — pass opts.fetch or run on Node 18+");
      return null;
    }
    const url =
      `${this.baseUrl}/aweme/v1/web/aweme/detail/?aweme_id=${encodeURIComponent(String(aid))}` +
      `&device_platform=webapp&aid=6383&channel=channel_pc_web`;

    let resp;
    let body;
    try {
      resp = await this._fetch(url, { method: "GET", headers: { ...BROWSER_HEADERS } });
      // Reading the body can fail too (e.g. the connection drops mid-stream);
      // keep it inside the guard so this method really is null-on-any-error.
      body = await resp.text();
    } catch (e) {
      this._setErr(-4, "network: " + (e && e.message ? e.message : String(e)));
      return null;
    }
    if (!resp.ok) {
      this._setErr(resp.status, `HTTP ${resp.status}`);
      return null;
    }

    let obj;
    try {
      obj = JSON.parse(body);
    } catch (e) {
      this._setErr(-3, "parse: " + (e && e.message ? e.message : String(e)));
      return null;
    }
    // Valid JSON is not necessarily an object ("null", "42"); reading fields
    // off such a value would throw.
    if (!obj || typeof obj !== "object") {
      this._setErr(-3, "parse: response is not a JSON object");
      return null;
    }

    const code = typeof obj.status_code === "number" ? obj.status_code : 0;
    if (code !== 0) {
      this._setErr(code, String(obj.status_msg || `status_code=${code}`));
      return null;
    }
    const d = obj.aweme_detail;
    if (!d || typeof d !== "object") {
      this._setErr(-5, "no aweme_detail (deleted/private video?)");
      return null;
    }
    this._setErr(0, null);
    return {
      awemeId: String(aid),
      desc: d.desc || null,
      author: (d.author && d.author.nickname) || null,
      durationMs: Number.isFinite(d.duration) ? d.duration : null,
      createTime: Number.isFinite(d.create_time) ? d.create_time : null,
    };
  }

  /**
   * Resolve many ids → Map<aid, detail>. Dedups, caps at `limit`, sleeps
   * `delayMs` between calls (never after the last one). Per-id failures are
   * skipped (not in the map).
   * @param {string[]} aids
   * @param {{limit?: number}} [opts]
   * @returns {Promise<Map<string, object>>}
   */
  async resolveMany(aids, opts = {}) {
    const uniq = [...new Set((aids || []).map(String))];
    // Clamp to the number of ids actually present so the "is there another
    // call coming?" check below does not sleep once more after the final id.
    const cap =
      Number.isInteger(opts.limit) && opts.limit > 0
        ? Math.min(opts.limit, uniq.length)
        : uniq.length;
    const out = new Map();
    for (let i = 0; i < cap; i += 1) {
      const aid = uniq[i];
      const d = await this.fetchDetail(aid);
      if (d) out.set(aid, d);
      if (this.delayMs > 0 && i + 1 < cap) await this._sleep(this.delayMs);
    }
    return out;
  }
}
118
+
119
+ module.exports = { AwemeDetailClient, BROWSER_HEADERS };
@@ -159,7 +159,121 @@ async function collectAndSync(bridge, registry, opts = {}) {
159
159
  };
160
160
  }
161
161
 
162
+ // ── Watch-history (video_record.db) path ─────────────────────────────────
163
+ // Distinct from the IM-db path above: pulls the plaintext video_record.db and
164
+ // emits `history` events (KIND_HISTORY → BROWSE) the social-douyin adapter
165
+ // already normalizes. No X-Bogus, no SQLCipher — the durable "what/when the
166
+ // user watched" signal. See watch-history-reader.js.
167
+ const DOUYIN_SNAPSHOT_SCHEMA_VERSION = 1;
168
+
169
/**
 * Pull watch-history rows through the bridge, optionally enrich them with
 * titles, and write them out as a snapshot JSON file.
 *
 * @param {{invoke: Function}} bridge must expose `invoke(method, params)`.
 * @param {object} [opts]
 * @param {Function} [opts.now] clock, defaults to `Date.now`.
 * @param {number} [opts.limit=2000] row limit passed to the bridge.
 * @param {string} [opts.uid] fallback uid when the bridge returns none.
 * @param {boolean} [opts.resolveTitles] enable title enrichment.
 * @param {number} [opts.titleLimit=60] cap on ids sent to the resolver.
 * @param {number} [opts.titleDelayMs] delay handed to the default resolver.
 * @param {Function} [opts.fetch] fetch handed to the default resolver.
 * @param {object} [opts._detailClient] resolver override exposing `resolveMany`.
 * @param {string} [opts.displayName] stored on the snapshot account.
 * @param {string} [opts.stagingDir] directory handed to `writeSnapshotJson`.
 * @returns {Promise<object>} `{ snapshotPath, uid, eventCounts, titlesResolved }`,
 *   plus `titlesError` (string) only when enrichment threw.
 * @throws {TypeError} when `bridge.invoke` is not a function.
 * @throws {Error} when the bridge payload has no `records` array.
 */
async function collectWatchHistory(bridge, opts = {}) {
  if (!bridge || typeof bridge.invoke !== "function") {
    throw new TypeError(
      "DouyinAdbCollector.collectWatchHistory: bridge must expose invoke(method, params)",
    );
  }
  const now = opts.now || Date.now;
  const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : 2000;
  const res = await bridge.invoke("douyin.watch-history", { limit });
  if (!res || !Array.isArray(res.records)) {
    throw new Error(
      "DouyinAdbCollector.collectWatchHistory: bridge.invoke('douyin.watch-history') returned malformed payload",
    );
  }
  const uid = res.uid || opts.uid || null;
  const events = [];
  for (const r of res.records) {
    if (!r || !r.awemeId) continue;
    events.push({
      kind: "history",
      id: `history-${r.awemeId}-${r.capturedAt || ""}`,
      capturedAt: r.capturedAt || now(),
      awemeId: r.awemeId,
      enterFrom: r.enterFrom || null,
    });
  }

  // Optional title enrichment: resolve aweme ids → desc/author/duration so
  // events show WHAT was watched. Enrichment is best-effort: if the resolver
  // throws, the history that was already collected is still written and the
  // failure is reported through `titlesError` instead of aborting.
  let titlesResolved = 0;
  let titlesError = null;
  if (opts.resolveTitles && events.length > 0) {
    try {
      const client =
        opts._detailClient ||
        new (require("./aweme-detail-client").AwemeDetailClient)({
          fetch: opts.fetch,
          delayMs: opts.titleDelayMs,
        });
      // Ids are passed in event order; the resolver stops at `limit`.
      const titles = await client.resolveMany(
        events.map((e) => e.awemeId),
        { limit: Number.isInteger(opts.titleLimit) && opts.titleLimit > 0 ? opts.titleLimit : 60 },
      );
      for (const e of events) {
        const t = titles.get(e.awemeId);
        if (t) {
          e.title = t.desc;
          e.author = t.author;
          e.duration = t.durationMs;
          titlesResolved += 1;
        }
      }
    } catch (err) {
      titlesError = err && err.message ? err.message : String(err);
    }
  }

  const snapshot = {
    schemaVersion: DOUYIN_SNAPSHOT_SCHEMA_VERSION,
    snapshottedAt: now(),
    account: {
      ...(uid ? { shortId: String(uid) } : {}),
      displayName: opts.displayName,
    },
    events,
  };
  const snapshotPath = writeSnapshotJson(snapshot, { dir: opts.stagingDir });
  return {
    snapshotPath,
    uid,
    eventCounts: { history: events.length, total: events.length },
    titlesResolved,
    // Only present on failure, so the success-path result shape is unchanged.
    ...(titlesError ? { titlesError } : {}),
  };
}
241
+
242
/**
 * Collect watch history into a snapshot file, feed that file to the
 * `social-douyin` adapter through the registry, then remove the file.
 *
 * Cleanup runs whether or not the sync throws; a cleanup failure is reported
 * as `douyin.cleanupFailed` rather than raised.
 *
 * @param {{invoke: Function}} bridge forwarded to `collectWatchHistory`.
 * @param {{syncAdapter: Function}} registry must expose `syncAdapter(name, options)`.
 * @param {object} [opts] forwarded to `collectWatchHistory`.
 * @returns {Promise<object>} the registry's sync report plus a `douyin` summary.
 * @throws {TypeError} when `registry.syncAdapter` is not a function.
 */
async function collectWatchHistoryAndSync(bridge, registry, opts = {}) {
  const registryUsable = registry && typeof registry.syncAdapter === "function";
  if (!registryUsable) {
    throw new TypeError(
      "DouyinAdbCollector.collectWatchHistoryAndSync: registry must expose syncAdapter(name, options)",
    );
  }

  const collected = await collectWatchHistory(bridge, opts);
  const stagedPath = collected.snapshotPath;

  let report = null;
  let cleanupFailed = false;
  try {
    report = await registry.syncAdapter("social-douyin", { inputPath: stagedPath });
  } finally {
    try {
      cleanupSnapshotJson(stagedPath);
    } catch (_e) {
      cleanupFailed = true;
    }
  }

  const douyin = {
    uid: collected.uid,
    eventCounts: collected.eventCounts,
    titlesResolved: collected.titlesResolved || 0,
    mode: "watch-history",
    cleanupFailed,
  };
  return { ...report, douyin };
}
273
+
162
274
  module.exports = {
163
275
  collect,
164
276
  collectAndSync,
277
+ collectWatchHistory,
278
+ collectWatchHistoryAndSync,
165
279
  };
@@ -38,13 +38,30 @@ const {
38
38
  cleanupSnapshotJson,
39
39
  SNAPSHOT_SCHEMA_VERSION,
40
40
  } = require("./snapshot-builder");
41
- const { collect, collectAndSync } = require("./collector");
41
+ const {
42
+ collect,
43
+ collectAndSync,
44
+ collectWatchHistory,
45
+ collectWatchHistoryAndSync,
46
+ } = require("./collector");
47
+ const {
48
+ createDouyinWatchExtension,
49
+ VIDEO_RECORD_DB_REMOTE_PATH,
50
+ } = require("./watch-history-reader");
51
+ const { AwemeDetailClient } = require("./aweme-detail-client");
42
52
 
43
53
  module.exports = {
44
54
  // Extension factory (wiring registers this on the bridge)
45
55
  createDouyinDbExtension,
46
56
  DOUYIN_DB_REMOTE_DIR,
47
57
  IM_DB_PATTERN,
58
+ // Watch-history (video_record.db) extension + path
59
+ createDouyinWatchExtension,
60
+ VIDEO_RECORD_DB_REMOTE_PATH,
61
+ collectWatchHistory,
62
+ collectWatchHistoryAndSync,
63
+ // Aweme title resolver (web detail endpoint, no signing)
64
+ AwemeDetailClient,
48
65
  // Parser + builder (also exposed for advanced callers / tests)
49
66
  parseImDb,
50
67
  buildSnapshot,