@chainlesschain/personal-data-hub 0.4.4 → 0.4.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/edu-huawei-learning-live.test.js +198 -0
- package/__tests__/adapters/edu-zuoyebang-live.test.js +226 -0
- package/__tests__/adapters/family-23-collectors-scaffold.test.js +5 -1
- package/__tests__/adapters/finance-alipay-live.test.js +258 -0
- package/__tests__/adapters/game-genshin-live.test.js +238 -0
- package/__tests__/adapters/game-genshin-scaffold.test.js +4 -3
- package/__tests__/adapters/game-honor-of-kings-live.test.js +230 -0
- package/__tests__/adapters/netease-music-live.test.js +244 -0
- package/__tests__/adapters/social-douyin-adb-aweme-detail.test.js +165 -0
- package/__tests__/adapters/social-douyin-adb-watch-history.test.js +192 -0
- package/__tests__/adapters/social-toutiao-adb-account-reader.test.js +135 -0
- package/__tests__/adapters/social-toutiao-adb-api-client.test.js +89 -0
- package/__tests__/adapters/social-toutiao-adb-collector.test.js +95 -2
- package/__tests__/adapters/social-toutiao-adb-cookies-extension.test.js +30 -0
- package/__tests__/adapters/social-xiaohongshu-adb-cookies-extension.test.js +0 -0
- package/__tests__/shopping-pinduoduo-snapshot.test.js +182 -0
- package/lib/adapters/_live-json-helpers.js +50 -0
- package/lib/adapters/edu-huawei-learning/api-client.js +178 -5
- package/lib/adapters/edu-huawei-learning/index.js +83 -9
- package/lib/adapters/edu-zuoyebang/api-client.js +181 -6
- package/lib/adapters/edu-zuoyebang/index.js +83 -9
- package/lib/adapters/finance-alipay/api-client.js +268 -6
- package/lib/adapters/finance-alipay/index.js +85 -9
- package/lib/adapters/game-genshin/api-client.js +207 -6
- package/lib/adapters/game-genshin/index.js +90 -9
- package/lib/adapters/game-honor-of-kings/api-client.js +235 -12
- package/lib/adapters/game-honor-of-kings/index.js +80 -9
- package/lib/adapters/netease-music/api-client.js +284 -0
- package/lib/adapters/netease-music/index.js +85 -9
- package/lib/adapters/shopping-pinduoduo/index.js +241 -33
- package/lib/adapters/social-douyin/index.js +2 -0
- package/lib/adapters/social-douyin-adb/aweme-detail-client.js +119 -0
- package/lib/adapters/social-douyin-adb/collector.js +114 -0
- package/lib/adapters/social-douyin-adb/index.js +18 -1
- package/lib/adapters/social-douyin-adb/watch-history-reader.js +188 -0
- package/lib/adapters/social-toutiao-adb/account-reader.js +179 -0
- package/lib/adapters/social-toutiao-adb/api-client.js +41 -17
- package/lib/adapters/social-toutiao-adb/collector.js +55 -19
- package/lib/adapters/social-toutiao-adb/cookies-extension.js +21 -1
- package/lib/adapters/social-toutiao-adb/index.js +6 -0
- package/lib/adapters/social-xiaohongshu-adb/cookies-extension.js +19 -1
- package/lib/index.js +1 -1
- package/package.json +1 -1
|
@@ -1,29 +1,32 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* §2.4c 购物三联
|
|
2
|
+
* §2.4c 购物三联 — Pinduoduo (拼多多) adapter, dual-mode (snapshot + cookie-api).
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* v0.3 brings 拼多多 to parity with shopping-taobao / shopping-jd /
|
|
5
|
+
* shopping-meituan by adding a cookie-api fetch path alongside the existing
|
|
6
|
+
* snapshot ingest. As with the other shopping adapters the actual HTTP call is
|
|
7
|
+
* delegated to an injected `fetchFn` (the Android in-APK cc uses OkHttp; the
|
|
8
|
+
* desktop hub uses an Electron WebView net request) so this module stays a
|
|
9
|
+
* pure-Node parser + orchestrator.
|
|
6
10
|
*
|
|
7
|
-
* 1.
|
|
8
|
-
*
|
|
9
|
-
* JS (similar to 抖音 X-Bogus). No pure-Node implementation survives
|
|
10
|
-
* pinduoduo's monthly anti_token rotation.
|
|
11
|
-
* 2. Pinduoduo Android app has no built-in "export orders" feature, so
|
|
12
|
-
* there's no SAF source-format to parse directly either.
|
|
11
|
+
* 1. snapshot mode (opts.inputPath): ingest a snapshot JSON produced by a
|
|
12
|
+
* browser extension / hand-roll (stateless — account OPTIONAL).
|
|
13
13
|
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
14
|
+
* 2. cookie-api mode (opts.account.cookies): fetch
|
|
15
|
+
* `mobile.yangkeduo.com/proxy/api/galerie/transaction/transaction_list`
|
|
16
|
+
* via the injected `fetchFn`, paginating with the `pageNumber` cursor and
|
|
17
|
+
* stopping at the `sinceWatermark`. account.uid REQUIRED in this mode.
|
|
16
18
|
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
*
|
|
19
|
+
* ── anti_token signing seam ──────────────────────────────────────────────
|
|
20
|
+
* Pinduoduo's transaction_list requires an `anti_token` (a.k.a.
|
|
21
|
+
* `anti-content`) computed by client-side JS — analogous to 抖音 X-Bogus.
|
|
22
|
+
* No pure-Node implementation survives pinduoduo's anti_token rotation, so
|
|
23
|
+
* the signing itself is injected via `opts.signProvider` (or constructor
|
|
24
|
+
* `signProvider`). On Android the in-APK WebView JS VM produces the token;
|
|
25
|
+
* in tests a stub returns a fixed value. When no signProvider is configured
|
|
26
|
+
* the request is still issued with `antiToken: null` — best-effort, the
|
|
27
|
+
* endpoint may 403, which surfaces as zero events rather than a crash.
|
|
20
28
|
*
|
|
21
|
-
*
|
|
22
|
-
* 推文 §"支付与购物" 大类, with an explicit "v0.2 待用户导出 — 需 web
|
|
23
|
-
* extension 或手抄" banner so user knows the limitation.
|
|
24
|
-
*
|
|
25
|
-
* Snapshot schema (mirrors PinduoduoLocalCollector.SNAPSHOT_SCHEMA_VERSION
|
|
26
|
-
* once the Kotlin collector lands in v0.3+):
|
|
29
|
+
* Snapshot schema (mirrors PinduoduoLocalCollector.SNAPSHOT_SCHEMA_VERSION):
|
|
27
30
|
*
|
|
28
31
|
* {
|
|
29
32
|
* "schemaVersion": 1,
|
|
@@ -45,33 +48,46 @@
|
|
|
45
48
|
* ]
|
|
46
49
|
* }
|
|
47
50
|
*
|
|
48
|
-
* Future v0.
|
|
51
|
+
* Future v0.4: HTML parsing (`Save As Webpage` from `mobile.yangkeduo.com/
|
|
49
52
|
* users/orders.html` — pinduoduo's order list endpoint).
|
|
50
53
|
*/
|
|
51
54
|
|
|
52
55
|
"use strict";
|
|
53
56
|
|
|
54
57
|
const fs = require("node:fs");
|
|
55
|
-
const { normalizeOrderRecord } = require("../shopping-base");
|
|
58
|
+
const { normalizeOrderRecord, CookieAuth } = require("../shopping-base");
|
|
56
59
|
|
|
57
60
|
const NAME = "shopping-pinduoduo";
|
|
58
|
-
const VERSION = "0.
|
|
61
|
+
const VERSION = "0.2.0";
|
|
59
62
|
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
60
63
|
|
|
61
64
|
const KIND_ORDER = "order";
|
|
62
65
|
const VALID_SNAPSHOT_KINDS = Object.freeze([KIND_ORDER]);
|
|
63
66
|
|
|
67
|
+
const PINDUODUO_ORDERS_URL =
|
|
68
|
+
"https://mobile.yangkeduo.com/proxy/api/galerie/transaction/transaction_list";
|
|
69
|
+
|
|
64
70
|
class PinduoduoAdapter {
|
|
65
71
|
constructor(opts = {}) {
|
|
66
|
-
// §2.4c
|
|
67
|
-
//
|
|
72
|
+
// §2.4c: account is OPTIONAL — snapshot mode is stateless. Cookie-api mode
|
|
73
|
+
// activates only when account.cookies is supplied; account.uid is then
|
|
74
|
+
// required (checked at sync time).
|
|
68
75
|
this.account = opts.account || null;
|
|
76
|
+
this._cookieAuth =
|
|
77
|
+
opts.account && opts.account.cookies
|
|
78
|
+
? new CookieAuth({ platform: "pinduoduo", cookies: opts.account.cookies })
|
|
79
|
+
: null;
|
|
80
|
+
this._fetchFn = typeof opts.fetchFn === "function" ? opts.fetchFn : defaultFetch;
|
|
81
|
+
// anti_token signing seam — see file header. Async fn({ url, query,
|
|
82
|
+
// cookies }) → string|null. When absent, requests carry antiToken: null.
|
|
83
|
+
this._signProvider =
|
|
84
|
+
typeof opts.signProvider === "function" ? opts.signProvider : null;
|
|
69
85
|
|
|
70
86
|
this.name = NAME;
|
|
71
87
|
this.version = VERSION;
|
|
72
|
-
this.capabilities = ["sync:snapshot", "parse:pinduoduo-orders"];
|
|
73
|
-
this.extractMode = "
|
|
74
|
-
this.rateLimits = {};
|
|
88
|
+
this.capabilities = ["sync:snapshot", "sync:cookie-api", "parse:pinduoduo-orders"];
|
|
89
|
+
this.extractMode = "web-api";
|
|
90
|
+
this.rateLimits = { perMinute: 8, perDay: 200 };
|
|
75
91
|
this.dataDisclosure = {
|
|
76
92
|
fields: [
|
|
77
93
|
"pinduoduo:order_sn / mall_name / goods_list / order_amount / address",
|
|
@@ -99,15 +115,33 @@ class PinduoduoAdapter {
|
|
|
99
115
|
}
|
|
100
116
|
return { ok: true, mode: "snapshot-file" };
|
|
101
117
|
}
|
|
118
|
+
if (this._cookieAuth) {
|
|
119
|
+
const ok = await this._cookieAuth.validate();
|
|
120
|
+
if (!ok) return { ok: false, reason: "INVALID_COOKIE", error: "cookies missing" };
|
|
121
|
+
if (!this.account || !this.account.uid) {
|
|
122
|
+
return {
|
|
123
|
+
ok: false,
|
|
124
|
+
reason: "NO_ACCOUNT_UID",
|
|
125
|
+
message: "cookie-api mode requires account.uid",
|
|
126
|
+
};
|
|
127
|
+
}
|
|
128
|
+
return { ok: true, account: this.account.uid, mode: "cookie" };
|
|
129
|
+
}
|
|
102
130
|
return {
|
|
103
131
|
ok: false,
|
|
104
132
|
reason: "NO_INPUT",
|
|
105
133
|
message:
|
|
106
|
-
"PinduoduoAdapter.authenticate: needs opts.inputPath (snapshot mode
|
|
134
|
+
"PinduoduoAdapter.authenticate: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode — anti_token signing via signProvider)",
|
|
107
135
|
};
|
|
108
136
|
}
|
|
109
137
|
|
|
110
138
|
async healthCheck() {
|
|
139
|
+
if (this._cookieAuth) {
|
|
140
|
+
const r = await this.authenticate();
|
|
141
|
+
return r.ok
|
|
142
|
+
? { ok: true, lastChecked: Date.now() }
|
|
143
|
+
: { ok: false, reason: r.reason, error: r.error };
|
|
144
|
+
}
|
|
111
145
|
return { ok: true, lastChecked: Date.now() };
|
|
112
146
|
}
|
|
113
147
|
|
|
@@ -116,21 +150,25 @@ class PinduoduoAdapter {
|
|
|
116
150
|
yield* this._syncViaSnapshot(opts);
|
|
117
151
|
return;
|
|
118
152
|
}
|
|
153
|
+
if (this._cookieAuth) {
|
|
154
|
+
yield* this._syncViaCookie(opts);
|
|
155
|
+
return;
|
|
156
|
+
}
|
|
119
157
|
throw new Error(
|
|
120
|
-
"PinduoduoAdapter.sync: needs opts.inputPath (snapshot mode
|
|
158
|
+
"PinduoduoAdapter.sync: needs opts.inputPath (snapshot mode) OR opts.account.cookies (cookie-api mode; pinduoduo's web API requires anti_token signing supplied via opts.signProvider)",
|
|
121
159
|
);
|
|
122
160
|
}
|
|
123
161
|
|
|
124
162
|
async *_syncViaSnapshot(opts) {
|
|
125
163
|
const raw = this._deps.fs.readFileSync(opts.inputPath, "utf-8");
|
|
126
164
|
// v0.2 explicit JSON-only. HTML parsing (SAF-exported webpage from
|
|
127
|
-
// yangkeduo.com order list) is future v0.
|
|
165
|
+
// yangkeduo.com order list) is future v0.4 work.
|
|
128
166
|
let snapshot;
|
|
129
167
|
try {
|
|
130
168
|
snapshot = JSON.parse(raw);
|
|
131
169
|
} catch (err) {
|
|
132
170
|
throw new Error(
|
|
133
|
-
`shopping-pinduoduo.sync: snapshot must be JSON (v0.
|
|
171
|
+
`shopping-pinduoduo.sync: snapshot must be JSON (v0.4 will add HTML parsing). Got parse error: ${err.message}`,
|
|
134
172
|
);
|
|
135
173
|
}
|
|
136
174
|
if (
|
|
@@ -184,11 +222,77 @@ class PinduoduoAdapter {
|
|
|
184
222
|
}
|
|
185
223
|
}
|
|
186
224
|
|
|
225
|
+
async *_syncViaCookie(opts = {}) {
|
|
226
|
+
if (!this.account || !this.account.uid) {
|
|
227
|
+
throw new Error(
|
|
228
|
+
"PinduoduoAdapter._syncViaCookie: account.uid required (set via new PinduoduoAdapter({ account: { uid, cookies } }))",
|
|
229
|
+
);
|
|
230
|
+
}
|
|
231
|
+
if (!(await this._cookieAuth.validate())) return;
|
|
232
|
+
const sinceMs =
|
|
233
|
+
opts.sinceWatermark != null
|
|
234
|
+
? parseInt(String(opts.sinceWatermark), 10) || 0
|
|
235
|
+
: Date.now() - 365 * 24 * 3600_000; // default last year
|
|
236
|
+
const pageSize = Number.isFinite(opts.pageSize) ? opts.pageSize : 10;
|
|
237
|
+
const include = opts.include || {};
|
|
238
|
+
if (include[KIND_ORDER] === false) return;
|
|
239
|
+
|
|
240
|
+
let pageNumber = 1;
|
|
241
|
+
while (true) {
|
|
242
|
+
const query = { pageNumber, pageSize, ts: Date.now() };
|
|
243
|
+
// anti_token signing seam — best-effort. null when no signProvider.
|
|
244
|
+
let antiToken = null;
|
|
245
|
+
if (this._signProvider) {
|
|
246
|
+
antiToken = await this._signProvider({
|
|
247
|
+
url: PINDUODUO_ORDERS_URL,
|
|
248
|
+
query,
|
|
249
|
+
cookies: this._cookieAuth.toHeader(),
|
|
250
|
+
});
|
|
251
|
+
}
|
|
252
|
+
const resp = await this._fetchFn({
|
|
253
|
+
url: PINDUODUO_ORDERS_URL,
|
|
254
|
+
cookies: this._cookieAuth.toHeader(),
|
|
255
|
+
antiToken,
|
|
256
|
+
query,
|
|
257
|
+
});
|
|
258
|
+
const orders = extractOrders(resp);
|
|
259
|
+
if (!orders.length) break;
|
|
260
|
+
let pageHasNew = false;
|
|
261
|
+
let reachedWatermark = false;
|
|
262
|
+
for (const raw of orders) {
|
|
263
|
+
const rec = orderToRecord(raw);
|
|
264
|
+
if (!rec) continue;
|
|
265
|
+
if (rec.placedAt && rec.placedAt < sinceMs) {
|
|
266
|
+
reachedWatermark = true; // everything from here on is older
|
|
267
|
+
break;
|
|
268
|
+
}
|
|
269
|
+
pageHasNew = true;
|
|
270
|
+
yield {
|
|
271
|
+
adapter: NAME,
|
|
272
|
+
originalId: rec.orderId,
|
|
273
|
+
capturedAt: rec.paidAt || rec.placedAt || Date.now(),
|
|
274
|
+
payload: { record: rec },
|
|
275
|
+
};
|
|
276
|
+
}
|
|
277
|
+
// Stop once we've crossed the watermark, drained the page, or the page
|
|
278
|
+
// came back short (last page).
|
|
279
|
+
if (reachedWatermark || !pageHasNew || orders.length < pageSize) break;
|
|
280
|
+
pageNumber += 1;
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
|
|
187
284
|
normalize(raw) {
|
|
188
285
|
if (!raw || !raw.payload) {
|
|
189
286
|
throw new Error("PinduoduoAdapter.normalize: payload missing");
|
|
190
287
|
}
|
|
191
|
-
//
|
|
288
|
+
// Cookie-api mode wraps a normalized record under payload.record; snapshot
|
|
289
|
+
// mode carries the raw event fields directly on the payload.
|
|
290
|
+
if (raw.payload.record) {
|
|
291
|
+
return normalizeOrderRecord(raw.payload.record, {
|
|
292
|
+
adapterName: NAME,
|
|
293
|
+
adapterVersion: VERSION,
|
|
294
|
+
});
|
|
295
|
+
}
|
|
192
296
|
const rec = snapshotEventToRecord(raw.payload);
|
|
193
297
|
return normalizeOrderRecord(rec, {
|
|
194
298
|
adapterName: NAME,
|
|
@@ -208,6 +312,104 @@ function stableOriginalId(kind, id) {
|
|
|
208
312
|
return `pinduoduo:${kind}:${safe}`;
|
|
209
313
|
}
|
|
210
314
|
|
|
315
|
+
/**
 * Locate the order array inside a transaction_list response.
 *
 * Checked in priority order: top-level `orders`, `order_list`, `list`, then
 * `result.order_list` and `result.list`. The first candidate that is an array
 * is returned as-is (same reference); anything else yields an empty array.
 *
 * @param {*} resp - parsed response (or whatever the injected fetchFn returned)
 * @returns {Array} the order array, or `[]` when none is found
 */
function extractOrders(resp) {
  const isUsable = Boolean(resp) && typeof resp === "object";
  if (!isUsable) return [];

  const candidates = [resp.orders, resp.order_list, resp.list];
  const nested = resp.result;
  if (nested) {
    candidates.push(nested.order_list, nested.list);
  }

  const match = candidates.find((candidate) => Array.isArray(candidate));
  return match || [];
}
|
|
329
|
+
|
|
330
|
+
/**
 * Map one pinduoduo transaction_list order object → vendor-neutral OrderRecord.
 *
 * Field names are looked up best-effort across endpoint versions (snake_case
 * and camelCase fallbacks). Amounts are passed through `centsToYuan`.
 *
 * @param {*} o - raw order object from the response
 * @returns {object|null} the record, or null when `o` is not an object or
 *   carries no order identifier
 */
function orderToRecord(o) {
  if (!o || typeof o !== "object") return null;
  const orderId = o.order_sn || o.orderSn || o.orderId || o.id;
  if (!orderId) return null;
  const merchant = o.mall_name || o.mallName || o.merchantName || o.shop_name || "拼多多";

  const items = [];
  const rawItems = o.goods_list || o.order_goods || o.goodsList || o.items || [];
  for (const it of Array.isArray(rawItems) ? rawItems : []) {
    if (!it) continue;
    const quantity = parseInt(it.goods_number || it.goods_count || it.quantity || 1, 10);
    items.push({
      name: it.goods_name || it.goodsName || it.name || it.skuName,
      // A non-numeric quantity parses to NaN; use the same default of 1 that a
      // missing quantity already gets, so NaN never reaches the record.
      quantity: Number.isNaN(quantity) ? 1 : quantity,
      unitPrice: centsToYuan(it.goods_price || it.goodsPrice || it.unitPrice || 0),
      sku: it.sku_id || it.skuId || it.goods_id || it.sku || null,
    });
  }

  return {
    vendorId: "pinduoduo",
    orderId: String(orderId),
    placedAt: parseTime(o.order_time || o.create_at || o.createAt || o.order_create_at),
    paidAt: parseTime(o.pay_time || o.payTime || o.group_order_pay_time),
    status: mapStatus(pickStatusText(o)),
    merchantName: merchant,
    totalAmount: {
      value: centsToYuan(o.order_amount || o.orderAmount || o.pay_amount || o.total_amount || 0),
      currency: "CNY",
    },
    items,
    recipient: o.receive_name || o.receiver || o.recipient || null,
    shippingAddress: o.address || o.receive_address || o.shippingAddress || null,
    trackingNumber: o.tracking_number || o.waybill_no || o.trackingNumber || null,
    extras: { capturedBy: "cookie-api", platform: "pinduoduo" },
  };
}
|
|
371
|
+
|
|
372
|
+
/**
 * Choose the status string handed to mapStatus for one order.
 *
 * A human-readable prompt/description field wins when present. Otherwise the
 * numeric `order_status` / `orderStatus` code is translated through a
 * best-effort table; unknown codes fall back to `String(o.status)` or "".
 *
 * @param {object} o - raw order object
 * @returns {string} status text (possibly empty)
 */
function pickStatusText(o) {
  const textKeys = [
    "order_status_prompt",
    "orderStatusPrompt",
    "status_prompt",
    "statusPrompt",
    "status_desc",
  ];
  for (const key of textKeys) {
    const value = o[key];
    if (value) return value;
  }

  // Best-effort PDD numeric code → text mapping.
  const labelsByCode = new Map([
    [1, "待付款"],
    [2, "待发货"],
    [3, "已发货"],
    [4, "已完成"],
    [5, "已关闭"],
    [6, "已关闭"],
  ]);
  const rawCode = o.order_status != null ? o.order_status : o.orderStatus;
  const numericCode = Number(rawCode);
  if (labelsByCode.has(numericCode)) {
    return labelsByCode.get(numericCode);
  }
  if (o.status != null) {
    return String(o.status);
  }
  return "";
}
|
|
403
|
+
|
|
404
|
+
/**
 * Convert an amount to 元.
 *
 * Integers are taken to be 分 (cents) and divided by 100. Values that are
 * already decimal are taken to be 元 and returned unchanged. Non-numeric or
 * non-finite input yields 0.
 *
 * @param {*} v - amount as a number or numeric string
 * @returns {number} amount in 元
 */
function centsToYuan(v) {
  const n = Number(v);
  if (!Number.isFinite(n)) return 0;
  // A string written with a decimal point ("12.00") is a 元 amount even though
  // Number() collapses it to a whole number; without this check it would be
  // misread as 分 and shrunk a hundredfold.
  if (typeof v === "string" && v.includes(".")) return n;
  // Snapshot/test inputs may already be 元 with a decimal point; treat any
  // non-integer as 元, integers as 分.
  if (!Number.isInteger(n)) return n;
  return n / 100;
}
|
|
412
|
+
|
|
211
413
|
function snapshotEventToRecord(ev) {
|
|
212
414
|
const items = [];
|
|
213
415
|
const rawItems = Array.isArray(ev.items) ? ev.items : [];
|
|
@@ -266,8 +468,14 @@ function mapStatus(s) {
|
|
|
266
468
|
return "placed";
|
|
267
469
|
}
|
|
268
470
|
|
|
471
|
+
/**
 * Placeholder used when the caller did not inject a `fetchFn`.
 *
 * @param {object} [_opts] - ignored
 * @returns {Promise<never>} always rejects
 */
function defaultFetch(_opts) {
  const failure = new Error("PinduoduoAdapter: no fetchFn configured");
  return Promise.reject(failure);
}
|
|
474
|
+
|
|
269
475
|
module.exports = {
|
|
270
476
|
PinduoduoAdapter,
|
|
477
|
+
orderToRecord,
|
|
478
|
+
extractOrders,
|
|
271
479
|
NAME,
|
|
272
480
|
VERSION,
|
|
273
481
|
SNAPSHOT_SCHEMA_VERSION,
|
|
@@ -537,6 +537,8 @@ function normalizeHistory(p, raw, ingestedAt) {
|
|
|
537
537
|
awemeId,
|
|
538
538
|
author,
|
|
539
539
|
duration,
|
|
540
|
+
// Source surface from the local video_record.db (homepage_hot / etc.).
|
|
541
|
+
enterFrom: row.enterFrom || row.enter_from || p.enterFrom || null,
|
|
540
542
|
},
|
|
541
543
|
}],
|
|
542
544
|
persons: [], places: [], items: [], topics: [],
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AwemeDetailClient — resolves Douyin aweme (video) ids to human-readable
|
|
3
|
+
* metadata (desc / author / duration) so watch-history events show WHAT was
|
|
4
|
+
* watched, not just an id.
|
|
5
|
+
*
|
|
6
|
+
* Real-device finding 2026-06-11: the web detail endpoint
|
|
7
|
+
* https://www.douyin.com/aweme/v1/web/aweme/detail/?aweme_id=<id>
|
|
8
|
+
* &device_platform=webapp&aid=6383&channel=channel_pc_web
|
|
9
|
+
* returns HTTP 200 + full `aweme_detail` JSON (desc, author.nickname, duration,
|
|
10
|
+
* create_time) with **just a browser UA + Referer — no X-Bogus / cookie / msToken**
|
|
11
|
+
* for this guest request shape. So title resolution is a plain HTTP client, not a
|
|
12
|
+
* sign-bridge. (If Douyin later enforces signing here, this becomes the seam to
|
|
13
|
+
* route through a DouyinSignBridge — same pattern as toutiao/xhs.)
|
|
14
|
+
*
|
|
15
|
+
* Rate-friendly: dedups ids, caps per run, sleeps between calls, fails soft per
|
|
16
|
+
* id (an unresolved id just keeps "(no title)" — never aborts the sync).
|
|
17
|
+
*/
|
|
18
|
+
"use strict";
|
|
19
|
+
|
|
20
|
+
const DEFAULT_BASE_URL = "https://www.douyin.com";
// Browser-like request headers sent with every detail request.
const BROWSER_HEADERS = Object.freeze({
  "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
    "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  Referer: "https://www.douyin.com/",
  "Accept-Language": "zh-CN,zh;q=0.9",
});

/**
 * Plain HTTP client that resolves aweme ids to {desc, author, duration,
 * create_time} through the web detail endpoint.
 *
 * Every failure is reported through `lastErrorCode` / `lastErrorMessage` and a
 * `null` return; no method throws on a bad response.
 *
 * Error codes: -2 fetch unavailable, -3 unparsable body, -4 transport failure,
 * -5 no `aweme_detail`, otherwise the HTTP status or the API `status_code`.
 */
class AwemeDetailClient {
  /**
   * @param {object} [opts]
   * @param {string} [opts.baseUrl] - origin to query; trailing slashes are stripped
   * @param {Function} [opts.fetch] - fetch implementation; defaults to globalThis.fetch
   * @param {Function} [opts.sleep] - async delay function taking milliseconds
   * @param {number} [opts.delayMs=200] - pause between calls in resolveMany
   */
  constructor(opts = {}) {
    this.baseUrl = (opts.baseUrl || DEFAULT_BASE_URL).replace(/\/+$/, "");
    this._fetch =
      opts.fetch || (typeof globalThis.fetch === "function" ? globalThis.fetch : null);
    this._sleep =
      opts.sleep || ((ms) => new Promise((r) => setTimeout(r, ms)));
    this.delayMs = Number.isFinite(opts.delayMs) ? opts.delayMs : 200;
    this.lastErrorCode = 0;
    this.lastErrorMessage = null;
  }

  _setErr(code, msg) {
    this.lastErrorCode = code;
    this.lastErrorMessage = msg;
  }

  /**
   * Resolve one aweme id → {awemeId, desc, author, durationMs, createTime} or
   * null on any error (sets lastError).
   *
   * @param {string|number} aid
   * @returns {Promise<object|null>}
   */
  async fetchDetail(aid) {
    if (typeof this._fetch !== "function") {
      this._setErr(-2, "AwemeDetailClient: fetch not available — pass opts.fetch or run on Node 18+");
      return null;
    }
    const url =
      `${this.baseUrl}/aweme/v1/web/aweme/detail/?aweme_id=${encodeURIComponent(String(aid))}` +
      `&device_platform=webapp&aid=6383&channel=channel_pc_web`;
    // Call fetch detached from this instance: a platform fetch invoked as a
    // method of another object fails with "Illegal invocation" in browser-like
    // runtimes.
    const doFetch = this._fetch;
    let resp;
    let body;
    try {
      resp = await doFetch(url, { method: "GET", headers: { ...BROWSER_HEADERS } });
      // Reading the body can fail as well (connection dropped mid-stream), so
      // it stays inside the same guard to honour the "null on any error" contract.
      body = await resp.text();
    } catch (e) {
      this._setErr(-4, "network: " + (e && e.message ? e.message : String(e)));
      return null;
    }
    if (!resp.ok) {
      this._setErr(resp.status, `HTTP ${resp.status}`);
      return null;
    }
    let obj;
    try {
      obj = JSON.parse(body);
    } catch (e) {
      this._setErr(-3, "parse: " + (e && e.message ? e.message : String(e)));
      return null;
    }
    // Valid JSON that is not an object (e.g. `null`) would crash the property
    // reads below.
    if (!obj || typeof obj !== "object") {
      this._setErr(-3, "parse: response is not a JSON object");
      return null;
    }
    const code = typeof obj.status_code === "number" ? obj.status_code : 0;
    if (code !== 0) {
      this._setErr(code, (obj.status_msg || `status_code=${code}`).toString());
      return null;
    }
    const d = obj.aweme_detail;
    if (!d || typeof d !== "object") {
      this._setErr(-5, "no aweme_detail (deleted/private video?)");
      return null;
    }
    this._setErr(0, null);
    return {
      awemeId: String(aid),
      desc: d.desc || null,
      author: (d.author && d.author.nickname) || null,
      durationMs: Number.isFinite(d.duration) ? d.duration : null,
      createTime: Number.isFinite(d.create_time) ? d.create_time : null,
    };
  }

  /**
   * Resolve many ids → Map<aid, detail>. Dedups, caps at `limit`, sleeps
   * `delayMs` between calls. Per-id failures are skipped (not in the map).
   * Map keys are the stringified ids.
   *
   * @param {string[]} aids
   * @param {{limit?: number}} [opts]
   * @returns {Promise<Map<string, object>>}
   */
  async resolveMany(aids, opts = {}) {
    const uniq = [...new Set((aids || []).map(String))];
    const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : uniq.length;
    // Clamp to the number of ids actually available so the "more calls coming"
    // check below never sleeps after the final request.
    const cap = Math.min(limit, uniq.length);
    const out = new Map();
    let n = 0;
    for (const aid of uniq) {
      if (n >= cap) break;
      const d = await this.fetchDetail(aid);
      n += 1;
      if (d) out.set(aid, d);
      if (this.delayMs > 0 && n < cap) await this._sleep(this.delayMs);
    }
    return out;
  }
}
|
|
118
|
+
|
|
119
|
+
module.exports = { AwemeDetailClient, BROWSER_HEADERS };
|
|
@@ -159,7 +159,121 @@ async function collectAndSync(bridge, registry, opts = {}) {
|
|
|
159
159
|
};
|
|
160
160
|
}
|
|
161
161
|
|
|
162
|
+
// ── Watch-history (video_record.db) path ─────────────────────────────────
|
|
163
|
+
// Distinct from the IM-db path above: pulls the plaintext video_record.db and
|
|
164
|
+
// emits `history` events (KIND_HISTORY → BROWSE) the social-douyin adapter
|
|
165
|
+
// already normalizes. No X-Bogus, no SQLCipher — the durable "what/when the
|
|
166
|
+
// user watched" signal. See watch-history-reader.js.
|
|
167
|
+
const DOUYIN_SNAPSHOT_SCHEMA_VERSION = 1;

/**
 * Pull watch-history records through the bridge, turn them into `history`
 * snapshot events, optionally enrich them with titles, and write the snapshot
 * to a staging file.
 *
 * @param {{invoke: Function}} bridge - must expose `invoke(method, params)`
 * @param {object} [opts]
 * @param {Function} [opts.now] - clock returning epoch ms (defaults to Date.now)
 * @param {number} [opts.limit=2000] - max records requested from the bridge
 * @param {string} [opts.uid] - fallback uid when the bridge returns none
 * @param {boolean} [opts.resolveTitles] - enable title enrichment
 * @param {number} [opts.titleLimit=60] - max ids resolved per run
 * @param {number} [opts.titleDelayMs] - delay between title requests
 * @param {Function} [opts.fetch] - fetch implementation for the title client
 * @param {object} [opts._detailClient] - injected title client (tests)
 * @param {string} [opts.displayName] - stored on the snapshot account
 * @param {string} [opts.stagingDir] - directory for the snapshot file
 * @returns {Promise<{snapshotPath: string, uid: *, eventCounts: object, titlesResolved: number}>}
 * @throws {TypeError} when the bridge has no `invoke` function
 * @throws {Error} when the bridge payload has no `records` array
 */
async function collectWatchHistory(bridge, opts = {}) {
  if (!bridge || typeof bridge.invoke !== "function") {
    throw new TypeError(
      "DouyinAdbCollector.collectWatchHistory: bridge must expose invoke(method, params)",
    );
  }
  const now = opts.now || Date.now;
  const limit = Number.isInteger(opts.limit) && opts.limit > 0 ? opts.limit : 2000;
  const res = await bridge.invoke("douyin.watch-history", { limit });
  if (!res || !Array.isArray(res.records)) {
    throw new Error(
      "DouyinAdbCollector.collectWatchHistory: bridge.invoke('douyin.watch-history') returned malformed payload",
    );
  }
  const uid = res.uid || opts.uid || null;
  const events = [];
  for (const r of res.records) {
    if (!r || !r.awemeId) continue;
    events.push({
      kind: "history",
      id: `history-${r.awemeId}-${r.capturedAt || ""}`,
      capturedAt: r.capturedAt || now(),
      awemeId: r.awemeId,
      enterFrom: r.enterFrom || null,
    });
  }

  // Optional title enrichment: resolve aweme ids → desc/author/duration via the
  // web detail endpoint (plain HTTP, no signing) so events show WHAT was watched.
  // Capped + dedup'd + fail-soft; an unresolved id just keeps "(no title)".
  let titlesResolved = 0;
  if (opts.resolveTitles && events.length > 0) {
    const client =
      opts._detailClient ||
      new (require("./aweme-detail-client").AwemeDetailClient)({
        fetch: opts.fetch,
        delayMs: opts.titleDelayMs,
      });
    // Resolve most-recent first (events come back DESC by view time).
    const titles = await client.resolveMany(
      events.map((e) => e.awemeId),
      { limit: Number.isInteger(opts.titleLimit) && opts.titleLimit > 0 ? opts.titleLimit : 60 },
    );
    for (const e of events) {
      // resolveMany keys its Map by the stringified id, so a numeric awemeId
      // must be stringified for the lookup; the raw key is kept as a fallback
      // for injected clients that key by the original value.
      const t = titles.get(String(e.awemeId)) || titles.get(e.awemeId);
      if (t) {
        // normalizeHistory reads title/author/duration off the snapshot event.
        e.title = t.desc;
        e.author = t.author;
        e.duration = t.durationMs;
        titlesResolved += 1;
      }
    }
  }

  const snapshot = {
    schemaVersion: DOUYIN_SNAPSHOT_SCHEMA_VERSION,
    snapshottedAt: now(),
    account: {
      ...(uid ? { shortId: String(uid) } : {}),
      displayName: opts.displayName,
    },
    events,
  };
  const snapshotPath = writeSnapshotJson(snapshot, { dir: opts.stagingDir });
  return {
    snapshotPath,
    uid,
    eventCounts: { history: events.length, total: events.length },
    titlesResolved,
  };
}
|
|
241
|
+
|
|
242
|
+
/**
 * Collect the watch-history snapshot, feed it to the "social-douyin" adapter
 * through the registry, and remove the staging file afterwards.
 *
 * The staging file is cleaned up whether or not the sync succeeds; a cleanup
 * failure is reported via `douyin.cleanupFailed` instead of being thrown.
 *
 * @param {{invoke: Function}} bridge - passed through to collectWatchHistory
 * @param {{syncAdapter: Function}} registry - must expose `syncAdapter(name, options)`
 * @param {object} [opts] - passed through to collectWatchHistory
 * @returns {Promise<object>} the sync report merged with a `douyin` summary
 * @throws {TypeError} when the registry has no `syncAdapter` function
 */
async function collectWatchHistoryAndSync(bridge, registry, opts = {}) {
  const registryUsable = Boolean(registry) && typeof registry.syncAdapter === "function";
  if (!registryUsable) {
    throw new TypeError(
      "DouyinAdbCollector.collectWatchHistoryAndSync: registry must expose syncAdapter(name, options)",
    );
  }

  const collected = await collectWatchHistory(bridge, opts);
  const stagedPath = collected.snapshotPath;

  let report = null;
  let cleanupFailed = false;
  try {
    report = await registry.syncAdapter("social-douyin", { inputPath: stagedPath });
  } finally {
    // Always attempt to delete the staging file; never let cleanup mask the
    // outcome of the sync itself.
    try {
      cleanupSnapshotJson(stagedPath);
    } catch (_e) {
      cleanupFailed = true;
    }
  }

  const douyin = {
    uid: collected.uid,
    eventCounts: collected.eventCounts,
    titlesResolved: collected.titlesResolved || 0,
    mode: "watch-history",
    cleanupFailed,
  };
  return { ...report, douyin };
}
|
|
273
|
+
|
|
162
274
|
module.exports = {
|
|
163
275
|
collect,
|
|
164
276
|
collectAndSync,
|
|
277
|
+
collectWatchHistory,
|
|
278
|
+
collectWatchHistoryAndSync,
|
|
165
279
|
};
|
|
@@ -38,13 +38,30 @@ const {
|
|
|
38
38
|
cleanupSnapshotJson,
|
|
39
39
|
SNAPSHOT_SCHEMA_VERSION,
|
|
40
40
|
} = require("./snapshot-builder");
|
|
41
|
-
const {
|
|
41
|
+
const {
|
|
42
|
+
collect,
|
|
43
|
+
collectAndSync,
|
|
44
|
+
collectWatchHistory,
|
|
45
|
+
collectWatchHistoryAndSync,
|
|
46
|
+
} = require("./collector");
|
|
47
|
+
const {
|
|
48
|
+
createDouyinWatchExtension,
|
|
49
|
+
VIDEO_RECORD_DB_REMOTE_PATH,
|
|
50
|
+
} = require("./watch-history-reader");
|
|
51
|
+
const { AwemeDetailClient } = require("./aweme-detail-client");
|
|
42
52
|
|
|
43
53
|
module.exports = {
|
|
44
54
|
// Extension factory (wiring registers this on the bridge)
|
|
45
55
|
createDouyinDbExtension,
|
|
46
56
|
DOUYIN_DB_REMOTE_DIR,
|
|
47
57
|
IM_DB_PATTERN,
|
|
58
|
+
// Watch-history (video_record.db) extension + path
|
|
59
|
+
createDouyinWatchExtension,
|
|
60
|
+
VIDEO_RECORD_DB_REMOTE_PATH,
|
|
61
|
+
collectWatchHistory,
|
|
62
|
+
collectWatchHistoryAndSync,
|
|
63
|
+
// Aweme title resolver (web detail endpoint, no signing)
|
|
64
|
+
AwemeDetailClient,
|
|
48
65
|
// Parser + builder (also exposed for advanced callers / tests)
|
|
49
66
|
parseImDb,
|
|
50
67
|
buildSnapshot,
|