@chainlesschain/personal-data-hub 0.3.1 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/email-adapter-snapshot.test.js +237 -0
- package/__tests__/adapters/email-adapter.test.js +1 -1
- package/__tests__/adapters/email-pdf-extractor.test.js +1 -1
- package/__tests__/adapters/email-retry-progress.test.js +1 -1
- package/__tests__/adapters/email-templates.test.js +1 -1
- package/__tests__/adapters/social-bilibili-adb-api-client.test.js +721 -0
- package/__tests__/adapters/social-bilibili-adb-chromium-cookies-reader.test.js +346 -0
- package/__tests__/adapters/social-bilibili-adb-collector.test.js +284 -0
- package/__tests__/adapters/social-bilibili-adb-cookies-extension.test.js +343 -0
- package/__tests__/adapters/social-bilibili-adb-snapshot-builder.test.js +296 -0
- package/__tests__/adapters/social-douyin-adb-collector.test.js +254 -0
- package/__tests__/adapters/social-douyin-adb-im-db-parser.test.js +304 -0
- package/__tests__/adapters/social-douyin-adb-snapshot-builder.test.js +216 -0
- package/__tests__/adapters/social-kuaishou-adb-api-client.test.js +432 -0
- package/__tests__/adapters/social-kuaishou-adb-collector.test.js +276 -0
- package/__tests__/adapters/social-kuaishou-adb-cookies-extension.test.js +141 -0
- package/__tests__/adapters/social-kuaishou-adb-snapshot-builder.test.js +178 -0
- package/__tests__/adapters/social-toutiao-adb-api-client.test.js +537 -0
- package/__tests__/adapters/social-toutiao-adb-collector.test.js +285 -0
- package/__tests__/adapters/social-toutiao-adb-cookies-extension.test.js +163 -0
- package/__tests__/adapters/social-toutiao-adb-snapshot-builder.test.js +196 -0
- package/__tests__/adapters/social-weibo-adb-api-client.test.js +362 -0
- package/__tests__/adapters/social-weibo-adb-collector.test.js +201 -0
- package/__tests__/adapters/social-weibo-adb-snapshot-builder.test.js +189 -0
- package/__tests__/adapters/social-xiaohongshu-adb-collector.test.js +207 -0
- package/__tests__/adapters/social-xiaohongshu-adb-sign-provider-injection.test.js +351 -0
- package/__tests__/adapters/social-xiaohongshu-adb-sign.test.js +130 -0
- package/__tests__/adapters/system-data-android.test.js +32 -1
- package/__tests__/longtail-adapters.test.js +15 -2
- package/__tests__/shopping-adapters.test.js +96 -0
- package/__tests__/sign-providers.test.js +62 -0
- package/__tests__/travel-adapters.test.js +66 -0
- package/__tests__/whatsapp-adapter.test.js +5 -2
- package/lib/adapters/browser-history-chrome/chrome-db-reader.js +11 -1
- package/lib/adapters/email-imap/email-adapter.js +224 -17
- package/lib/adapters/messaging-telegram/index.js +15 -12
- package/lib/adapters/messaging-whatsapp/index.js +15 -12
- package/lib/adapters/shopping-taobao/index.js +161 -21
- package/lib/adapters/social-bilibili-adb/api-client.js +555 -0
- package/lib/adapters/social-bilibili-adb/chromium-cookies-reader.js +296 -0
- package/lib/adapters/social-bilibili-adb/collector.js +190 -0
- package/lib/adapters/social-bilibili-adb/cookies-extension.js +250 -0
- package/lib/adapters/social-bilibili-adb/index.js +51 -0
- package/lib/adapters/social-bilibili-adb/snapshot-builder.js +197 -0
- package/lib/adapters/social-douyin/index.js +4 -0
- package/lib/adapters/social-douyin-adb/collector.js +165 -0
- package/lib/adapters/social-douyin-adb/db-extension.js +281 -0
- package/lib/adapters/social-douyin-adb/im-db-parser.js +287 -0
- package/lib/adapters/social-douyin-adb/index.js +57 -0
- package/lib/adapters/social-douyin-adb/snapshot-builder.js +174 -0
- package/lib/adapters/social-kuaishou-adb/api-client.js +397 -0
- package/lib/adapters/social-kuaishou-adb/collector.js +196 -0
- package/lib/adapters/social-kuaishou-adb/cookies-extension.js +261 -0
- package/lib/adapters/social-kuaishou-adb/index.js +53 -0
- package/lib/adapters/social-kuaishou-adb/snapshot-builder.js +145 -0
- package/lib/adapters/social-toutiao-adb/api-client.js +377 -0
- package/lib/adapters/social-toutiao-adb/collector.js +200 -0
- package/lib/adapters/social-toutiao-adb/cookies-extension.js +266 -0
- package/lib/adapters/social-toutiao-adb/index.js +52 -0
- package/lib/adapters/social-toutiao-adb/snapshot-builder.js +148 -0
- package/lib/adapters/social-weibo-adb/api-client.js +281 -0
- package/lib/adapters/social-weibo-adb/collector.js +169 -0
- package/lib/adapters/social-weibo-adb/cookies-extension.js +251 -0
- package/lib/adapters/social-weibo-adb/index.js +55 -0
- package/lib/adapters/social-weibo-adb/snapshot-builder.js +145 -0
- package/lib/adapters/social-xiaohongshu-adb/api-client.js +309 -0
- package/lib/adapters/social-xiaohongshu-adb/collector.js +209 -0
- package/lib/adapters/social-xiaohongshu-adb/cookies-extension.js +211 -0
- package/lib/adapters/social-xiaohongshu-adb/index.js +50 -0
- package/lib/adapters/social-xiaohongshu-adb/sign.js +90 -0
- package/lib/adapters/social-xiaohongshu-adb/snapshot-builder.js +126 -0
- package/lib/adapters/system-data-android/adapter.js +77 -3
- package/lib/adapters/travel-amap/index.js +16 -10
- package/lib/adapters/travel-ctrip/index.js +25 -9
- package/lib/adapters/vscode/vscode-reader.js +7 -1
- package/lib/sign-providers/index.js +20 -0
- package/lib/sign-providers/interface.js +82 -0
- package/lib/sign-providers/null-sign-provider.js +30 -0
- package/package.json +10 -1
|
@@ -0,0 +1,377 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Phase 6c (Toutiao C 路径 — 2026-05-25): Node-side ToutiaoApiClient.
|
|
5
|
+
*
|
|
6
|
+
* Byte-parity port of ToutiaoApiClient.kt. Endpoints:
|
|
7
|
+
* - `/passport/account/info/v2/?aid=24` — cookies-only, no _signature
|
|
8
|
+
* - `/api/news/feed/v90/?category=__all__` — needs _signature
|
|
9
|
+
* - `/article/v2/tab_comments/` — needs _signature
|
|
10
|
+
* - `/api/search/content/` — needs _signature
|
|
11
|
+
*
|
|
12
|
+
* **signProvider injection (Phase 6c)**: defaults to NULL_SIGN_PROVIDER —
|
|
13
|
+
* signUrl returns null, so the 3 signed endpoints short-circuit and set
|
|
14
|
+
* lastErrorCode=-99. Desktop wiring injects ToutiaoSignBridge which runs
|
|
15
|
+
* Toutiao's own acrawler.js → ~100% hit rate.
|
|
16
|
+
*
|
|
17
|
+
* **Anti-bot signal**: User-Agent must be desktop Chrome 120+. Referer +
|
|
18
|
+
* Origin = https://www.toutiao.com/. Without ttwid + __ac_nonce + msToken
|
|
19
|
+
* cookies the endpoints may return 412/403 HTML — surfaced as
|
|
20
|
+
* lastErrorCode=resp.status.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
const { NULL_SIGN_PROVIDER } = require("../../sign-providers");
|
|
24
|
+
|
|
25
|
+
/** Origin used for every endpoint when the caller does not pass opts.baseUrl. */
const DEFAULT_BASE_URL = "https://www.toutiao.com/";

/** Desktop Chrome 120 (Windows 10, x64) User-Agent sent with every request. */
const BROWSER_UA = [
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
].join(" ");

/**
 * Fixed browser-like request headers. Frozen so that the per-request copy
 * (which adds the Cookie header) can never leak back into the shared object.
 */
const BROWSER_HEADERS = Object.freeze({
  "User-Agent": BROWSER_UA,
  "Referer": "https://www.toutiao.com/",
  "Origin": "https://www.toutiao.com",
  "Accept": "application/json, text/plain, */*",
  "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
});

/** Toutiao web client id (Douyin web = 2906). */
const AID_TOUTIAO_WEB = "24";
|
|
41
|
+
|
|
42
|
+
/**
 * Normalise a numeric timestamp to epoch milliseconds.
 *
 * Anything that is not a finite, strictly positive number yields 0. Values
 * above 1e12 are returned unchanged (treated as already being milliseconds);
 * every other value is multiplied by 1000 (treated as seconds).
 *
 * @param {*} v - candidate timestamp
 * @returns {number} epoch milliseconds, or 0 when the input is unusable
 */
function normalizeMs(v) {
  const usable = typeof v === "number" && Number.isFinite(v) && v > 0;
  if (!usable) {
    return 0;
  }
  if (v > 1e12) {
    return v;
  }
  return v * 1000;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Client for the Toutiao web endpoints used by the collector: one cookies-only
 * profile endpoint and three endpoints whose URL must first be signed by the
 * injected sign provider.
 *
 * Failures never surface as exceptions from the fetch* methods: they return
 * `null` (profile) or `[]` (lists) and record the reason in `lastErrorCode` /
 * `lastErrorMessage`. Codes set by this class:
 *   0    last call succeeded
 *   -1   empty / non-string cookie (extractUid)
 *   -2   fetch or body read threw
 *   -3   body started with "{" but did not parse as JSON
 *   -4   body was not a JSON object (HTML challenge page, etc.)
 *   -5   profile payload has no numeric status_code
 *   -6   profile status_code is 0 but `data` is missing
 *   -7   no user id could be derived
 *   -99  sign provider produced no signed URL (returned falsy or threw)
 *   > 0  HTTP status of a non-OK response, or the API's own non-zero status_code
 *
 * NOTE: the last-error fields are per instance and are overwritten by every
 * call, so when several fetches run concurrently they reflect whichever call
 * finished last.
 */
class ToutiaoApiClient {
  /**
   * @param {object} [opts]
   * @param {string} [opts.baseUrl] - origin for all endpoints; a trailing "/" is appended when missing
   * @param {Function} [opts.fetch] - fetch implementation; defaults to globalThis.fetch
   * @param {Function} [opts.now] - clock returning epoch ms; defaults to Date.now
   * @param {object} [opts.signProvider] - object exposing signUrl(url, purpose); defaults to NULL_SIGN_PROVIDER
   * @throws {Error} when no fetch implementation is available
   */
  constructor(opts = {}) {
    this.baseUrl = opts.baseUrl || DEFAULT_BASE_URL;
    if (!this.baseUrl.endsWith("/")) this.baseUrl += "/";
    this._fetch = opts.fetch || globalThis.fetch;
    if (typeof this._fetch !== "function") {
      throw new Error(
        "ToutiaoApiClient: fetch not available — pass opts.fetch or run on Node 18+",
      );
    }
    this._now = opts.now || Date.now;
    // Phase 6c: the sign provider is injectable. Without one, the default
    // provider is used and the three signed endpoints short-circuit with -99.
    this.signProvider = opts.signProvider || NULL_SIGN_PROVIDER;
    this.lastErrorCode = 0;
    this.lastErrorMessage = null;
    // Diagnostic counters read by the collector: a signed request increments
    // exactly one of them (bridge hit = signed URL obtained, fallback = not).
    this._bridgeHits = 0;
    this._fallbackHits = 0;
  }

  /**
   * Derive the logged-in uid from a Cookie header string.
   *
   * Lookup order: `passport_uid`, then the first uid segment of `multi_sids`,
   * then the legacy `__ac_uid` / `tt_uid`. A value of "0" is treated as absent.
   * The `multi_sids` separator is accepted both literally (`uid:sid`) and
   * percent-encoded (`uid%3Asid`).
   *
   * @param {string} cookie - raw Cookie header value
   * @returns {string|null} the uid digits, or null (lastErrorCode -1 or -7)
   */
  extractUid(cookie) {
    if (typeof cookie !== "string" || cookie.length === 0) {
      this._setLastError(-1, "cookie 为空");
      return null;
    }
    const passportMatch = /(?:^|; ?)passport_uid=(\d+)/.exec(cookie);
    if (passportMatch && passportMatch[1] !== "0") {
      this._clearLastError();
      return passportMatch[1];
    }
    const multiMatch = /(?:^|; ?)multi_sids=([^;]+)/.exec(cookie);
    if (multiMatch) {
      // Stored cookie values may keep the ":" percent-encoded; split on both forms.
      const firstUid = multiMatch[1].split(/:|%3A/i)[0].trim();
      if (/^\d+$/.test(firstUid) && firstUid !== "0") {
        this._clearLastError();
        return firstUid;
      }
    }
    const legacyMatch = /(?:^|; ?)(?:__ac_uid|tt_uid)=(\d+)/.exec(cookie);
    if (legacyMatch && legacyMatch[1] !== "0") {
      this._clearLastError();
      return legacyMatch[1];
    }
    this._setLastError(
      -7,
      "cookie 缺 passport_uid / multi_sids / __ac_uid — 登录未完成或仅游客态",
    );
    return null;
  }

  /**
   * GET `url` with browser-like headers plus the cookie and parse the body as
   * a JSON object.
   *
   * @param {URL|string} url - unsigned request URL
   * @param {string} cookie - Cookie header value
   * @param {boolean} requireSign - when true the URL is first passed to signProvider.signUrl
   * @param {string} purpose - label forwarded to the sign provider
   * @returns {Promise<object|null>} parsed payload, or null with the last-error fields set
   */
  async _doGetJson(url, cookie, requireSign, purpose) {
    let finalUrl = url;
    if (requireSign) {
      let signed = null;
      let signFailure = null;
      try {
        signed = await this.signProvider.signUrl(url, purpose);
      } catch (e) {
        // A crashing provider is handled like "no signature" so that one bad
        // bridge call cannot reject the caller's whole batch of fetches.
        signFailure = e;
      }
      if (!signed) {
        this._setLastError(
          -99,
          signFailure
            ? `_signature unavailable (signProvider threw: ${signFailure.message || String(signFailure)})`
            : "_signature unavailable (signProvider returned null — bridge not warm or rotated)",
        );
        this._fallbackHits += 1;
        return null;
      }
      finalUrl = signed;
      this._bridgeHits += 1;
    }
    const headers = { ...BROWSER_HEADERS, Cookie: cookie };
    // Call through a detached reference so the injected function does not
    // receive this client as its `this`.
    const fetchImpl = this._fetch;
    try {
      const resp = await fetchImpl(finalUrl.toString(), {
        method: "GET",
        headers,
      });
      const body = await resp.text();
      if (!resp.ok) {
        this._setLastError(resp.status, `HTTP ${resp.status}`);
        return null;
      }
      const trimmed = body.trimStart();
      if (!trimmed.startsWith("{")) {
        this._setLastError(
          -4,
          "non-json (cookie expired or anti-bot triggered)",
        );
        return null;
      }
      let obj;
      try {
        // Parse the trimmed text: trimStart() removes leading characters
        // (BOM, NBSP, ...) that JSON.parse itself would reject.
        obj = JSON.parse(trimmed);
      } catch (e) {
        this._setLastError(-3, "parse: " + (e.message || String(e)));
        return null;
      }
      this._clearLastError();
      return obj;
    } catch (e) {
      this._setLastError(-2, "IO: " + (e.message || String(e)));
      return null;
    }
  }

  /** Record the outcome of the most recent failed operation. */
  _setLastError(code, message) {
    this.lastErrorCode = code;
    this.lastErrorMessage = message;
  }

  /** Reset the last-error fields after a successful operation. */
  _clearLastError() {
    this.lastErrorCode = 0;
    this.lastErrorMessage = null;
  }

  /** Return `value` when it is a positive integer, otherwise `fallback`. */
  _resolveLimit(value, fallback) {
    return Number.isInteger(value) && value > 0 ? value : fallback;
  }

  /** First truthy of group_id / item_id as a string, or null. */
  _pickItemId(item) {
    return (
      (item.group_id && String(item.group_id)) ||
      (item.item_id && String(item.item_id)) ||
      null
    );
  }

  /**
   * Choose the uid from a profile `data` object.
   *
   * `user_id` wins when usable. A string `user_id_str` is preferred when
   * `user_id` is a number outside the safe-integer range, because such a
   * number has already lost digits during JSON parsing. `user_id_str` is
   * accepted as a non-empty string other than "0" or as a positive number.
   */
  _resolveUid(data) {
    const strForm =
      typeof data.user_id_str === "string" ? data.user_id_str.trim() : "";
    const usableStr = strForm !== "" && strForm !== "0" ? strForm : null;
    const id = data.user_id;
    if (typeof id === "number" && !Number.isSafeInteger(id) && usableStr) {
      return usableStr;
    }
    if (id) return String(id);
    if (usableStr) return usableStr;
    if (Number.isFinite(data.user_id_str) && data.user_id_str > 0) {
      return String(data.user_id_str);
    }
    return null;
  }

  /**
   * Fetch /passport/account/info/v2/?aid=24 — cookies only, no signature.
   *
   * @param {string} cookie - Cookie header value
   * @returns {Promise<object|null>} `{uid, nickname, avatarUrl, mobile,
   *   description, followingCount, followerCount, mediaId}` or null on failure
   *   (see lastErrorCode: transport codes, -5, -6, -7, or the API status_code)
   */
  async fetchProfile(cookie) {
    const url = new URL("passport/account/info/v2/", this.baseUrl);
    url.searchParams.set("aid", AID_TOUTIAO_WEB);
    const obj = await this._doGetJson(url, cookie, false, "profile");
    if (!obj) return null;
    const statusCode =
      typeof obj.status_code === "number" ? obj.status_code : null;
    if (statusCode == null) {
      this._setLastError(
        -5,
        `passport/info/v2 missing status_code (keys=[${Object.keys(obj).join(",")}])`,
      );
      return null;
    }
    if (statusCode !== 0) {
      const msg =
        obj.status_msg ||
        obj.message ||
        obj.error_description ||
        `status_code=${statusCode}`;
      this._setLastError(statusCode, String(msg));
      return null;
    }
    const data = obj.data;
    if (!data || typeof data !== "object") {
      this._setLastError(-6, "status_code=0 but no `data` object");
      return null;
    }
    const rawUid = this._resolveUid(data);
    if (!rawUid) {
      this._setLastError(
        -7,
        `ok but data lacks user_id (cookie missing sessionid?); dataKeys=[${Object.keys(data).join(",")}]`,
      );
      return null;
    }
    return {
      uid: rawUid,
      nickname:
        data.screen_name || data.name || data.nickname || "(unnamed)",
      avatarUrl: data.avatar_url || data.avatar_thumb || null,
      mobile: data.mobile || null,
      description: data.description || data.signature || null,
      followingCount: Number.isFinite(data.following_count)
        ? data.following_count
        : 0,
      followerCount: Number.isFinite(data.followers_count)
        ? data.followers_count
        : 0,
      // "0" is treated as "no media account".
      mediaId:
        data.media_id != null && String(data.media_id) !== "0"
          ? String(data.media_id)
          : null,
    };
  }

  /**
   * Fetch /api/news/feed/v90/?category=__all__ — recommended feed. Signed.
   *
   * @param {string} cookie - Cookie header value
   * @param {object} [opts]
   * @param {number} [opts.limit=50] - positive integer cap on requested and returned items
   * @returns {Promise<object[]>} `{itemId, title, category, author,
   *   publishedAt, readDuration, source}` entries; empty on failure
   */
  async fetchFeed(cookie, opts = {}) {
    const limit = this._resolveLimit(opts.limit, 50);
    const url = new URL("api/news/feed/v90/", this.baseUrl);
    url.searchParams.set("category", "__all__");
    url.searchParams.set("aid", AID_TOUTIAO_WEB);
    url.searchParams.set("client_extra_params", "{}");
    url.searchParams.set("count", String(limit));
    const obj = await this._doGetJson(url, cookie, true, "feed");
    if (!obj) return [];
    const arr = Array.isArray(obj.data) ? obj.data : [];
    const out = [];
    const cap = Math.min(limit, arr.length);
    for (let i = 0; i < cap; i++) {
      const raw = arr[i];
      if (!raw || typeof raw !== "object") continue;
      // Some feed cells carry the real article as a JSON string under
      // raw_data; others are top-level. Only adopt the nested value when it
      // decodes to an object — "null" or a bare number would otherwise make
      // the property reads below throw.
      let item = raw;
      if (typeof raw.raw_data === "string") {
        try {
          const nested = JSON.parse(raw.raw_data);
          if (nested && typeof nested === "object") item = nested;
        } catch {
          item = raw;
        }
      }
      const id = this._pickItemId(item);
      if (!id) continue;
      out.push({
        itemId: id,
        title: item.title || "(no title)",
        category: item.category || raw.category || null,
        author:
          (item.user_info && item.user_info.name) || item.source || null,
        publishedAt: normalizeMs(item.behot_time || item.publish_time || 0),
        readDuration: Number.isFinite(item.read_duration)
          ? item.read_duration
          : 0,
        source: item.source || null,
      });
    }
    return out;
  }

  /**
   * Fetch /article/v2/tab_comments/ — despite the path name this is used as
   * the user's saved-articles list. Signed.
   *
   * @param {string} cookie - Cookie header value
   * @param {object} [opts]
   * @param {number} [opts.limit=200] - positive integer cap on requested and returned items
   * @returns {Promise<object[]>} `{itemId, title, category, author, savedAt}`
   *   entries; empty on failure
   */
  async fetchCollection(cookie, opts = {}) {
    const limit = this._resolveLimit(opts.limit, 200);
    const url = new URL("article/v2/tab_comments/", this.baseUrl);
    url.searchParams.set("aid", AID_TOUTIAO_WEB);
    url.searchParams.set("count", String(limit));
    const obj = await this._doGetJson(url, cookie, true, "comments");
    if (!obj) return [];
    const arr = Array.isArray(obj.data) ? obj.data : [];
    const out = [];
    const cap = Math.min(limit, arr.length);
    for (let i = 0; i < cap; i++) {
      const item = arr[i];
      if (!item || typeof item !== "object") continue;
      const id = this._pickItemId(item);
      if (!id) continue;
      out.push({
        itemId: id,
        title: item.title || "(no title)",
        category: item.category || null,
        author:
          (item.user_info && item.user_info.name) || item.source || null,
        savedAt: normalizeMs(item.behot_time || item.create_time || 0),
      });
    }
    return out;
  }

  /**
   * Fetch /api/search/content/ — search history. Signed.
   *
   * Two payload shapes are handled: `data.user_search_history` and
   * `data.search_history`. Entries may be objects (`keyword` / `query` plus
   * `time` / `search_time`) or bare strings; bare strings carry no timestamp,
   * so they are stamped `now - index seconds`, which keeps their list order.
   *
   * @param {string} cookie - Cookie header value
   * @param {object} [opts]
   * @param {number} [opts.limit=100] - positive integer cap on requested and returned items
   * @returns {Promise<object[]>} `{keyword, searchedAt}` entries; empty on failure
   */
  async fetchSearchHistory(cookie, opts = {}) {
    const limit = this._resolveLimit(opts.limit, 100);
    const url = new URL("api/search/content/", this.baseUrl);
    url.searchParams.set("aid", AID_TOUTIAO_WEB);
    url.searchParams.set("keyword", "");
    url.searchParams.set("count", String(limit));
    const obj = await this._doGetJson(url, cookie, true, "search");
    if (!obj) return [];
    const data = obj.data;
    if (!data || typeof data !== "object") return [];
    const arr = Array.isArray(data.user_search_history)
      ? data.user_search_history
      : Array.isArray(data.search_history)
        ? data.search_history
        : [];
    const out = [];
    const cap = Math.min(limit, arr.length);
    const now = this._now();
    for (let i = 0; i < cap; i++) {
      const raw = arr[i];
      let keyword = null;
      let ts = 0;
      if (raw && typeof raw === "object") {
        keyword = raw.keyword || raw.query || null;
        ts = normalizeMs(raw.time || raw.search_time || 0);
      } else if (typeof raw === "string") {
        keyword = raw;
        ts = now - i * 1000;
      }
      if (!keyword) continue;
      out.push({ keyword, searchedAt: ts });
    }
    return out;
  }
}
|
|
368
|
+
|
|
369
|
+
module.exports = {
|
|
370
|
+
ToutiaoApiClient,
|
|
371
|
+
_internals: {
|
|
372
|
+
AID_TOUTIAO_WEB,
|
|
373
|
+
BROWSER_UA,
|
|
374
|
+
BROWSER_HEADERS,
|
|
375
|
+
normalizeMs,
|
|
376
|
+
},
|
|
377
|
+
};
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
 * Phase 6c (Toutiao path C — 2026-05-25): end-to-end orchestrator.
 *
 *   bridge.invoke("toutiao.cookies")        ← Phase 6c cookies extension
 *           │
 *           ▼ {cookie, uid, diagnostic}
 *   signProvider.warmUp(cookie)             ← Phase 6c bridge ready (skipped when the
 *           │                                 provider does not define warmUp)
 *           ▼
 *   ToutiaoApiClient.fetchProfile           ← passport endpoint (no _sig)
 *           │
 *           ▼ ProfileInfo (on failure: empty snapshot, signed endpoints skipped)
 *   fetchFeed + fetchCollection + fetchSearchHistory  (parallel, _signature)
 *           │
 *           ▼ 3 arrays (partial-failure OK; bridge ~100%, fallback 0%)
 *   buildSnapshot + writeSnapshotJson       ← schemaVersion=1
 *           │
 *           ▼
 *   registry.syncAdapter("social-toutiao", { inputPath })
 *
 * Mirror of social-xiaohongshu-adb/collector.js but with URL-mutation
 * signing (signProvider.signUrl) vs Xhs's header signing.
 */
|
|
26
|
+
|
|
27
|
+
const { ToutiaoApiClient } = require("./api-client");
|
|
28
|
+
const {
|
|
29
|
+
buildSnapshot,
|
|
30
|
+
writeSnapshotJson,
|
|
31
|
+
cleanupSnapshotJson,
|
|
32
|
+
} = require("./snapshot-builder");
|
|
33
|
+
|
|
34
|
+
/**
 * Pull cookies through the bridge, fetch the Toutiao profile and the three
 * signed lists, and write one snapshot JSON file.
 *
 * Order of operations: cookies → optional signProvider.warmUp → fetchProfile
 * → feed/collection/search in parallel → buildSnapshot → writeSnapshotJson.
 * When fetchProfile fails, an empty snapshot is written using the cookie uid
 * (or "unknown-user") and the signed endpoints are not called.
 * `signProvider.shutdown()` is always attempted once the fetch phase ends.
 *
 * @param {{invoke: Function}} bridge - must expose invoke(method, params)
 * @param {object} [opts]
 * @param {Function} [opts.now] - clock returning epoch ms (default Date.now)
 * @param {object} [opts.signProvider] - forwarded to the client; may define warmUp/shutdown
 * @param {object} [opts.apiClient] - pre-built client, used instead of a new ToutiaoApiClient
 * @param {{feed?: number, collection?: number, search?: number}} [opts.limits]
 * @param {string} [opts.displayName] - overrides the profile nickname in the snapshot
 * @param {string} [opts.stagingDir] - passed to writeSnapshotJson as `dir`
 * @returns {Promise<object>} `{snapshotPath, uid, nickname, eventCounts,
 *   lastErrorCode, lastErrorMessage, cookieDiagnostic, profileFetchFailed,
 *   signProviderUsed, signProviderHits, signProviderFallbacks,
 *   signProviderWarmUpError}`
 * @throws {TypeError} when bridge lacks invoke()
 * @throws {Error} when the cookies payload has no string `cookie`
 */
async function collect(bridge, opts = {}) {
  if (!bridge || typeof bridge.invoke !== "function") {
    throw new TypeError(
      "ToutiaoAdbCollector.collect: bridge must expose invoke(method, params)",
    );
  }
  const now = opts.now || Date.now;
  const signProvider = opts.signProvider || undefined;
  const client =
    opts.apiClient || new ToutiaoApiClient({ now, signProvider });
  const limits = opts.limits || {};

  const cookieResult = await bridge.invoke("toutiao.cookies");
  if (!cookieResult || typeof cookieResult.cookie !== "string") {
    throw new Error(
      "ToutiaoAdbCollector.collect: bridge.invoke('toutiao.cookies') returned malformed payload — got cookie=" +
        typeof cookieResult?.cookie,
    );
  }
  const { cookie, uid: cookieUid, diagnostic: cookieDiagnostic } = cookieResult;

  // Warm up the bridge before the signed endpoints. Feature-detected because
  // not every provider defines warmUp.
  let signProviderWarmUpError = null;
  if (signProvider && typeof signProvider.warmUp === "function") {
    try {
      await signProvider.warmUp(cookie);
    } catch (e) {
      // Warm-up failure is non-fatal: fall through and let the client report
      // whatever the signed endpoints then do. The message is kept locally
      // because the client's last-error fields are overwritten by the very
      // next request (fetchProfile) and would otherwise lose it.
      signProviderWarmUpError = e && e.message ? e.message : String(e);
      if (typeof client._setLastError === "function") {
        client._setLastError(
          -98,
          `signProvider warm-up failed: ${signProviderWarmUpError}`,
        );
      }
    }
  }

  // Diagnostics shared by both result shapes; read at return time so they
  // reflect the state of the client after all requests.
  const diagnostics = (profileFetchFailed) => ({
    lastErrorCode: client.lastErrorCode,
    lastErrorMessage: client.lastErrorMessage,
    cookieDiagnostic: cookieDiagnostic || null,
    profileFetchFailed,
    // Providers without a constructor (null-prototype objects) report "unknown".
    signProviderUsed: signProvider
      ? (signProvider.constructor?.name ?? "unknown")
      : "none",
    signProviderHits: client._bridgeHits,
    signProviderFallbacks: client._fallbackHits,
    signProviderWarmUpError,
  });

  try {
    // fetchProfile — passport endpoint, no _signature required.
    const profile = await client.fetchProfile(cookie);
    if (!profile) {
      // Cookie expired or sessionid missing — emit an empty snapshot using the
      // best-effort cookie-derived uid (or a sentinel if that is absent too).
      const snapshot = buildSnapshot({
        uid: cookieUid || "unknown-user",
        displayName: opts.displayName,
        snapshottedAt: now(),
      });
      const snapshotPath = writeSnapshotJson(snapshot, { dir: opts.stagingDir });
      return {
        snapshotPath,
        uid: cookieUid,
        nickname: null,
        eventCounts: { profile: 0, feed: 0, collection: 0, search: 0, total: 0 },
        ...diagnostics(true),
      };
    }

    // Three signed endpoints in parallel — each returns [] on failure.
    const limitFor = (key) =>
      Number.isInteger(limits[key]) ? limits[key] : undefined;
    const [feed, collection, search] = await Promise.all([
      client.fetchFeed(cookie, { limit: limitFor("feed") }),
      client.fetchCollection(cookie, { limit: limitFor("collection") }),
      client.fetchSearchHistory(cookie, { limit: limitFor("search") }),
    ]);

    const snapshot = buildSnapshot({
      uid: profile.uid,
      displayName: opts.displayName || profile.nickname,
      profile,
      feed,
      collection,
      search,
      snapshottedAt: now(),
    });
    const snapshotPath = writeSnapshotJson(snapshot, { dir: opts.stagingDir });

    return {
      snapshotPath,
      uid: profile.uid,
      nickname: profile.nickname,
      eventCounts: {
        profile: 1,
        feed: feed.length,
        collection: collection.length,
        search: search.length,
        total: snapshot.events.length,
      },
      ...diagnostics(false),
    };
  } finally {
    if (signProvider && typeof signProvider.shutdown === "function") {
      try {
        await signProvider.shutdown();
      } catch (_e) {
        // Best-effort — shutdown errors don't block the sync result.
      }
    }
  }
}
|
|
158
|
+
|
|
159
|
+
/**
 * Run collect() and hand the resulting snapshot file to the registry's
 * "social-toutiao" adapter, then delete the snapshot file.
 *
 * The snapshot is cleaned up whether or not the sync succeeds; a cleanup
 * failure is reported through `toutiao.cleanupFailed` rather than thrown. An
 * error raised by registry.syncAdapter still propagates to the caller.
 *
 * @param {{invoke: Function}} bridge - forwarded to collect()
 * @param {{syncAdapter: Function}} registry - must expose syncAdapter(name, options)
 * @param {object} [opts] - forwarded to collect()
 * @returns {Promise<object>} the adapter's sync report with an added
 *   `toutiao` object carrying the collect() diagnostics and `cleanupFailed`
 * @throws {TypeError} when registry lacks syncAdapter()
 */
async function collectAndSync(bridge, registry, opts = {}) {
  const registryUsable =
    Boolean(registry) && typeof registry.syncAdapter === "function";
  if (!registryUsable) {
    throw new TypeError(
      "ToutiaoAdbCollector.collectAndSync: registry must expose syncAdapter(name, options)",
    );
  }

  const collected = await collect(bridge, opts);
  const outcome = { report: null, cleanupFailed: false };

  try {
    const adapterOptions = { inputPath: collected.snapshotPath };
    outcome.report = await registry.syncAdapter("social-toutiao", adapterOptions);
  } finally {
    // The staged snapshot is only an intermediate file — always try to remove it.
    try {
      cleanupSnapshotJson(collected.snapshotPath);
    } catch (_e) {
      outcome.cleanupFailed = true;
    }
  }

  const toutiao = {
    uid: collected.uid,
    nickname: collected.nickname,
    eventCounts: collected.eventCounts,
    lastErrorCode: collected.lastErrorCode,
    lastErrorMessage: collected.lastErrorMessage,
    cookieDiagnostic: collected.cookieDiagnostic,
    profileFetchFailed: collected.profileFetchFailed,
    signProviderUsed: collected.signProviderUsed,
    signProviderHits: collected.signProviderHits,
    signProviderFallbacks: collected.signProviderFallbacks,
    cleanupFailed: outcome.cleanupFailed,
  };
  return { ...outcome.report, toutiao };
}
|
|
196
|
+
|
|
197
|
+
module.exports = {
|
|
198
|
+
collect,
|
|
199
|
+
collectAndSync,
|
|
200
|
+
};
|