@chainlesschain/personal-data-hub 0.3.6 → 0.3.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/__tests__/adapters/social-kuaishou-adb-api-client.test.js +432 -0
- package/__tests__/adapters/social-kuaishou-adb-collector.test.js +276 -0
- package/__tests__/adapters/social-kuaishou-adb-cookies-extension.test.js +141 -0
- package/__tests__/adapters/social-kuaishou-adb-snapshot-builder.test.js +178 -0
- package/__tests__/adapters/social-toutiao-adb-api-client.test.js +537 -0
- package/__tests__/adapters/social-toutiao-adb-collector.test.js +285 -0
- package/__tests__/adapters/social-toutiao-adb-cookies-extension.test.js +163 -0
- package/__tests__/adapters/social-toutiao-adb-snapshot-builder.test.js +196 -0
- package/__tests__/adapters/social-xiaohongshu-adb-sign-provider-injection.test.js +351 -0
- package/lib/adapters/social-kuaishou-adb/api-client.js +397 -0
- package/lib/adapters/social-kuaishou-adb/collector.js +196 -0
- package/lib/adapters/social-kuaishou-adb/cookies-extension.js +261 -0
- package/lib/adapters/social-kuaishou-adb/index.js +53 -0
- package/lib/adapters/social-kuaishou-adb/snapshot-builder.js +145 -0
- package/lib/adapters/social-toutiao-adb/api-client.js +377 -0
- package/lib/adapters/social-toutiao-adb/collector.js +200 -0
- package/lib/adapters/social-toutiao-adb/cookies-extension.js +266 -0
- package/lib/adapters/social-toutiao-adb/index.js +52 -0
- package/lib/adapters/social-toutiao-adb/snapshot-builder.js +148 -0
- package/lib/adapters/social-xiaohongshu-adb/api-client.js +36 -5
- package/lib/adapters/social-xiaohongshu-adb/collector.js +102 -51
- package/package.json +5 -1
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Phase 6c (Toutiao C 路径 — 2026-05-25): toutiao.cookies ADB extension factory.
|
|
5
|
+
*
|
|
6
|
+
* Mirror of `social-xiaohongshu-adb/cookies-extension.js` (P3c). Reads the
|
|
7
|
+
* Chromium cookies SQLite from Toutiao Android app (com.ss.android.article.news)
|
|
8
|
+
* via `su -c base64`. Returns Cookie header + pre-extracted uid (if present
|
|
9
|
+
* in cookie) + a session-id liveness flag.
|
|
10
|
+
*
|
|
11
|
+
* Required cookies (without these, fetchProfile returns status_code != 0):
|
|
12
|
+
* - one of `passport_uid` / `multi_sids` / `__ac_uid` / `tt_uid` → uid identification
|
|
13
|
+
* - `sessionid` OR `sessionid_ss` → passport endpoint auth
|
|
14
|
+
*
|
|
15
|
+
* Session cookie missing → TOUTIAO_COOKIES_INCOMPLETE (a missing uid only yields `uid: null`) so UI surfaces a "relog on
|
|
16
|
+
* phone" banner. _signature endpoint cookies (msToken / __ac_nonce / ttwid)
|
|
17
|
+
* rotate fast (5-15min) so we don't fail-fast on them — fetchProfile will
|
|
18
|
+
* reveal whether they're stale.
|
|
19
|
+
*
|
|
20
|
+
* Returns:
|
|
21
|
+
* {
|
|
22
|
+
* cookie: string,
|
|
23
|
+
* uid: string|null, // best-effort uid pre-extract (may be null
|
|
24
|
+
* // if cookie only has session — caller
|
|
25
|
+
* // falls back to api.fetchProfile)
|
|
26
|
+
* extractedAt: number,
|
|
27
|
+
* diagnostic: {
|
|
28
|
+
* cookieCount: number,
|
|
29
|
+
* hadEncrypted: boolean,
|
|
30
|
+
* cookieNames: string[],
|
|
31
|
+
* }
|
|
32
|
+
* }
|
|
33
|
+
*/
|
|
34
|
+
|
|
35
|
+
const fs = require("node:fs");
|
|
36
|
+
const path = require("node:path");
|
|
37
|
+
const os = require("node:os");
|
|
38
|
+
const crypto = require("node:crypto");
|
|
39
|
+
|
|
40
|
+
const {
|
|
41
|
+
readChromiumCookies,
|
|
42
|
+
} = require("../social-bilibili-adb/chromium-cookies-reader");
|
|
43
|
+
|
|
44
|
+
const TOUTIAO_COOKIES_REMOTE_PATH =
|
|
45
|
+
"/data/data/com.ss.android.article.news/app_webview/Default/Cookies";
|
|
46
|
+
|
|
47
|
+
const TOUTIAO_COOKIE_HOST_DOMAIN = "toutiao.com";
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* Lenient — at least one of sessionid / sessionid_ss must be present, AND
|
|
51
|
+
* at least one of the uid candidates. Toutiao's anti-bot cookies (msToken /
|
|
52
|
+
* __ac_nonce / ttwid) rotate fast so we don't fail-fast on them.
|
|
53
|
+
*/
|
|
54
|
+
const TOUTIAO_SESSION_COOKIES = Object.freeze(["sessionid", "sessionid_ss"]);
|
|
55
|
+
const TOUTIAO_UID_COOKIES = Object.freeze([
|
|
56
|
+
"passport_uid",
|
|
57
|
+
"multi_sids",
|
|
58
|
+
"__ac_uid",
|
|
59
|
+
"tt_uid",
|
|
60
|
+
]);
|
|
61
|
+
|
|
62
|
+
/**
 * Copy the Toutiao Chromium cookies SQLite file from a rooted device to a
 * local temp file.
 *
 * Steps: `su -c ls` the remote path, confirm `su -c id -u` reports root,
 * stream the file as base64, then sanity-check the decoded bytes (size +
 * SQLite magic header) before writing them to `os.tmpdir()`.
 *
 * @param {Function} adb - async `(args: string[], opts) => string` runner.
 * @param {string} serial - device serial forwarded to `adb` via opts.
 * @param {{timeoutMs?: number}} [opts] - per-call timeout (default 60s).
 * @returns {Promise<string>} absolute path of the temp file; the caller is
 *   responsible for deleting it.
 * @throws {Error} message prefixed with TOUTIAO_NOT_INSTALLED,
 *   TOUTIAO_NO_ROOT, TOUTIAO_COOKIES_EMPTY, TOUTIAO_BASE64_PARSE,
 *   TOUTIAO_COOKIES_TRUNCATED or TOUTIAO_NOT_SQLITE.
 */
async function pullCookiesViaSu(adb, serial, opts) {
  const timeoutMs = opts?.timeoutMs || 60_000;
  const adbOpts = { serial, timeoutMs };
  // adb shell output frequently carries CRLF line endings.
  const normalize = (out) => out.replace(/\r+$/gm, "").trim();

  const lsOut = await adb(
    [
      "shell",
      "su",
      "-c",
      `ls ${TOUTIAO_COOKIES_REMOTE_PATH} 2>/dev/null || echo NOT_FOUND`,
    ],
    adbOpts,
  );
  const lsLine = normalize(lsOut);
  if (lsLine === "NOT_FOUND" || lsLine === "") {
    throw new Error(
      "TOUTIAO_NOT_INSTALLED: " +
        TOUTIAO_COOKIES_REMOTE_PATH +
        " not found. Install Toutiao App (今日头条 com.ss.android.article.news) + log in once, then retry. Note: 极速版 (.lite) uses a different package — only the standard app is supported.",
    );
  }

  const idOut = await adb(["shell", "su", "-c", "id -u"], adbOpts);
  const idLine = normalize(idOut);
  if (idLine !== "0" && !idLine.includes("uid=0")) {
    throw new Error(
      "TOUTIAO_NO_ROOT: this phone isn't rooted (su returned `" +
        idLine.substring(0, 60) +
        "`). Toutiao release APK isn't debuggable, so root is required to read the Chromium cookies DB.",
    );
  }

  const b64 = await adb(
    [
      "shell",
      "su",
      "-c",
      `base64 ${TOUTIAO_COOKIES_REMOTE_PATH} | tr -d '\\n\\r'`,
    ],
    { ...adbOpts },
  );
  const b64Clean = b64.replace(/[\r\n\t ]+/g, "");
  if (b64Clean.length === 0) {
    throw new Error(
      "TOUTIAO_COOKIES_EMPTY: base64 stream returned 0 bytes (su exec may have silently failed on MIUI / OEM ROM)",
    );
  }

  // Buffer.from(..., "base64") never throws — it silently drops characters
  // outside the alphabet — so the stream has to be validated explicitly for
  // TOUTIAO_BASE64_PARSE to be reachable (e.g. when su prints an error
  // message instead of the encoded file).
  if (!/^[A-Za-z0-9+/]+={0,2}$/.test(b64Clean)) {
    const badAt = b64Clean.search(/[^A-Za-z0-9+/=]/);
    throw new Error(
      "TOUTIAO_BASE64_PARSE: stream wasn't valid base64 (" +
        (badAt >= 0
          ? "unexpected character at offset " + badAt
          : "misplaced padding") +
        ")",
    );
  }
  const buf = Buffer.from(b64Clean, "base64");

  if (buf.length < 1024) {
    throw new Error(
      "TOUTIAO_COOKIES_TRUNCATED: decoded file is only " +
        buf.length +
        " bytes — expected ≥4KB sqlite",
    );
  }
  const magic = buf.subarray(0, 16).toString("latin1");
  if (!magic.startsWith("SQLite format 3")) {
    throw new Error(
      "TOUTIAO_NOT_SQLITE: decoded file lacks `SQLite format 3` magic header",
    );
  }

  const tmpFile = path.join(
    os.tmpdir(),
    `cc-toutiao-cookies-${crypto.randomUUID()}.db`,
  );
  // The DB contains live session cookies: create it owner-only, and refuse
  // to write through a pre-existing path ("wx") in the shared temp dir.
  fs.writeFileSync(tmpFile, buf, { mode: 0o600, flag: "wx" });
  return tmpFile;
}
|
|
136
|
+
|
|
137
|
+
/**
 * Build a Cookie header from a chromium-cookies array.
 *
 * When several rows share a cookie name, the row with the longest
 * `hostKey` wins (handles `.toutiao.com` vs `www.toutiao.com` duplicates);
 * a row without a string `hostKey` is treated as having length 0. Rows that
 * are not objects or have no non-empty string `name` are skipped.
 *
 * The uid is extracted best-effort via `pickUidFromCookieMap` and may be
 * null. Only the session cookies are mandatory: when none of
 * `TOUTIAO_SESSION_COOKIES` is present, `header` and `uid` are null and
 * `missing` lists the absent session cookie names.
 *
 * @param {Array<{name: string, value: string, hostKey?: string}>} cookies
 * @returns {{header: string|null, uid: string|null, present: Set<string>,
 *   missing: string[]}}
 * @throws {TypeError} when `cookies` is not an array.
 */
function assembleToutiaoCookieHeader(cookies) {
  if (!Array.isArray(cookies)) {
    throw new TypeError(
      "assembleToutiaoCookieHeader: cookies must be an array",
    );
  }
  const hostLength = (c) =>
    typeof c.hostKey === "string" ? c.hostKey.length : 0;

  const byName = new Map();
  for (const c of cookies) {
    if (
      !c ||
      typeof c !== "object" ||
      typeof c.name !== "string" ||
      c.name === ""
    ) {
      continue;
    }
    const current = byName.get(c.name);
    if (current === undefined || hostLength(c) > hostLength(current)) {
      byName.set(c.name, c);
    }
  }

  const hasSession = TOUTIAO_SESSION_COOKIES.some((n) => byName.has(n));
  const uid = pickUidFromCookieMap(byName);
  const present = new Set(byName.keys());
  if (!hasSession) {
    return {
      header: null,
      uid: null,
      present,
      missing: TOUTIAO_SESSION_COOKIES.filter((n) => !byName.has(n)),
    };
  }
  const header = Array.from(byName.values())
    // A missing value must not serialize as the literal text "undefined".
    .map((c) => `${c.name}=${c.value ?? ""}`)
    .join("; ");
  return { header, uid, present, missing: [] };
}
|
|
176
|
+
|
|
177
|
+
/**
 * Best-effort uid extraction from a name → cookie map.
 *
 * Candidates are tried in priority order: `passport_uid`, the first uid
 * segment of `multi_sids`, `__ac_uid`, `tt_uid`. A candidate is accepted
 * only when it is all digits and not "0".
 *
 * @param {Map<string, {value: string}>} byName
 * @returns {string|null} the first acceptable uid, or null if none.
 */
function pickUidFromCookieMap(byName) {
  const isUid = (v) => Boolean(v) && /^\d+$/.test(v) && v !== "0";
  const valueOf = (name) => byName.get(name)?.value;

  const passport = valueOf("passport_uid");
  if (isUid(passport)) {
    return passport;
  }

  const multi = valueOf("multi_sids");
  if (typeof multi === "string" && multi !== "") {
    // Cookie values cannot carry a raw ";" and are commonly stored
    // percent-encoded (e.g. "12345%3Aabcd"), so decode before splitting;
    // a malformed escape falls back to the raw text.
    let decoded = multi;
    try {
      decoded = decodeURIComponent(multi);
    } catch (_e) {
      decoded = multi;
    }
    // Format "12345:abcd;67890:efgh" — pick first uid segment.
    // NOTE(review): "|" is also accepted as an entry separator on the
    // assumption that some builds use it — confirm against real cookies.
    const firstUid = decoded.split(/[;|]/)[0].split(":")[0].trim();
    if (isUid(firstUid)) {
      return firstUid;
    }
  }

  for (const name of ["__ac_uid", "tt_uid"]) {
    const candidate = valueOf(name);
    if (isUid(candidate)) {
      return candidate;
    }
  }
  return null;
}
|
|
200
|
+
|
|
201
|
+
/**
 * Factory for the `toutiao.cookies` ADB extension handler.
 *
 * The returned handler picks a device, pulls the cookies DB to a temp file
 * via `pullCookiesViaSu`, reads it with `readChromiumCookies`, assembles
 * the Cookie header, and always attempts to delete the temp file.
 *
 * @param {object} [factoryOpts]
 * @param {number} [factoryOpts.timeoutMs=60000] - timeout passed to the pull.
 * @param {Function} [factoryOpts.onCleanupFailed] - called as
 *   `(tmpFile, error)` when the temp file could not be deleted. Exceptions
 *   thrown by this hook are ignored so they cannot replace the handler's
 *   own result or error.
 * @returns {Function} async `(params, ctx) => {cookie, uid, extractedAt,
 *   diagnostic}`; `ctx` must provide `adb` and `pickDevice` functions.
 */
function createToutiaoCookiesExtension(factoryOpts = {}) {
  const options = factoryOpts ?? {};
  const timeoutMs = options.timeoutMs || 60_000;
  const onCleanupFailed = options.onCleanupFailed || (() => {});

  return async function toutiaoCookiesHandler(_params, ctx) {
    if (
      !ctx ||
      typeof ctx.adb !== "function" ||
      typeof ctx.pickDevice !== "function"
    ) {
      throw new TypeError(
        "toutiao.cookies extension: ctx must provide {adb, pickDevice}",
      );
    }
    const serial = await ctx.pickDevice();
    let tmpFile = null;
    try {
      tmpFile = await pullCookiesViaSu(ctx.adb, serial, { timeoutMs });
      const cookies = readChromiumCookies(tmpFile, TOUTIAO_COOKIE_HOST_DOMAIN);
      const cookieCount = cookies.length;
      const hadEncrypted = (cookies._skippedEncryptedCount || 0) > 0;
      const { header, uid, missing, present } =
        assembleToutiaoCookieHeader(cookies);
      if (header === null) {
        throw new Error(
          "TOUTIAO_COOKIES_INCOMPLETE: missing required session cookies " +
            JSON.stringify(missing) +
            ". Likely the user logged out, or has never logged in via the Toutiao app's WebView (open any article link to populate). hadEncrypted=" +
            hadEncrypted +
            ".",
        );
      }
      return {
        cookie: header,
        uid,
        extractedAt: Date.now(),
        diagnostic: {
          cookieCount,
          hadEncrypted,
          cookieNames: Array.from(present),
        },
      };
    } finally {
      if (tmpFile) {
        try {
          fs.unlinkSync(tmpFile);
        } catch (unlinkErr) {
          try {
            onCleanupFailed(tmpFile, unlinkErr);
          } catch (_hookErr) {
            // A throw from inside `finally` would override the value being
            // returned (or the error being propagated) by the try block, so
            // a failing diagnostics hook is deliberately contained here.
          }
        }
      }
    }
  };
}
|
|
254
|
+
|
|
255
|
+
module.exports = {
|
|
256
|
+
createToutiaoCookiesExtension,
|
|
257
|
+
TOUTIAO_COOKIES_REMOTE_PATH,
|
|
258
|
+
TOUTIAO_COOKIE_HOST_DOMAIN,
|
|
259
|
+
TOUTIAO_SESSION_COOKIES,
|
|
260
|
+
TOUTIAO_UID_COOKIES,
|
|
261
|
+
assembleToutiaoCookieHeader,
|
|
262
|
+
_internals: {
|
|
263
|
+
pullCookiesViaSu,
|
|
264
|
+
pickUidFromCookieMap,
|
|
265
|
+
},
|
|
266
|
+
};
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* social-toutiao-adb — Phase 6c (Toutiao C 路径) entry.
|
|
5
|
+
*
|
|
6
|
+
* Desktop ADB pulls Chromium cookies from Toutiao Android app
|
|
7
|
+
* (com.ss.android.article.news) via `su -c base64`, then runs Toutiao web
|
|
8
|
+
* HTTP via ToutiaoApiClient. _signature path: desktop wiring injects
|
|
9
|
+
* ToutiaoSignBridge (Electron WebContentsView running acrawler.js, ~100%
|
|
10
|
+
* hit rate); CLI / test contexts get NullSignProvider → signed endpoints
|
|
11
|
+
* short-circuit with lastErrorCode=-99.
|
|
12
|
+
*
|
|
13
|
+
* Pipeline:
|
|
14
|
+
* bridge.invoke("toutiao.cookies") → {cookie, uid}
|
|
15
|
+
* → ToutiaoApiClient.fetchProfile (no _sig)
|
|
16
|
+
* → fetchFeed + fetchCollection + fetchSearchHistory (signed, parallel)
|
|
17
|
+
* → buildSnapshot + writeSnapshotJson
|
|
18
|
+
* → registry.syncAdapter("social-toutiao", { inputPath })
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
const {
|
|
22
|
+
createToutiaoCookiesExtension,
|
|
23
|
+
TOUTIAO_COOKIES_REMOTE_PATH,
|
|
24
|
+
TOUTIAO_COOKIE_HOST_DOMAIN,
|
|
25
|
+
TOUTIAO_SESSION_COOKIES,
|
|
26
|
+
TOUTIAO_UID_COOKIES,
|
|
27
|
+
assembleToutiaoCookieHeader,
|
|
28
|
+
} = require("./cookies-extension");
|
|
29
|
+
const { ToutiaoApiClient } = require("./api-client");
|
|
30
|
+
const {
|
|
31
|
+
buildSnapshot,
|
|
32
|
+
writeSnapshotJson,
|
|
33
|
+
cleanupSnapshotJson,
|
|
34
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
35
|
+
} = require("./snapshot-builder");
|
|
36
|
+
const { collect, collectAndSync } = require("./collector");
|
|
37
|
+
|
|
38
|
+
module.exports = {
|
|
39
|
+
createToutiaoCookiesExtension,
|
|
40
|
+
TOUTIAO_COOKIES_REMOTE_PATH,
|
|
41
|
+
TOUTIAO_COOKIE_HOST_DOMAIN,
|
|
42
|
+
TOUTIAO_SESSION_COOKIES,
|
|
43
|
+
TOUTIAO_UID_COOKIES,
|
|
44
|
+
assembleToutiaoCookieHeader,
|
|
45
|
+
ToutiaoApiClient,
|
|
46
|
+
buildSnapshot,
|
|
47
|
+
writeSnapshotJson,
|
|
48
|
+
cleanupSnapshotJson,
|
|
49
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
50
|
+
collect,
|
|
51
|
+
collectAndSync,
|
|
52
|
+
};
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Phase 6c (Toutiao C 路径 — 2026-05-25): API responses → snapshot JSON.
|
|
5
|
+
*
|
|
6
|
+
* Matches the existing `social-toutiao` adapter's snapshot mode schema
|
|
7
|
+
* (schemaVersion=1). Kinds: profile / read / collection / search.
|
|
8
|
+
*
|
|
9
|
+
* Toutiao uid is numeric string; account.uid is set verbatim.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
const fs = require("node:fs");
|
|
13
|
+
const path = require("node:path");
|
|
14
|
+
const os = require("node:os");
|
|
15
|
+
const crypto = require("node:crypto");
|
|
16
|
+
|
|
17
|
+
const SNAPSHOT_SCHEMA_VERSION = 1;
|
|
18
|
+
|
|
19
|
+
/**
 * Convert collected Toutiao API data into the snapshot JSON structure
 * (`schemaVersion`, `snapshottedAt`, `account`, `events`).
 *
 * Events are emitted in this order: one `profile` event (when
 * `input.profile` is an object), then `read` events from `input.feed`,
 * `collection` events from `input.collection`, and `search` events from
 * `input.search`. Non-object entries are skipped, as are search entries
 * without a keyword. Any per-item timestamp that is not a positive number
 * falls back to `snapshottedAt`.
 *
 * @param {object} input
 * @param {string} input.uid - non-empty account uid, copied verbatim.
 * @param {string} [input.displayName]
 * @param {number} [input.snapshottedAt] - defaults to `Date.now()` when not
 *   a positive finite number.
 * @param {object} [input.profile]
 * @param {Array} [input.feed]
 * @param {Array} [input.collection]
 * @param {Array} [input.search]
 * @returns {{schemaVersion: number, snapshottedAt: number, account: object,
 *   events: object[]}}
 * @throws {TypeError} when `input` is not an object or `input.uid` is not a
 *   non-empty string.
 */
function buildSnapshot(input) {
  if (!input || typeof input !== "object") {
    throw new TypeError("buildSnapshot: input must be an object");
  }
  const uid = input.uid;
  if (typeof uid !== "string" || uid.length === 0) {
    throw new TypeError("buildSnapshot: input.uid must be a non-empty string");
  }
  const snapshottedAt =
    Number.isFinite(input.snapshottedAt) && input.snapshottedAt > 0
      ? input.snapshottedAt
      : Date.now();
  // Shared rule for every per-item timestamp.
  const timestampOrSnapshot = (value) =>
    typeof value === "number" && value > 0 ? value : snapshottedAt;
  const account = {
    uid,
    displayName: typeof input.displayName === "string" ? input.displayName : "",
  };
  const events = [];

  // profile (1 event per snapshot)
  if (input.profile && typeof input.profile === "object") {
    const p = input.profile;
    events.push({
      kind: "profile",
      id: `profile-${uid}`,
      capturedAt: snapshottedAt,
      uid,
      nickname: p.nickname || account.displayName,
      avatarUrl: p.avatarUrl || null,
      mobile: p.mobile || null,
      description: p.description || null,
      followingCount:
        typeof p.followingCount === "number" ? p.followingCount : 0,
      followerCount:
        typeof p.followerCount === "number" ? p.followerCount : 0,
      mediaId: p.mediaId || null,
    });
  }

  // read history
  const feed = Array.isArray(input.feed) ? input.feed : [];
  feed.forEach((f, idx) => {
    if (!f || typeof f !== "object") return;
    events.push({
      kind: "read",
      id: f.itemId ? `read-${f.itemId}` : `read-${idx}`,
      capturedAt: timestampOrSnapshot(f.publishedAt),
      itemId: f.itemId || null,
      title: f.title || null,
      category: f.category || null,
      author: f.author || null,
      readDuration: typeof f.readDuration === "number" ? f.readDuration : 0,
      source: f.source || null,
    });
  });

  // collection
  const collection = Array.isArray(input.collection) ? input.collection : [];
  collection.forEach((c, idx) => {
    if (!c || typeof c !== "object") return;
    events.push({
      kind: "collection",
      id: c.itemId ? `collect-${c.itemId}` : `collect-${idx}`,
      capturedAt: timestampOrSnapshot(c.savedAt),
      itemId: c.itemId || null,
      title: c.title || null,
      category: c.category || null,
      author: c.author || null,
    });
  });

  // search history
  const search = Array.isArray(input.search) ? input.search : [];
  search.forEach((s, idx) => {
    if (!s || typeof s !== "object" || !s.keyword) return;
    const searchedAt = timestampOrSnapshot(s.searchedAt);
    events.push({
      kind: "search",
      // The id keeps the raw value so ids stay stable across snapshots.
      id: `search-${s.keyword}:${s.searchedAt || idx}`,
      capturedAt: searchedAt,
      keyword: s.keyword,
      // Same validated timestamp as capturedAt, so a malformed searchedAt
      // (string, negative) cannot leak into the event.
      searchAt: searchedAt,
    });
  });

  return {
    schemaVersion: SNAPSHOT_SCHEMA_VERSION,
    snapshottedAt,
    account,
    events,
  };
}
|
|
119
|
+
|
|
120
|
+
/**
 * Serialize a snapshot to a JSON file and return its full path.
 *
 * @param {object} snapshot - value passed to `JSON.stringify`.
 * @param {object} [opts]
 * @param {string} [opts.dir] - target directory (default `os.tmpdir()`).
 * @param {string} [opts.fileName] - basename only (default
 *   `cc-toutiao-snapshot-<uuid>.json`).
 * @returns {string} path of the written file.
 * @throws {Error} when `opts.fileName` contains a path separator or is
 *   `.` / `..`.
 */
function writeSnapshotJson(snapshot, opts = {}) {
  const dir = opts?.dir || os.tmpdir();
  const fileName =
    opts?.fileName || `cc-toutiao-snapshot-${crypto.randomUUID()}.json`;
  const escapesDir =
    fileName.includes("/") ||
    fileName.includes("\\") ||
    fileName === "." ||
    fileName === "..";
  if (escapesDir) {
    throw new Error(
      "writeSnapshotJson: opts.fileName must be a basename, not a path",
    );
  }
  const full = path.join(dir, fileName);
  // The snapshot holds personal data (profile, read/search history), so a
  // newly created file is owner-only. `mode` applies only on creation; an
  // existing file keeps its current permissions.
  fs.writeFileSync(full, JSON.stringify(snapshot), {
    encoding: "utf-8",
    mode: 0o600,
  });
  return full;
}
|
|
133
|
+
|
|
134
|
+
/**
 * Best-effort removal of a snapshot file. Does nothing for a falsy path
 * and never throws: any unlink failure is deliberately ignored.
 *
 * @param {string} [filePath] - path of the file to delete.
 * @returns {void}
 */
function cleanupSnapshotJson(filePath) {
  if (filePath) {
    try {
      fs.unlinkSync(filePath);
    } catch (_ignored) {
      // ignore
    }
  }
}
|
|
142
|
+
|
|
143
|
+
module.exports = {
|
|
144
|
+
buildSnapshot,
|
|
145
|
+
writeSnapshotJson,
|
|
146
|
+
cleanupSnapshotJson,
|
|
147
|
+
SNAPSHOT_SCHEMA_VERSION,
|
|
148
|
+
};
|
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
*/
|
|
20
20
|
|
|
21
21
|
const { computeXsXt } = require("./sign");
|
|
22
|
+
const { NULL_SIGN_PROVIDER } = require("../../sign-providers");
|
|
22
23
|
|
|
23
24
|
const DEFAULT_BASE_URL = "https://edith.xiaohongshu.com/";
|
|
24
25
|
|
|
@@ -80,19 +81,49 @@ class XhsApiClient {
|
|
|
80
81
|
);
|
|
81
82
|
}
|
|
82
83
|
this._now = opts.now || Date.now;
|
|
84
|
+
// Phase 6b: signProvider injectable. Desktop wiring injects
|
|
85
|
+
// XhsSignBridge (Electron WebContentsView running xhs.js, ~100% hit
|
|
86
|
+
// rate). CLI / tests get NULL_SIGN_PROVIDER → falls back to the
|
|
87
|
+
// in-process best-effort computeXsXt (~60% GET / <30% POST hit).
|
|
88
|
+
// Both code paths are present so the client works in either context
|
|
89
|
+
// without the caller having to swap api-client implementations.
|
|
90
|
+
this.signProvider = opts.signProvider || NULL_SIGN_PROVIDER;
|
|
83
91
|
this.lastErrorCode = 0;
|
|
84
92
|
this.lastErrorMessage = null;
|
|
93
|
+
// Diagnostic counters — collector reads these to decide whether to
|
|
94
|
+
// surface "bridge upgrade succeeded" in the report.
|
|
95
|
+
this._bridgeHits = 0;
|
|
96
|
+
this._fallbackHits = 0;
|
|
85
97
|
}
|
|
86
98
|
|
|
87
99
|
async _doGetJson(url, cookie, a1, requireSign) {
|
|
88
100
|
const headers = { ...BROWSER_HEADERS, Cookie: cookie };
|
|
89
101
|
if (requireSign && a1) {
|
|
90
102
|
const pathWithQuery = url.pathname + url.search;
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
}
|
|
94
|
-
|
|
95
|
-
|
|
103
|
+
// Phase 6b: prefer bridge over in-process computeXsXt.
|
|
104
|
+
// signedHeaders is async — bridge does executeJavaScript across
|
|
105
|
+
// Electron IPC. Returns {} on cold bridge / xhs.js rotation / IPC
|
|
106
|
+
// error, in which case we fall back to the best-effort md5.
|
|
107
|
+
const bridgeHeaders = await this.signProvider.signedHeaders(
|
|
108
|
+
url,
|
|
109
|
+
`${pathWithQuery}|`,
|
|
110
|
+
);
|
|
111
|
+
const bridgeKeys = Object.keys(bridgeHeaders);
|
|
112
|
+
if (bridgeKeys.length > 0) {
|
|
113
|
+
// Bridge produced headers — use them verbatim. xhs.js returns
|
|
114
|
+
// X-s / X-t (lowercase t in some builds) / X-s-common; we let
|
|
115
|
+
// the bridge's normalizeXhsHeader handle case.
|
|
116
|
+
Object.assign(headers, bridgeHeaders);
|
|
117
|
+
this._bridgeHits += 1;
|
|
118
|
+
} else {
|
|
119
|
+
// Fallback: in-process best-effort md5 (P3c path).
|
|
120
|
+
const { xs, xt } = computeXsXt(pathWithQuery, null, a1, {
|
|
121
|
+
now: this._now,
|
|
122
|
+
});
|
|
123
|
+
headers["X-S"] = xs;
|
|
124
|
+
headers["X-T"] = xt;
|
|
125
|
+
this._fallbackHits += 1;
|
|
126
|
+
}
|
|
96
127
|
}
|
|
97
128
|
try {
|
|
98
129
|
const resp = await this._fetch(url.toString(), {
|