@xbrowser/xiaohongshu 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +504 -0
- package/package.json +35 -0
package/index.ts
ADDED
|
@@ -0,0 +1,504 @@
|
|
|
1
|
+
import type { XCLIAPI } from '@dyyz1993/xcli-core';
|
|
2
|
+
import { ok, fail } from '@dyyz1993/xcli-core';
|
|
3
|
+
import { z } from 'zod';
|
|
4
|
+
import { detectSsr } from '../shared/ssr-detect.js';
|
|
5
|
+
|
|
6
|
+
type Page = import('playwright-core').Page;
|
|
7
|
+
type Response = import('playwright-core').Response;
|
|
8
|
+
|
|
9
|
+
/** Origin of the Xiaohongshu web app; page URLs in this file are built on top of it. */
const XHS_BASE = 'https://www.xiaohongshu.com';

/**
 * URL path fragments of the web API endpoints whose responses are intercepted.
 * They are matched as substrings of the response URL.
 */
const API = {
  // Single-note payload requested by a note page.
  FEED: '/api/sns/web/v1/feed',
  // Paged list of notes on a user profile.
  USER_POSTED: '/api/sns/web/v1/user_posted',
  // Paged comments of a note.
  COMMENT_PAGE: '/api/sns/web/v2/comment/page',
  // Profile information of another user.
  USER_INFO: '/api/sns/web/v1/user/otherinfo',
  // Note search results.
  SEARCH_NOTES: '/api/sns/web/v1/search/notes',
  // Home page recommendations.
  HOME_FEED: '/api/sns/web/v1/homefeed',
} as const;
|
|
18
|
+
|
|
19
|
+
/**
 * Coerces a loosely-typed API value to a number.
 *
 * `null`/`undefined` map to 0. Values that do not convert to a finite number
 * (for example an abbreviated count string such as "1.2万") also map to 0
 * instead of leaking `NaN` into the parsed result.
 */
function n(v: unknown): number {
  const parsed = Number(v ?? 0);
  return Number.isFinite(parsed) ? parsed : 0;
}
|
|
20
|
+
/** Converts any value to a string; `null` and `undefined` become the empty string. */
function s(v: unknown): string {
  if (v === null || v === undefined) return '';
  return String(v);
}
|
|
21
|
+
|
|
22
|
+
/**
 * Reads a nested value by a dot-separated path (e.g. "note_card.user.nickname").
 *
 * Returns `undefined` as soon as a segment has to be looked up on something
 * that is falsy or not an object; otherwise returns whatever the last segment
 * resolves to.
 */
function g(obj: unknown, path: string): unknown {
  return path.split('.').reduce<unknown>((node, key) => {
    const walkable = Boolean(node) && typeof node === 'object';
    return walkable ? (node as Record<string, unknown>)[key] : undefined;
  }, obj);
}
|
|
30
|
+
|
|
31
|
+
/**
 * Formats a millisecond timestamp as "YYYY-MM-DD HH:mm:ss" in the local time zone.
 *
 * @param ts Timestamp passed straight to `new Date(ts)`.
 * @returns The formatted string, or '' when `ts` is not a positive finite
 *          number or does not map to a valid date.
 */
function formatTime(ts: number): string {
  // NaN and Infinity slip past a plain `ts <= 0` check and would render as "NaN-NaN-NaN ...".
  if (!Number.isFinite(ts) || ts <= 0) return '';
  const d = new Date(ts);
  // Finite values beyond the representable Date range still yield an invalid date.
  if (Number.isNaN(d.getTime())) return '';
  const p = (v: number) => String(v).padStart(2, '0');
  const datePart = `${d.getFullYear()}-${p(d.getMonth() + 1)}-${p(d.getDate())}`;
  const timePart = `${p(d.getHours())}:${p(d.getMinutes())}:${p(d.getSeconds())}`;
  return `${datePart} ${timePart}`;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Normalizes a raw note item into the full note shape returned by this plugin.
 *
 * The note fields are read from `item.note_card` when present, otherwise from
 * `item` itself. Missing or malformed sub-objects and list elements are
 * treated as empty, so one bad entry cannot throw and abort a whole batch.
 *
 * @param item Raw item taken from an intercepted API response.
 * @returns Note with string identifiers/counters, image URLs, tags and timestamps.
 */
function parseNote(item: Record<string, unknown>) {
  // Anything that is not a real object (null, primitives) is read as an empty record.
  const asObj = (v: unknown): Record<string, unknown> =>
    (v !== null && typeof v === 'object' ? v : {}) as Record<string, unknown>;

  const nc = asObj(item.note_card || item);
  const inter = asObj(nc.interact_info);
  const user = asObj(nc.user);
  const cover = asObj(nc.cover);

  const images = Array.isArray(nc.image_list)
    ? nc.image_list.map((img: unknown) => {
        const i = asObj(img);
        return s(i.url_default || i.url);
      })
    : [];
  const videoUrl = s(asObj(nc.video).url);
  const tags = Array.isArray(nc.tag_list)
    ? nc.tag_list.map((t: unknown) => s(asObj(t).name))
    : [];

  return {
    noteId: s(nc.note_id), type: s(nc.type), title: s(nc.title), desc: s(nc.desc),
    cover: s(cover.url_default || cover.url), images, videoUrl,
    author: { userId: s(user.user_id), nickname: s(user.nickname), avatar: s(user.avatar || '') },
    // Counters are kept as strings, exactly as they appear in the payload.
    statistics: {
      likedCount: s(inter.liked_count), collectedCount: s(inter.collected_count),
      commentCount: s(inter.comment_count), shareCount: s(inter.share_count),
    },
    tags, time: n(nc.time), lastUpdateTime: n(nc.last_update_time),
  };
}
|
|
60
|
+
|
|
61
|
+
/**
 * Normalizes a raw comment item.
 *
 * Accepts both spellings seen for some fields (`id`/`comment_id`,
 * `like_count`/`liked_count`, `sub_comment_count`/`sub_comment_total`);
 * the first truthy one wins.
 *
 * @param item Raw comment taken from an intercepted API response.
 * @returns Comment with author info, numeric counters and a formatted creation time.
 */
function parseComment(item: Record<string, unknown>) {
  const author = (item.user_info ?? {}) as Record<string, unknown>;
  const createdAt = n(item.create_time);

  const commentId = item.id || item.comment_id;
  const likes = item.like_count || item.liked_count;
  const replies = item.sub_comment_count || item.sub_comment_total;

  return {
    id: s(commentId),
    content: s(item.content),
    author: {
      userId: s(author.user_id || ''),
      nickname: s(author.nickname || ''),
      avatar: s(author.image || ''),
    },
    likedCount: n(likes),
    subCommentCount: n(replies),
    ipLocation: s(item.ip_location),
    createTime: createdAt,
    createTimeStr: formatTime(createdAt),
  };
}
|
|
72
|
+
|
|
73
|
+
/**
 * Normalizes a raw user payload into the profile shape returned by this plugin.
 *
 * The fields are read from `data.user` when present, otherwise from `data` itself.
 *
 * @param data Raw `data` object of an intercepted user-info response.
 * @returns Profile with identifiers, tags and numeric statistics.
 */
function parseUser(data: Record<string, unknown>) {
  const user = (data.user ?? data) as Record<string, unknown>;

  // A tag may be an object carrying `name` or a plain value. Objects without a
  // name become '' rather than "[object Object]", and null entries no longer throw.
  const tags = Array.isArray(user.tag)
    ? user.tag.map((t: unknown) =>
        t !== null && typeof t === 'object' ? s((t as Record<string, unknown>).name) : s(t))
    : [];

  return {
    userId: s(user.user_id), nickname: s(user.nickname), redId: s(user.red_id || user.xhsId),
    avatar: s(user.image), desc: s(user.desc), gender: s(user.gender), ipLocation: s(user.ip_location),
    tags,
    statistics: { notes: n(user.notes), fans: n(user.fans), following: n(user.follows), interaction: n(user.interaction) },
  };
}
|
|
82
|
+
|
|
83
|
+
/**
 * Normalizes a raw list item (search result / home feed) into a brief note card.
 *
 * The card fields are read from `item.note_card` when it is not null/undefined,
 * otherwise from `item` itself; the note ID falls back to `item.id`.
 *
 * @param item Raw item taken from an intercepted API response.
 * @returns Brief note with ID, type, title, cover URL, author and like counter (as string).
 */
function parseNoteBrief(item: Record<string, unknown>) {
  const card = (item.note_card ?? item ?? {}) as Record<string, unknown>;
  const author = ((card.user ?? {}) || {}) as Record<string, unknown>;
  const interactions = ((card.interact_info ?? {}) || {}) as Record<string, unknown>;
  const coverInfo = (card.cover ?? {}) as Record<string, unknown>;

  const noteId = s(card.note_id || item.id);
  const title = s(card.title || card.display_title);
  const coverUrl = s(coverInfo.url_default || coverInfo.url || '');

  return {
    noteId,
    type: s(card.type),
    title,
    cover: coverUrl,
    author: { userId: s(author.user_id), nickname: s(author.nickname) },
    likedCount: s(interactions.liked_count),
  };
}
|
|
95
|
+
|
|
96
|
+
/** Handle returned by `interceptApi`. */
interface Interceptor {
  /** Returns the items collected so far. */
  items: () => Record<string, unknown>[];
  /** Detaches the response listener from the page. */
  dispose: () => void;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Collects list items from every response whose URL contains `urlPattern`.
 *
 * For each matching response the JSON body is parsed, `data[dataKey]` is read
 * as the list, and each element is stored once, keyed by the value found at
 * `idKey` (a dot path). Elements without an ID are skipped. Responses with
 * `success === false`, no `data`, or a non-array list are ignored. Parse
 * failures are swallowed and only logged when `DEBUG` is set.
 *
 * @param page       Page whose 'response' events are observed.
 * @param urlPattern Substring identifying the API endpoint.
 * @param dataKey    Key under `data` that holds the item array.
 * @param idKey      Dot path of the unique ID inside each item.
 * @returns Accessor for the collected items plus a `dispose` that removes the listener.
 */
function interceptApi(page: Page, urlPattern: string, dataKey: string, idKey: string): Interceptor {
  const collected: Record<string, unknown>[] = [];
  const knownIds = new Set<string>();

  const onResponse = async (response: Response) => {
    const matches = response.url().includes(urlPattern);
    if (!matches) return;

    try {
      const body = (await response.json()) as Record<string, unknown>;
      if (body?.success === false) {
        if (process.env.DEBUG) console.warn('[xhs] API returned success=false for', urlPattern);
        return;
      }

      const payload = body?.data;
      if (!payload) return;

      const entries = (payload as Record<string, unknown>)?.[dataKey];
      if (!Array.isArray(entries)) return;

      entries.forEach((entry: unknown) => {
        const entryId = s(g(entry, idKey));
        // Skip entries without an ID and ones already collected from an earlier page.
        if (entryId && !knownIds.has(entryId)) {
          knownIds.add(entryId);
          collected.push(entry as Record<string, unknown>);
        }
      });
    } catch (err) {
      if (process.env.DEBUG) console.warn('[xhs] interceptApi parse error:', (err as Error)?.message);
    }
  };

  page.on('response', onResponse);

  return {
    items: () => collected,
    dispose: () => page.off('response', onResponse),
  };
}
|
|
126
|
+
|
|
127
|
+
/**
 * Captures the first usable payload from responses whose URL contains `urlPattern`.
 *
 * Each matching response is parsed as JSON and passed to `extractor`; the first
 * truthy value it returns is kept and later responses are ignored. Responses
 * with `success === false` are skipped. Parse failures are swallowed and only
 * logged when `DEBUG` is set.
 *
 * @param page       Page whose 'response' events are observed.
 * @param urlPattern Substring identifying the API endpoint.
 * @param extractor  Maps the parsed JSON body to the wanted value, or null to skip it.
 * @returns `get` for the captured value (null until one arrives) and `dispose`
 *          to remove the listener.
 */
function interceptFirst<T>(page: Page, urlPattern: string, extractor: (json: unknown) => T | null) {
  let result: T | null = null;
  const handler = async (response: Response) => {
    if (result || !response.url().includes(urlPattern)) return;
    try {
      const json = await response.json();
      if ((json as Record<string, unknown>)?.success === false) {
        if (process.env.DEBUG) console.warn('[xhs] API returned success=false for', urlPattern);
        return;
      }
      // Several matching responses can be parsing concurrently. Re-check after the
      // await so a later one cannot overwrite the value that was captured first.
      if (result) return;
      const extracted = extractor(json);
      if (extracted) result = extracted;
    } catch (err) {
      if (process.env.DEBUG) console.warn('[xhs] interceptFirst parse error:', (err as Error)?.message);
    }
  };
  page.on('response', handler);
  return { get: () => result, dispose: () => page.off('response', handler) };
}
|
|
146
|
+
|
|
147
|
+
/**
 * Callback that pauses for manual intervention in the browser (e.g. a captcha).
 * `reason` is the message passed along by the caller; the unit of `timeout` is
 * not visible in this file (callers here pass 120).
 * Resolves with whether the challenge was solved.
 */
type WaitForHumanFn = (
  opts?: { reason?: string; timeout?: number },
) => Promise<{ solved: boolean }>;
|
|
148
|
+
|
|
149
|
+
/**
 * Scrolls the page to the bottom repeatedly so the site keeps loading more data.
 *
 * After each scroll it waits `delay` ms plus up to one second of random jitter
 * and samples `getItemCount()`. The loop ends after `maxPages` scrolls, or
 * earlier once the count has stayed unchanged for `staleThreshold` consecutive
 * rounds.
 *
 * If nothing has been collected by the second round and `waitForHuman` is
 * provided, the user is asked once to complete verification in the browser.
 * When that reports success the stale counter restarts from a fresh sample.
 *
 * @param page         Page to scroll.
 * @param maxPages     Maximum number of scroll rounds.
 * @param getItemCount Returns how many items have been collected so far.
 * @param opts         `delay` (ms, default 2500), `staleThreshold` (default 3)
 *                     and an optional `waitForHuman` callback.
 */
async function scrollAndCollect(
  page: Page, maxPages: number, getItemCount: () => number,
  opts: { delay?: number; staleThreshold?: number; waitForHuman?: WaitForHumanFn } = {},
) {
  const { delay = 2500, staleThreshold = 3, waitForHuman } = opts;
  let lastCount = getItemCount();
  let staleCount = 0;
  // Ask for manual verification at most once per run; prompting again on every
  // empty round would stack several long waits back to back.
  let humanPrompted = false;

  for (let i = 0; i < maxPages; i++) {
    await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
    await page.waitForTimeout(delay + Math.random() * 1000);

    const cur = getItemCount();
    if (cur === 0 && lastCount === 0 && i >= 1 && waitForHuman && !humanPrompted) {
      humanPrompted = true;
      const outcome = await waitForHuman({ reason: '小红书可能需要验证,请在浏览器中完成滑块验证', timeout: 120 });
      if (outcome?.solved) {
        // Data may have arrived while the user was solving the challenge: take a
        // fresh sample and restart the stale budget instead of counting this round.
        lastCount = getItemCount();
        staleCount = 0;
        continue;
      }
    }

    if (cur === lastCount) {
      staleCount++;
    } else {
      staleCount = 0;
      lastCount = cur;
    }
    if (staleCount >= staleThreshold) break;
  }
}
|
|
166
|
+
|
|
167
|
+
/**
 * Polls `getter` every 500 ms until it returns a truthy value or roughly
 * `maxMs` milliseconds have elapsed.
 *
 * @param getter Returns the awaited value, or null while it is not available.
 * @param maxMs  Polling budget in milliseconds (default 10000).
 * @returns The first truthy value seen, otherwise the result of one final `getter()` call.
 */
async function waitForInterceptor<T>(getter: () => T | null, maxMs = 10000): Promise<T | null> {
  const pollIntervalMs = 500;
  let attempt = 0;

  while (attempt < maxMs / pollIntervalMs) {
    const value = getter();
    if (value) return value;
    await new Promise<void>((resolve) => setTimeout(resolve, pollIntervalMs));
    attempt += 1;
  }

  return getter();
}
|
|
175
|
+
|
|
176
|
+
/**
 * Hides login layers, masks and overlays on the page and clears the inline
 * `overflow` style on `<body>`.
 *
 * Matching is by class-name substring, so any element whose class contains
 * "login-layer", "mask" or "overlay" is hidden.
 */
async function dismissModals(page: Page) {
  const blockerSelector = '[class*="login-layer"], [class*="mask"], [class*="overlay"]';

  await page.evaluate((selector) => {
    const blockers = Array.from(document.querySelectorAll(selector));
    for (const node of blockers) {
      if (!(node instanceof HTMLElement)) continue;
      node.style.display = 'none';
    }
    document.body.style.overflow = '';
  }, blockerSelector);
}
|
|
184
|
+
|
|
185
|
+
/**
 * Builds the initial tip lines for a command from its execution context.
 *
 * Adds a hint to connect via `--cdp` when `ctx.cdpEndpoint` is not set, then
 * the session line (`ctx.sessionId`, or "default" when it is falsy).
 */
function buildCtxTips(ctx: Record<string, unknown>): string[] {
  const cdpHint = ctx.cdpEndpoint ? [] : ['建议使用 --cdp 9221 连接 Chrome 浏览器'];
  const sessionLine = `Session: ${ctx.sessionId || 'default'}`;
  return [...cdpHint, sessionLine];
}
|
|
191
|
+
|
|
192
|
+
/**
 * Builds a failure result for a command.
 *
 * @param message Error message to report.
 * @param tips    Tip lines attached to the failure.
 * @returns Whatever `fail` produces for the given message and tips.
 */
function errResult(message: string, tips: string[]) {
  const failure = fail(message, tips);
  return failure;
}
|
|
195
|
+
|
|
196
|
+
export default function (xcli: XCLIAPI): void {
|
|
197
|
+
const site = xcli.createSite({ name: 'xiaohongshu', url: XHS_BASE, description: '小红书数据采集' });
|
|
198
|
+
|
|
199
|
+
// `detail`: opens a note page and returns the note captured from the feed API response.
site.command('detail', {
  description: '获取笔记详情(API 拦截)',
  scope: 'browser',
  parameters: z.object({
    noteId: z.string().describe('笔记 ID'),
  }),
  examples: [
    { cmd: 'xbrowser xiaohongshu detail --noteId "67xxxxxxxxxxxxxx"', description: '获取笔记详情' },
  ],
  result: z.any(),
  handler: async (params, ctx) => {
    try {
      const context = ctx as Record<string, unknown>;
      const page = context.page as Page;
      if (!page) throw new Error('需要浏览器页面');

      const tips = buildCtxTips(context);
      const waitForHuman = context.waitForHuman as WaitForHumanFn | undefined;

      // Keep the first element of `data.items` from the feed response.
      const firstFeedItem = interceptFirst<Record<string, unknown>>(page, API.FEED, (json) => {
        const data = (json as Record<string, unknown>)?.data;
        if (!data || typeof data !== 'object') return null;
        const items = (data as Record<string, unknown>)?.items;
        const hasItems = Array.isArray(items) && items.length !== 0;
        return hasItems ? (items[0] as Record<string, unknown>) : null;
      });

      try {
        const noteUrl = `${XHS_BASE}/explore/${params.noteId}`;
        await page.goto(noteUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
        await dismissModals(page);

        const ssr = await detectSsr(page);
        if (ssr) {
          tips.push(ssr.tip);
          if (ssr.dataKeys?.length) {
            tips.push(`SSR 数据 keys: ${ssr.dataKeys.join(', ')}`);
          }
        }

        let raw = await waitForInterceptor(firstFeedItem.get);
        if (!raw && waitForHuman) {
          // Nothing captured: let the user log in or verify, then poll briefly again.
          await waitForHuman({ reason: '小红书笔记详情加载失败,可能需要登录或验证', timeout: 120 });
          raw = await waitForInterceptor(firstFeedItem.get, 5000);
        }
        if (!raw) {
          return fail('未获取到笔记数据,可能笔记不存在或需要登录', [...tips, '未获取到笔记数据,可能笔记不存在或需要登录']);
        }

        const note = parseNote(raw);
        const label = note.title?.slice(0, 50) || note.desc?.slice(0, 50);
        tips.push(`笔记: ${label}`);
        return ok(note, tips);
      } finally {
        firstFeedItem.dispose();
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : '未知错误';
      return errResult(message, ['获取笔记详情失败']);
    }
  },
});
|
|
241
|
+
|
|
242
|
+
// `notes`: opens a user profile, scrolls it, and returns the notes seen in user_posted responses.
site.command('notes', {
  description: '采集用户笔记列表(API 拦截)',
  scope: 'browser',
  parameters: z.object({
    userId: z.string().describe('用户 ID'),
    maxPages: z.number().default(5).describe('最大滚动次数'),
  }),
  examples: [
    { cmd: 'xbrowser xiaohongshu notes --userId "5xxxxxxxxxxxx"', description: '采集用户笔记' },
  ],
  result: z.any(),
  handler: async (params, ctx) => {
    try {
      const context = ctx as Record<string, unknown>;
      const page = context.page as Page;
      if (!page) throw new Error('需要浏览器页面');

      const tips = buildCtxTips(context);
      const waitForHuman = context.waitForHuman as WaitForHumanFn | undefined;
      const postedNotes = interceptApi(page, API.USER_POSTED, 'notes', 'note_id');

      try {
        const profileUrl = `${XHS_BASE}/user/profile/${params.userId}`;
        await page.goto(profileUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
        await dismissModals(page);

        const ssr = await detectSsr(page);
        if (ssr) {
          tips.push(ssr.tip);
          if (ssr.dataKeys?.length) {
            tips.push(`SSR 数据 keys: ${ssr.dataKeys.join(', ')}`);
          }
        }

        const scrollRounds = params.maxPages || 5;
        await scrollAndCollect(page, scrollRounds, () => postedNotes.items().length, { waitForHuman });

        const notes = postedNotes.items().map(parseNote);
        tips.push(`采集到 ${notes.length} 条笔记`);
        return ok({ total: notes.length, notes }, tips);
      } finally {
        postedNotes.dispose();
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : '未知错误';
      return errResult(message, ['采集用户笔记失败']);
    }
  },
});
|
|
273
|
+
|
|
274
|
+
// `profile`: opens a user profile and returns the user info captured from the API
// response, falling back to scraping the DOM when no response was captured.
site.command('profile', {
  description: '获取用户资料(API 拦截 + DOM 兜底)',
  scope: 'browser',
  parameters: z.object({ userId: z.string().describe('用户 ID') }),
  examples: [{ cmd: 'xbrowser xiaohongshu profile --userId "5xxxxxxxxxxxx"', description: '获取用户资料' }],
  result: z.any(),
  handler: async (params, ctx) => {
    try {
      const page = (ctx as Record<string, unknown>).page as Page;
      if (!page) throw new Error('需要浏览器页面');
      const tips = buildCtxTips(ctx as Record<string, unknown>);
      const waitForHuman = (ctx as Record<string, unknown>).waitForHuman as WaitForHumanFn | undefined;
      const interceptor = interceptFirst<Record<string, unknown>>(page, API.USER_INFO, (json) => {
        const data = (json as Record<string, unknown>)?.data;
        return data && typeof data === 'object' ? data as Record<string, unknown> : null;
      });
      try {
        await page.goto(`${XHS_BASE}/user/profile/${params.userId}`, { waitUntil: 'domcontentloaded', timeout: 30000 });
        await dismissModals(page);

        const ssr = await detectSsr(page);
        if (ssr) {
          tips.push(ssr.tip);
          if (ssr.dataKeys?.length) tips.push(`SSR 数据 keys: ${ssr.dataKeys.join(', ')}`);
        }

        let raw = await waitForInterceptor(interceptor.get);
        if (!raw && waitForHuman) {
          await waitForHuman({ reason: '小红书用户资料加载失败,可能需要登录或验证', timeout: 120 });
          raw = await waitForInterceptor(interceptor.get, 5000);
        }
        if (raw) {
          const user = parseUser(raw);
          tips.push(`用户: ${user.nickname}`);
          return ok(user, tips);
        }

        // DOM fallback: best-effort scrape using class-name substring selectors.
        const domInfo = await page.evaluate(() => {
          const nickname =
            document.querySelector('[class*="nickname"]')?.textContent?.trim() ||
            document.querySelector('[class*="userName"]')?.textContent?.trim() ||
            document.querySelector('[class*="user-name"]')?.textContent?.trim() ||
            '';
          const desc = document.querySelector('[class*="desc"]')?.textContent?.trim() || '';
          const avatar = document.querySelector('[class*="avatar"] img')?.getAttribute('src') || '';
          const stats: Record<string, string> = {};
          // Each counter element is labelled by the text of its previous sibling.
          document.querySelectorAll('[class*="count"]').forEach((el) => {
            const label = el.previousElementSibling?.textContent?.trim() || '';
            if (label) stats[label] = el.textContent?.trim() || '';
          });
          return { nickname, desc, avatar, stats };
        });

        // When the fallback finds nothing at all, report a failure (as `detail`
        // does) instead of a "successful" empty profile.
        const foundNothing =
          !domInfo.nickname && !domInfo.desc && !domInfo.avatar && Object.keys(domInfo.stats).length === 0;
        if (foundNothing) {
          return fail('未获取到用户资料,可能用户不存在或需要登录', [...tips, '未获取到用户资料,可能用户不存在或需要登录']);
        }

        tips.push(`用户(DOM): ${domInfo.nickname}`);
        return ok({ userId: params.userId, ...domInfo }, tips);
      } finally { interceptor.dispose(); }
    } catch (error) { return errResult(error instanceof Error ? error.message : '未知错误', ['获取用户资料失败']); }
  },
});
|
|
327
|
+
|
|
328
|
+
// `search`: opens the search result page for a keyword, scrolls it, and returns
// the brief notes seen in search API responses.
site.command('search', {
  description: '搜索笔记(API 拦截)',
  scope: 'browser',
  parameters: z.object({
    keyword: z.string().describe('搜索关键词'),
    maxPages: z.number().default(3).describe('最大滚动次数'),
  }),
  examples: [
    { cmd: 'xbrowser xiaohongshu search --keyword "美食推荐"', description: '搜索笔记' },
  ],
  result: z.any(),
  handler: async (params, ctx) => {
    try {
      const context = ctx as Record<string, unknown>;
      const page = context.page as Page;
      if (!page) throw new Error('需要浏览器页面');

      const tips = buildCtxTips(context);
      const waitForHuman = context.waitForHuman as WaitForHumanFn | undefined;
      const searchHits = interceptApi(page, API.SEARCH_NOTES, 'items', 'id');

      try {
        const encodedKeyword = encodeURIComponent(params.keyword);
        const searchUrl = `${XHS_BASE}/search_result?keyword=${encodedKeyword}&source=web_search_result_notes`;
        await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
        await dismissModals(page);

        const ssr = await detectSsr(page);
        if (ssr) {
          tips.push(ssr.tip);
          if (ssr.dataKeys?.length) {
            tips.push(`SSR 数据 keys: ${ssr.dataKeys.join(', ')}`);
          }
        }

        const scrollRounds = params.maxPages || 3;
        await scrollAndCollect(page, scrollRounds, () => searchHits.items().length, { waitForHuman });

        const notes = searchHits.items().map(parseNoteBrief);
        tips.push(`搜索到 ${notes.length} 条笔记`);
        return ok({ keyword: params.keyword, total: notes.length, notes }, tips);
      } finally {
        searchHits.dispose();
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : '未知错误';
      return errResult(message, ['搜索笔记失败']);
    }
  },
});
|
|
359
|
+
|
|
360
|
+
// `comments`: opens a note page, scrolls it, and returns the comments seen in
// comment-page API responses.
site.command('comments', {
  description: '获取笔记评论(API 拦截)',
  scope: 'browser',
  parameters: z.object({
    noteId: z.string().describe('笔记 ID'),
    maxPages: z.number().default(8).describe('最大滚动次数'),
  }),
  examples: [
    { cmd: 'xbrowser xiaohongshu comments --noteId "67xxxxxxxxxxxxxx"', description: '获取笔记评论' },
  ],
  result: z.any(),
  handler: async (params, ctx) => {
    try {
      const context = ctx as Record<string, unknown>;
      const page = context.page as Page;
      if (!page) throw new Error('需要浏览器页面');

      const tips = buildCtxTips(context);
      const waitForHuman = context.waitForHuman as WaitForHumanFn | undefined;
      const commentPages = interceptApi(page, API.COMMENT_PAGE, 'comments', 'id');

      try {
        const noteUrl = `${XHS_BASE}/explore/${params.noteId}`;
        await page.goto(noteUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
        await dismissModals(page);

        const ssr = await detectSsr(page);
        if (ssr) {
          tips.push(ssr.tip);
          if (ssr.dataKeys?.length) {
            tips.push(`SSR 数据 keys: ${ssr.dataKeys.join(', ')}`);
          }
        }

        // This command uses a longer delay and a higher stale threshold than the scroll defaults.
        const scrollOptions = { delay: 3000, staleThreshold: 4, waitForHuman };
        const scrollRounds = params.maxPages || 8;
        await scrollAndCollect(page, scrollRounds, () => commentPages.items().length, scrollOptions);

        const comments = commentPages.items().map(parseComment);
        tips.push(`采集到 ${comments.length} 条评论`);
        return ok({ total: comments.length, comments }, tips);
      } finally {
        commentPages.dispose();
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : '未知错误';
      return errResult(message, ['获取笔记评论失败']);
    }
  },
});
|
|
395
|
+
|
|
396
|
+
// `feed`: opens the explore page, scrolls it, and returns the brief notes seen
// in home-feed API responses.
site.command('feed', {
  description: '获取首页推荐(API 拦截)',
  scope: 'browser',
  parameters: z.object({
    maxPages: z.number().default(3).describe('最大滚动次数'),
  }),
  examples: [
    { cmd: 'xbrowser xiaohongshu feed', description: '获取首页推荐' },
  ],
  result: z.any(),
  handler: async (params, ctx) => {
    try {
      const context = ctx as Record<string, unknown>;
      const page = context.page as Page;
      if (!page) throw new Error('需要浏览器页面');

      const tips = buildCtxTips(context);
      const waitForHuman = context.waitForHuman as WaitForHumanFn | undefined;
      const homeFeed = interceptApi(page, API.HOME_FEED, 'items', 'id');

      try {
        const exploreUrl = `${XHS_BASE}/explore`;
        await page.goto(exploreUrl, { waitUntil: 'domcontentloaded', timeout: 30000 });
        await dismissModals(page);

        const ssr = await detectSsr(page);
        if (ssr) {
          tips.push(ssr.tip);
          if (ssr.dataKeys?.length) {
            tips.push(`SSR 数据 keys: ${ssr.dataKeys.join(', ')}`);
          }
        }

        const scrollRounds = params.maxPages || 3;
        await scrollAndCollect(page, scrollRounds, () => homeFeed.items().length, { waitForHuman });

        const notes = homeFeed.items().map(parseNoteBrief);
        tips.push(`获取到 ${notes.length} 条推荐笔记`);
        return ok({ total: notes.length, notes }, tips);
      } finally {
        homeFeed.dispose();
      }
    } catch (error) {
      const message = error instanceof Error ? error.message : '未知错误';
      return errResult(message, ['获取首页推荐失败']);
    }
  },
});
|
|
427
|
+
|
|
428
|
+
// `resolve-url`: navigates to a short link, waits for redirects to settle, and
// extracts the note/user ID from the final URL.
site.command('resolve-url', {
  description: '解析小红书短链',
  scope: 'browser',
  parameters: z.object({ url: z.string().describe('短链 URL') }),
  examples: [{ cmd: 'xbrowser xiaohongshu resolve-url --url "https://xhslink.com/xxx"', description: '解析短链' }],
  result: z.any(),
  handler: async (params, ctx) => {
    try {
      const page = (ctx as Record<string, unknown>).page as Page;
      if (!page) throw new Error('需要浏览器页面');
      // The URL is user input: only navigate to http(s) so that file:, javascript:
      // and similar schemes are never opened in the controlled browser.
      if (!/^https?:\/\//i.test(params.url)) throw new Error('仅支持 http/https 链接');
      const tips = buildCtxTips(ctx as Record<string, unknown>);
      await page.goto(params.url, { waitUntil: 'domcontentloaded', timeout: 15000 });
      // Fixed pause so client-side redirects can finish before the URL is read.
      await page.waitForTimeout(3000);

      const ssr = await detectSsr(page);
      if (ssr) {
        tips.push(ssr.tip);
        if (ssr.dataKeys?.length) tips.push(`SSR 数据 keys: ${ssr.dataKeys.join(', ')}`);
      }

      const finalUrl = page.url();
      // Note pages are matched under /explore/<id> and also under /discovery/item/<id>.
      // NOTE(review): the /discovery/item form is assumed to be a share-link redirect target — confirm.
      const noteIdMatch = finalUrl.match(/\/(?:explore|discovery\/item)\/([a-zA-Z0-9]+)/);
      const userIdMatch = finalUrl.match(/\/user\/profile\/([a-zA-Z0-9]+)/);
      const noteId = noteIdMatch ? noteIdMatch[1] : '';
      const userId = userIdMatch ? userIdMatch[1] : '';
      tips.push(`最终 URL: ${finalUrl}`);
      if (noteId) tips.push(`笔记 ID: ${noteId}`);
      if (userId) tips.push(`用户 ID: ${userId}`);
      return ok({ originalUrl: params.url, finalUrl, noteId, userId }, tips);
    } catch (error) { return errResult(error instanceof Error ? error.message : '未知错误', ['解析短链失败']); }
  },
});
|
|
460
|
+
|
|
461
|
+
// `search-image`: opens the search result page for a query, scrolls it, and
// scrapes image elements from the DOM.
site.command('search-image', {
  description: '小红书图片搜索',
  scope: 'browser',
  parameters: z.object({
    query: z.string(),
    limit: z.number().optional().default(10),
    // Optional page override; when absent the page from the command context is used.
    page: z.any().optional(),
    timeout: z.number().optional().default(20000),
  }),
  result: z.any(),
  handler: async (params, ctx) => {
    // Everything, including the page check, runs inside the try so this command
    // reports failures as a result, like the other commands, instead of throwing.
    try {
      const page = ((params.page as Page | undefined) || (ctx as Record<string, unknown>).page) as Page;
      if (!page) throw new Error('需要浏览器页面');

      // 'domcontentloaded' matches the other commands; 'networkidle' is discouraged by
      // Playwright and can hit the timeout. The fixed waits below allow content to load.
      const searchUrl = `${XHS_BASE}/search_result?keyword=${encodeURIComponent(params.query)}&source=web_search_result_notes`;
      await page.goto(searchUrl, { waitUntil: 'domcontentloaded', timeout: params.timeout });
      await page.waitForTimeout(6000);
      for (let i = 0; i < 8; i++) {
        await page.evaluate(() => window.scrollBy(0, 800));
        await page.waitForTimeout(1500);
      }

      const results = await page.evaluate((limit) => {
        const imgs: Array<Record<string, unknown>> = [];
        // Prefer images from known CDN/host substrings; fall back to every <img>.
        let allImgs = document.querySelectorAll('img[src*="xhscdn"], img[src*="sns-webpic"], img[src*="xiaohongshu"]');
        if (allImgs.length === 0) {
          allImgs = document.querySelectorAll('img');
        }
        for (const img of Array.from(allImgs)) {
          if (imgs.length >= limit) break;
          const el = img as HTMLImageElement;
          const src = el.src || el.getAttribute('data-src') || '';
          const finalSrc = src.startsWith('//') ? 'https:' + src : src;
          if (!finalSrc.startsWith('http')) continue;
          // Skip tiny images and obvious UI assets.
          if (el.width < 30) continue;
          if (finalSrc.includes('logo') || finalSrc.includes('icon') || finalSrc.includes('avatar')) continue;
          imgs.push({
            title: el.alt || '', thumbnailUrl: finalSrc, sourceUrl: el.closest('a')?.href || '',
            originalUrl: finalSrc.replace(/\/\d+$/, '/0'), width: el.naturalWidth || el.width || 0,
            height: el.naturalHeight || el.height || 0, format: 'jpg', sourceSite: 'xiaohongshu',
          });
        }
        return imgs;
      }, params.limit);

      return ok({ query: params.query, engine: 'xiaohongshu', results, total: results.length, timestamp: Date.now() }, [`小红书 "${params.query}",共 ${results.length} 张`]);
    } catch (error) { return errResult(error instanceof Error ? error.message : '未知错误', ['小红书图片搜索失败']); }
  },
});
|
|
504
|
+
}
|
package/package.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@xbrowser/xiaohongshu",
|
|
3
|
+
"version": "1.1.0",
|
|
4
|
+
"description": "小红书 - 笔记详情、用户信息、搜索、评论采集",
|
|
5
|
+
"main": "index.ts",
|
|
6
|
+
"keywords": [
|
|
7
|
+
"xbrowser",
|
|
8
|
+
"xiaohongshu",
|
|
9
|
+
"redbook",
|
|
10
|
+
"xhs",
|
|
11
|
+
"笔记",
|
|
12
|
+
"social-media"
|
|
13
|
+
],
|
|
14
|
+
"author": "XBrowser Team",
|
|
15
|
+
"license": "MIT",
|
|
16
|
+
"xbrowser": {
|
|
17
|
+
"site": "https://www.xiaohongshu.com",
|
|
18
|
+
"requiresLogin": false,
|
|
19
|
+
"commands": [
|
|
20
|
+
"detail",
|
|
21
|
+
"notes",
|
|
22
|
+
"profile",
|
|
23
|
+
"search",
|
|
24
|
+
"comments",
|
|
25
|
+
"feed",
|
|
26
|
+
"resolve-url"
|
|
27
|
+
]
|
|
28
|
+
},
|
|
29
|
+
"dependencies": {
|
|
30
|
+
"zod": "^3.24.0"
|
|
31
|
+
},
|
|
32
|
+
"peerDependencies": {
|
|
33
|
+
"@dyyz1993/xcli-core": ">=1.0.0"
|
|
34
|
+
}
|
|
35
|
+
}
|