@jackwener/opencli 1.7.17 → 1.7.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -8
- package/README.zh-CN.md +9 -8
- package/cli-manifest.json +585 -9
- package/clis/ctrip/ctrip.test.js +486 -1
- package/clis/ctrip/flight.js +136 -0
- package/clis/ctrip/hotel-search.js +132 -0
- package/clis/ctrip/utils.js +298 -0
- package/clis/doubao/utils.js +17 -0
- package/clis/doubao/utils.test.js +61 -0
- package/clis/google/search.js +16 -6
- package/clis/google-scholar/search.js +20 -5
- package/clis/google-scholar/search.test.js +35 -2
- package/clis/reddit/home.js +117 -0
- package/clis/reddit/home.test.js +127 -0
- package/clis/reddit/read.js +400 -54
- package/clis/reddit/read.test.js +315 -12
- package/clis/reddit/reply.js +182 -0
- package/clis/reddit/reply.test.js +89 -0
- package/clis/reddit/subreddit-info.js +117 -0
- package/clis/reddit/subreddit-info.test.js +163 -0
- package/clis/reddit/whoami.js +84 -0
- package/clis/reddit/whoami.test.js +105 -0
- package/clis/rednote/comments.js +76 -0
- package/clis/rednote/download.js +59 -0
- package/clis/rednote/feed.js +95 -0
- package/clis/rednote/navigation.test.js +26 -0
- package/clis/rednote/note.js +68 -0
- package/clis/rednote/notifications.js +139 -0
- package/clis/rednote/rednote.test.js +157 -0
- package/clis/rednote/search.js +101 -0
- package/clis/rednote/user.js +55 -0
- package/clis/twitter/bookmark-folder.js +3 -1
- package/clis/twitter/bookmarks.js +3 -1
- package/clis/twitter/followers.js +20 -5
- package/clis/twitter/followers.test.js +44 -0
- package/clis/twitter/following.js +36 -20
- package/clis/twitter/following.test.js +60 -8
- package/clis/twitter/likes.js +28 -13
- package/clis/twitter/likes.test.js +111 -1
- package/clis/twitter/list-add.js +128 -204
- package/clis/twitter/list-add.test.js +97 -1
- package/clis/twitter/list-tweets.js +13 -4
- package/clis/twitter/list-tweets.test.js +48 -0
- package/clis/twitter/lists.js +5 -2
- package/clis/twitter/post.js +23 -4
- package/clis/twitter/post.test.js +30 -0
- package/clis/twitter/profile.js +16 -8
- package/clis/twitter/profile.test.js +39 -0
- package/clis/twitter/reply.js +133 -10
- package/clis/twitter/reply.test.js +55 -0
- package/clis/twitter/search.js +188 -170
- package/clis/twitter/search.test.js +96 -258
- package/clis/twitter/shared.js +167 -16
- package/clis/twitter/shared.test.js +102 -1
- package/clis/twitter/timeline.js +3 -1
- package/clis/twitter/tweets.js +147 -51
- package/clis/twitter/tweets.test.js +238 -1
- package/clis/xiaohongshu/comments.js +57 -26
- package/clis/xiaohongshu/comments.test.js +63 -1
- package/clis/xiaohongshu/download.js +32 -23
- package/clis/xiaohongshu/feed.js +23 -15
- package/clis/xiaohongshu/note-helpers.js +16 -6
- package/clis/xiaohongshu/note.js +26 -20
- package/clis/xiaohongshu/notifications.js +26 -19
- package/clis/xiaohongshu/search.js +201 -37
- package/clis/xiaohongshu/search.test.js +82 -8
- package/clis/xiaohongshu/user-helpers.js +13 -4
- package/clis/xiaohongshu/user-helpers.test.js +20 -0
- package/clis/xiaohongshu/user.js +9 -4
- package/clis/xueqiu/earnings-date.js +2 -2
- package/clis/xueqiu/kline.js +2 -2
- package/clis/xueqiu/utils.js +19 -0
- package/clis/xueqiu/utils.test.js +26 -0
- package/clis/youtube/transcript.js +28 -3
- package/clis/youtube/transcript.test.js +90 -1
- package/clis/zhihu/answer-detail.js +233 -0
- package/clis/zhihu/answer-detail.test.js +330 -0
- package/clis/zhihu/question.js +44 -10
- package/clis/zhihu/question.test.js +78 -1
- package/clis/zhihu/recommend.js +103 -0
- package/clis/zhihu/recommend.test.js +143 -0
- package/dist/src/browser/base-page.d.ts +3 -2
- package/dist/src/browser/base-page.test.js +2 -2
- package/dist/src/browser/cdp.js +3 -3
- package/dist/src/browser/page.d.ts +3 -2
- package/dist/src/browser/page.js +4 -4
- package/dist/src/browser/page.test.js +31 -0
- package/dist/src/browser/utils.d.ts +10 -0
- package/dist/src/browser/utils.js +37 -0
- package/dist/src/browser/utils.test.d.ts +1 -0
- package/dist/src/browser/utils.test.js +29 -0
- package/dist/src/cli-argv-preprocess.d.ts +37 -0
- package/dist/src/cli-argv-preprocess.js +131 -0
- package/dist/src/cli-argv-preprocess.test.d.ts +1 -0
- package/dist/src/cli-argv-preprocess.test.js +130 -0
- package/dist/src/cli.js +123 -86
- package/dist/src/cli.test.js +32 -22
- package/dist/src/commands/daemon.js +6 -7
- package/dist/src/doctor.js +21 -17
- package/dist/src/doctor.test.js +2 -0
- package/dist/src/download/progress.js +15 -11
- package/dist/src/download/progress.test.d.ts +1 -0
- package/dist/src/download/progress.test.js +25 -0
- package/dist/src/execution.js +1 -3
- package/dist/src/execution.test.js +4 -16
- package/dist/src/help.d.ts +11 -0
- package/dist/src/help.js +46 -5
- package/dist/src/logger.js +8 -9
- package/dist/src/main.js +16 -0
- package/dist/src/output.js +4 -5
- package/dist/src/runtime-detect.d.ts +1 -1
- package/dist/src/runtime-detect.js +1 -1
- package/dist/src/runtime-detect.test.js +3 -2
- package/dist/src/tui.d.ts +0 -1
- package/dist/src/tui.js +9 -22
- package/dist/src/types.d.ts +3 -1
- package/dist/src/update-check.js +4 -5
- package/package.json +5 -4
package/clis/xiaohongshu/note.js
CHANGED
|
@@ -9,25 +9,12 @@
|
|
|
9
9
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
10
10
|
import { AuthRequiredError, CliError, EmptyResultError } from '@jackwener/opencli/errors';
|
|
11
11
|
import { parseNoteId, buildNoteUrl } from './note-helpers.js';
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
strategy: Strategy.COOKIE,
|
|
19
|
-
navigateBefore: false,
|
|
20
|
-
args: [
|
|
21
|
-
{ name: 'note-id', required: true, positional: true, help: 'Full Xiaohongshu note URL with xsec_token' },
|
|
22
|
-
],
|
|
23
|
-
columns: ['field', 'value'],
|
|
24
|
-
func: async (page, kwargs) => {
|
|
25
|
-
const raw = String(kwargs['note-id']);
|
|
26
|
-
const noteId = parseNoteId(raw);
|
|
27
|
-
const url = buildNoteUrl(raw, { commandName: 'xiaohongshu note' });
|
|
28
|
-
await page.goto(url);
|
|
29
|
-
await page.wait({ time: 2 + Math.random() * 3 });
|
|
30
|
-
const data = await page.evaluate(`
|
|
12
|
+
/**
|
|
13
|
+
* Host-agnostic IIFE that scrapes note title / author / counts / tags from a
|
|
14
|
+
* rendered note detail page. Exported so the rednote adapter can reuse the
|
|
15
|
+
* exact same selector set without copying it.
|
|
16
|
+
*/
|
|
17
|
+
export const NOTE_EXTRACT_JS = `
|
|
31
18
|
(() => {
|
|
32
19
|
const bodyText = document.body?.innerText || ''
|
|
33
20
|
const loginWall = /登录后查看|请登录/.test(bodyText)
|
|
@@ -58,7 +45,26 @@ cli({
|
|
|
58
45
|
|
|
59
46
|
return { pageUrl: location.href, securityBlock, loginWall, notFound, title, desc, author, likes, collects, comments, tags }
|
|
60
47
|
})()
|
|
61
|
-
|
|
48
|
+
`;
|
|
49
|
+
export const command = cli({
|
|
50
|
+
site: 'xiaohongshu',
|
|
51
|
+
name: 'note',
|
|
52
|
+
access: 'read',
|
|
53
|
+
description: '获取小红书笔记正文和互动数据',
|
|
54
|
+
domain: 'www.xiaohongshu.com',
|
|
55
|
+
strategy: Strategy.COOKIE,
|
|
56
|
+
navigateBefore: false,
|
|
57
|
+
args: [
|
|
58
|
+
{ name: 'note-id', required: true, positional: true, help: 'Full Xiaohongshu note URL with xsec_token' },
|
|
59
|
+
],
|
|
60
|
+
columns: ['field', 'value'],
|
|
61
|
+
func: async (page, kwargs) => {
|
|
62
|
+
const raw = String(kwargs['note-id']);
|
|
63
|
+
const noteId = parseNoteId(raw);
|
|
64
|
+
const url = buildNoteUrl(raw, { commandName: 'xiaohongshu note' });
|
|
65
|
+
await page.goto(url);
|
|
66
|
+
await page.wait({ time: 2 + Math.random() * 3 });
|
|
67
|
+
const data = await page.evaluate(NOTE_EXTRACT_JS);
|
|
62
68
|
if (!data || typeof data !== 'object') {
|
|
63
69
|
throw new EmptyResultError('xiaohongshu/note', 'Unexpected evaluate response');
|
|
64
70
|
}
|
|
@@ -1,23 +1,11 @@
|
|
|
1
1
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
browser: true,
|
|
10
|
-
args: [
|
|
11
|
-
{
|
|
12
|
-
name: 'type',
|
|
13
|
-
default: 'mentions',
|
|
14
|
-
help: 'Notification type: mentions, likes, or connections',
|
|
15
|
-
},
|
|
16
|
-
{ name: 'limit', type: 'int', default: 20, help: 'Number of notifications to return' },
|
|
17
|
-
],
|
|
18
|
-
columns: ['rank', 'user', 'action', 'content', 'note', 'time'],
|
|
19
|
-
pipeline: [
|
|
20
|
-
{ navigate: 'https://www.xiaohongshu.com/notification' },
|
|
2
|
+
/**
|
|
3
|
+
* Build the notifications pipeline for the given web host. Exported so the
|
|
4
|
+
* rednote adapter can register the same pipeline against www.rednote.com.
|
|
5
|
+
*/
|
|
6
|
+
export function buildNotificationsPipeline(webHost) {
|
|
7
|
+
return [
|
|
8
|
+
{ navigate: `https://${webHost}/notification` },
|
|
21
9
|
{ tap: {
|
|
22
10
|
store: 'notification',
|
|
23
11
|
action: 'getNotification',
|
|
@@ -35,5 +23,24 @@ cli({
|
|
|
35
23
|
time: '${{ item.time }}',
|
|
36
24
|
} },
|
|
37
25
|
{ limit: '${{ args.limit | default(20) }}' },
|
|
26
|
+
];
|
|
27
|
+
}
|
|
28
|
+
export const command = cli({
|
|
29
|
+
site: 'xiaohongshu',
|
|
30
|
+
name: 'notifications',
|
|
31
|
+
access: 'read',
|
|
32
|
+
description: '小红书通知 (mentions/likes/connections)',
|
|
33
|
+
domain: 'www.xiaohongshu.com',
|
|
34
|
+
strategy: Strategy.INTERCEPT,
|
|
35
|
+
browser: true,
|
|
36
|
+
args: [
|
|
37
|
+
{
|
|
38
|
+
name: 'type',
|
|
39
|
+
default: 'mentions',
|
|
40
|
+
help: 'Notification type: mentions, likes, or connections',
|
|
41
|
+
},
|
|
42
|
+
{ name: 'limit', type: 'int', default: 20, help: 'Number of notifications to return' },
|
|
38
43
|
],
|
|
44
|
+
columns: ['rank', 'user', 'action', 'content', 'note', 'time'],
|
|
45
|
+
pipeline: buildNotificationsPipeline('www.xiaohongshu.com'),
|
|
39
46
|
});
|
|
@@ -6,16 +6,24 @@
|
|
|
6
6
|
* Ref: https://github.com/jackwener/opencli/issues/10
|
|
7
7
|
*/
|
|
8
8
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
9
|
-
import { AuthRequiredError } from '@jackwener/opencli/errors';
|
|
9
|
+
import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors';
|
|
10
10
|
/**
|
|
11
11
|
* Wait for search results or login wall using MutationObserver (max 5s).
|
|
12
12
|
* Returns 'content' if note items appeared, 'login_wall' if login gate
|
|
13
13
|
* detected, or 'timeout' if neither appeared within the deadline.
|
|
14
|
+
*
|
|
15
|
+
* Note-item detection tries the legacy `section.note-item` class first
|
|
16
|
+
* (still observed in many sessions, including rednote) and falls back to
|
|
17
|
+
* a `<section>` element containing a `/search_result/` or `/explore/`
|
|
18
|
+
* link. Issue #1506 reports the class being dropped on some xhs renders.
|
|
14
19
|
*/
|
|
15
20
|
const WAIT_FOR_CONTENT_JS = `
|
|
16
21
|
new Promise((resolve) => {
|
|
22
|
+
const findNoteCard = () => document.querySelector(
|
|
23
|
+
'section.note-item, section:has(a[href*="/search_result/"]), section:has(a[href*="/explore/"])'
|
|
24
|
+
);
|
|
17
25
|
const detect = () => {
|
|
18
|
-
if (
|
|
26
|
+
if (findNoteCard()) return 'content';
|
|
19
27
|
if (/登录后查看搜索结果/.test(document.body?.innerText || '')) return 'login_wall';
|
|
20
28
|
return null;
|
|
21
29
|
};
|
|
@@ -52,49 +60,146 @@ export function stripXhsAuthorDateSuffix(value) {
|
|
|
52
60
|
const stripped = text.replace(/\s*(?:\d{1,2}天前|\d+小时前|\d+分钟前|\d+秒前|刚刚|昨天|前天|\d+周前|\d+个月前|\d{1,2}-\d{1,2}|\d{4}-\d{1,2}-\d{1,2})$/u, '').trim();
|
|
53
61
|
return stripped || text;
|
|
54
62
|
}
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
63
|
+
export function parseLimit(raw) {
|
|
64
|
+
const parsed = Number(raw ?? 20);
|
|
65
|
+
if (!Number.isFinite(parsed) || !Number.isInteger(parsed)) {
|
|
66
|
+
throw new ArgumentError(`--limit must be an integer between 1 and 100, got ${JSON.stringify(raw)}`);
|
|
67
|
+
}
|
|
68
|
+
if (parsed < 1 || parsed > 100) {
|
|
69
|
+
throw new ArgumentError(`--limit must be between 1 and 100, got ${parsed}`);
|
|
70
|
+
}
|
|
71
|
+
return parsed;
|
|
72
|
+
}
|
|
73
|
+
/**
|
|
74
|
+
* Build a "scroll until enough or plateaued" IIFE used in place of a fixed
|
|
75
|
+
* `autoScroll({ times: N })`. Xiaohongshu's search results page lazy-loads
|
|
76
|
+
* ~5-7 notes per scroll, so the previous `times: 2` capped extraction at
|
|
77
|
+
* ~13 items regardless of `--limit` (see #1471). This helper drives scrolls
|
|
78
|
+
* dynamically:
|
|
79
|
+
*
|
|
80
|
+
* - count visible `section.note-item` rows (excluding related-search
|
|
81
|
+
* `.query-note-item` rows)
|
|
82
|
+
* - if count >= targetCount → break (got enough)
|
|
83
|
+
* - if two consecutive scrolls add no new rows → break (DOM plateaued,
|
|
84
|
+
* no more lazy-load available)
|
|
85
|
+
* - hard cap at `maxScrolls` iterations (default 15) to bound runtime
|
|
86
|
+
*
|
|
87
|
+
* Exported so the rednote adapter (same DOM shape) can reuse it.
|
|
88
|
+
*/
|
|
89
|
+
export function buildScrollUntilJs(targetCount, maxScrolls = 15) {
|
|
90
|
+
if (!Number.isSafeInteger(targetCount) || targetCount < 1) {
|
|
91
|
+
throw new ArgumentError(`targetCount must be a positive integer, got ${JSON.stringify(targetCount)}`);
|
|
92
|
+
}
|
|
93
|
+
if (!Number.isSafeInteger(maxScrolls) || maxScrolls < 1) {
|
|
94
|
+
throw new ArgumentError(`maxScrolls must be a positive integer, got ${JSON.stringify(maxScrolls)}`);
|
|
95
|
+
}
|
|
96
|
+
return `
|
|
97
|
+
(async () => {
|
|
98
|
+
const isVisibleNote = (el) => {
|
|
99
|
+
if (el.classList.contains('query-note-item')) return false;
|
|
100
|
+
const rect = el.getBoundingClientRect();
|
|
101
|
+
if (rect.width <= 0 || rect.height <= 0) return false;
|
|
102
|
+
const style = getComputedStyle(el);
|
|
103
|
+
return style.display !== 'none' && style.visibility !== 'hidden';
|
|
104
|
+
};
|
|
105
|
+
// Note containers: legacy \`section.note-item\` first, fallback to
|
|
106
|
+
// any \`<section>\` that wraps a search-result/explore note link
|
|
107
|
+
// (#1506 reports the class being dropped on some xhs renders).
|
|
108
|
+
const collectNoteCards = () => {
|
|
109
|
+
const classMatches = document.querySelectorAll('section.note-item');
|
|
110
|
+
if (classMatches.length > 0) return classMatches;
|
|
111
|
+
const sections = new Set();
|
|
112
|
+
for (const a of document.querySelectorAll('a[href*="/search_result/"], a[href*="/explore/"]')) {
|
|
113
|
+
const section = a.closest('section');
|
|
114
|
+
if (section) sections.add(section);
|
|
115
|
+
}
|
|
116
|
+
return sections;
|
|
117
|
+
};
|
|
118
|
+
const countItems = () => {
|
|
119
|
+
let count = 0;
|
|
120
|
+
for (const el of collectNoteCards()) {
|
|
121
|
+
if (isVisibleNote(el)) count++;
|
|
122
|
+
}
|
|
123
|
+
return count;
|
|
124
|
+
};
|
|
125
|
+
|
|
126
|
+
let lastCount = countItems();
|
|
127
|
+
let plateauRounds = 0;
|
|
128
|
+
for (let i = 0; i < ${maxScrolls}; i++) {
|
|
129
|
+
if (countItems() >= ${targetCount}) break;
|
|
130
|
+
const lastHeight = document.body.scrollHeight;
|
|
131
|
+
window.scrollTo(0, lastHeight);
|
|
132
|
+
await new Promise((resolve) => {
|
|
133
|
+
let to;
|
|
134
|
+
const ob = new MutationObserver(() => {
|
|
135
|
+
if (document.body.scrollHeight > lastHeight) {
|
|
136
|
+
clearTimeout(to);
|
|
137
|
+
ob.disconnect();
|
|
138
|
+
setTimeout(resolve, 200);
|
|
139
|
+
}
|
|
140
|
+
});
|
|
141
|
+
ob.observe(document.body, { childList: true, subtree: true });
|
|
142
|
+
to = setTimeout(() => { ob.disconnect(); resolve(null); }, 2500);
|
|
143
|
+
});
|
|
144
|
+
const newCount = countItems();
|
|
145
|
+
if (newCount === lastCount) {
|
|
146
|
+
plateauRounds++;
|
|
147
|
+
if (plateauRounds >= 2) break;
|
|
148
|
+
} else {
|
|
149
|
+
plateauRounds = 0;
|
|
150
|
+
lastCount = newCount;
|
|
151
|
+
}
|
|
77
152
|
}
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
153
|
+
return countItems();
|
|
154
|
+
})()
|
|
155
|
+
`;
|
|
156
|
+
}
|
|
157
|
+
/**
|
|
158
|
+
* Build the search-result extraction IIFE. The web host is baked into the
|
|
159
|
+
* `normalizeUrl` fallback so relative `/explore/...` hrefs resolve to a full
|
|
160
|
+
* URL on the calling site. Exported so the rednote adapter can call it with
|
|
161
|
+
* `www.rednote.com` without duplicating the selector logic.
|
|
162
|
+
*/
|
|
163
|
+
export function buildSearchExtractJs(webHost) {
|
|
164
|
+
return `
|
|
81
165
|
(() => {
|
|
82
166
|
const normalizeUrl = (href) => {
|
|
83
167
|
if (!href) return '';
|
|
84
168
|
if (href.startsWith('http://') || href.startsWith('https://')) return href;
|
|
85
|
-
if (href.startsWith('/')) return 'https://www.xiaohongshu.com' + href;
|
|
169
|
+
if (href.startsWith('/')) return 'https://${webHost}' + href;
|
|
86
170
|
return '';
|
|
87
171
|
};
|
|
88
172
|
|
|
89
173
|
const cleanText = (value) => (value || '').replace(/\\s+/g, ' ').trim();
|
|
90
174
|
const stripXhsAuthorDateSuffix = ${stripXhsAuthorDateSuffix.toString()};
|
|
175
|
+
const isVisibleNote = (el) => {
|
|
176
|
+
const rect = el.getBoundingClientRect();
|
|
177
|
+
if (rect.width <= 0 || rect.height <= 0) return false;
|
|
178
|
+
const style = getComputedStyle(el);
|
|
179
|
+
return style.display !== 'none' && style.visibility !== 'hidden';
|
|
180
|
+
};
|
|
91
181
|
|
|
92
182
|
const results = [];
|
|
93
183
|
const seen = new Set();
|
|
94
184
|
|
|
95
|
-
|
|
185
|
+
// Note containers: legacy \`section.note-item\` first, fallback to any
|
|
186
|
+
// \`<section>\` wrapping a search-result/explore link (#1506 reports the
|
|
187
|
+
// class being dropped on some xhs renders).
|
|
188
|
+
const collectNoteCards = () => {
|
|
189
|
+
const classMatches = document.querySelectorAll('section.note-item');
|
|
190
|
+
if (classMatches.length > 0) return classMatches;
|
|
191
|
+
const sections = new Set();
|
|
192
|
+
for (const a of document.querySelectorAll('a[href*="/search_result/"], a[href*="/explore/"]')) {
|
|
193
|
+
const section = a.closest('section');
|
|
194
|
+
if (section) sections.add(section);
|
|
195
|
+
}
|
|
196
|
+
return sections;
|
|
197
|
+
};
|
|
198
|
+
|
|
199
|
+
for (const el of collectNoteCards()) {
|
|
96
200
|
// Skip "related searches" sections
|
|
97
|
-
if (el.classList
|
|
201
|
+
if (el.classList?.contains('query-note-item')) continue;
|
|
202
|
+
if (!isVisibleNote(el)) continue;
|
|
98
203
|
|
|
99
204
|
const titleEl = el.querySelector('.title, .note-title, a.title, .footer .title span');
|
|
100
205
|
const nameEl = el.querySelector('a.author .name, .author-name, .nick-name, .name');
|
|
@@ -114,28 +219,87 @@ cli({
|
|
|
114
219
|
const authorLinkEl = el.querySelector('a.author, a[href*="/user/profile/"]');
|
|
115
220
|
|
|
116
221
|
const url = normalizeUrl(detailLinkEl?.getAttribute('href') || '');
|
|
117
|
-
if (!url)
|
|
222
|
+
if (!url) continue;
|
|
118
223
|
|
|
119
224
|
const key = url;
|
|
120
|
-
if (seen.has(key))
|
|
225
|
+
if (seen.has(key)) continue;
|
|
121
226
|
seen.add(key);
|
|
122
227
|
|
|
228
|
+
// Fallback title: the new bare-section render keeps the note caption
|
|
229
|
+
// inside the search_result anchor's first span, not in a class-named
|
|
230
|
+
// .title element. Pull from there when the class-based pick is empty.
|
|
231
|
+
let title = cleanText(titleEl?.textContent || '');
|
|
232
|
+
if (!title) {
|
|
233
|
+
const captionSpan = detailLinkEl?.querySelector('span');
|
|
234
|
+
title = cleanText(captionSpan?.textContent || '');
|
|
235
|
+
}
|
|
236
|
+
|
|
123
237
|
results.push({
|
|
124
|
-
title
|
|
238
|
+
title,
|
|
125
239
|
author,
|
|
126
240
|
likes: cleanText(likesEl?.textContent || '0'),
|
|
127
241
|
url,
|
|
128
242
|
author_url: normalizeUrl(authorLinkEl?.getAttribute('href') || ''),
|
|
129
243
|
});
|
|
130
|
-
}
|
|
244
|
+
}
|
|
131
245
|
|
|
132
246
|
return results;
|
|
133
247
|
})()
|
|
134
|
-
|
|
135
|
-
|
|
248
|
+
`;
|
|
249
|
+
}
|
|
250
|
+
export const command = cli({
|
|
251
|
+
site: 'xiaohongshu',
|
|
252
|
+
name: 'search',
|
|
253
|
+
access: 'read',
|
|
254
|
+
description: '搜索小红书笔记',
|
|
255
|
+
domain: 'www.xiaohongshu.com',
|
|
256
|
+
strategy: Strategy.COOKIE,
|
|
257
|
+
navigateBefore: false,
|
|
258
|
+
args: [
|
|
259
|
+
{ name: 'query', required: true, positional: true, help: 'Search keyword' },
|
|
260
|
+
{ name: 'limit', type: 'int', default: 20, help: 'Number of results' },
|
|
261
|
+
],
|
|
262
|
+
columns: ['rank', 'title', 'author', 'likes', 'published_at', 'url'],
|
|
263
|
+
func: async (page, kwargs) => {
|
|
264
|
+
const limit = parseLimit(kwargs.limit);
|
|
265
|
+
const keyword = encodeURIComponent(kwargs.query);
|
|
266
|
+
await page.goto(`https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`);
|
|
267
|
+
// Wait for search results to render (or login wall to appear).
|
|
268
|
+
// Uses MutationObserver to resolve as soon as content appears,
|
|
269
|
+
// instead of a fixed delay + blind retry.
|
|
270
|
+
const waitResult = await page.evaluate(WAIT_FOR_CONTENT_JS);
|
|
271
|
+
if (waitResult === 'login_wall') {
|
|
272
|
+
throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall');
|
|
273
|
+
}
|
|
274
|
+
// Extract before scrolling. Xiaohongshu uses a virtualized masonry
|
|
275
|
+
// layout, so scrolling to the bottom can evict the initially visible
|
|
276
|
+
// note cards from the DOM and make extraction return [] even though the
|
|
277
|
+
// browser rendered results correctly.
|
|
278
|
+
const initialPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
|
|
279
|
+
let payload = Array.isArray(initialPayload) ? initialPayload : [];
|
|
280
|
+
if (payload.length < limit) {
|
|
281
|
+
// Scroll until enough rows are rendered or the lazy-load plateaus.
|
|
282
|
+
// Replaces the previous fixed `autoScroll({ times: 2 })` which capped
|
|
283
|
+
// extraction at ~13 notes regardless of `--limit` (#1471).
|
|
284
|
+
await page.evaluate(buildScrollUntilJs(limit));
|
|
285
|
+
const scrolledPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
|
|
286
|
+
if (Array.isArray(scrolledPayload)) {
|
|
287
|
+
const seen = new Set(payload.map((item) => item.url).filter(Boolean));
|
|
288
|
+
for (const item of scrolledPayload) {
|
|
289
|
+
if (item?.url && seen.has(item.url))
|
|
290
|
+
continue;
|
|
291
|
+
if (item?.url)
|
|
292
|
+
seen.add(item.url);
|
|
293
|
+
payload.push(item);
|
|
294
|
+
if (payload.length >= limit)
|
|
295
|
+
break;
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
const data = payload;
|
|
136
300
|
return data
|
|
137
301
|
.filter((item) => item.title)
|
|
138
|
-
.slice(0,
|
|
302
|
+
.slice(0, limit)
|
|
139
303
|
.map((item, i) => ({
|
|
140
304
|
rank: i + 1,
|
|
141
305
|
...item,
|
|
@@ -1,7 +1,11 @@
|
|
|
1
1
|
import { describe, expect, it, vi } from 'vitest';
|
|
2
2
|
import { getRegistry } from '@jackwener/opencli/registry';
|
|
3
3
|
import { JSDOM } from 'jsdom';
|
|
4
|
-
import { __test__, noteIdToDate } from './search.js';
|
|
4
|
+
import { __test__, buildScrollUntilJs, noteIdToDate } from './search.js';
|
|
5
|
+
|
|
6
|
+
function markVisible(el) {
|
|
7
|
+
el.getBoundingClientRect = () => ({ width: 100, height: 100 });
|
|
8
|
+
}
|
|
5
9
|
function createPageMock(evaluateResults) {
|
|
6
10
|
const evaluate = vi.fn();
|
|
7
11
|
for (const result of evaluateResults) {
|
|
@@ -31,6 +35,16 @@ function createPageMock(evaluateResults) {
|
|
|
31
35
|
};
|
|
32
36
|
}
|
|
33
37
|
describe('xiaohongshu search', () => {
|
|
38
|
+
it('rejects invalid limit before browser navigation', async () => {
|
|
39
|
+
const cmd = getRegistry().get('xiaohongshu/search');
|
|
40
|
+
const page = createPageMock([]);
|
|
41
|
+
|
|
42
|
+
await expect(cmd.func(page, { query: '特斯拉', limit: 0 })).rejects.toMatchObject({
|
|
43
|
+
code: 'ARGUMENT',
|
|
44
|
+
message: expect.stringContaining('--limit'),
|
|
45
|
+
});
|
|
46
|
+
expect(page.goto).not.toHaveBeenCalled();
|
|
47
|
+
});
|
|
34
48
|
it('throws a clear error when the search page is blocked by a login wall', async () => {
|
|
35
49
|
const cmd = getRegistry().get('xiaohongshu/search');
|
|
36
50
|
expect(cmd?.func).toBeTypeOf('function');
|
|
@@ -39,7 +53,8 @@ describe('xiaohongshu search', () => {
|
|
|
39
53
|
'login_wall',
|
|
40
54
|
]);
|
|
41
55
|
await expect(cmd.func(page, { query: '特斯拉', limit: 5 })).rejects.toThrow('Xiaohongshu search results are blocked behind a login wall');
|
|
42
|
-
//
|
|
56
|
+
// No scroll-until / autoScroll call when a login wall is detected early.
|
|
57
|
+
expect(page.evaluate).toHaveBeenCalledTimes(1);
|
|
43
58
|
expect(page.autoScroll).not.toHaveBeenCalled();
|
|
44
59
|
});
|
|
45
60
|
it('returns ranked results with search_result url and author_url preserved', async () => {
|
|
@@ -50,7 +65,7 @@ describe('xiaohongshu search', () => {
|
|
|
50
65
|
const page = createPageMock([
|
|
51
66
|
// First evaluate: MutationObserver wait (content appeared)
|
|
52
67
|
'content',
|
|
53
|
-
// Second evaluate:
|
|
68
|
+
// Second evaluate: initial DOM extraction (already enough results)
|
|
54
69
|
[
|
|
55
70
|
{
|
|
56
71
|
title: '某鱼买FSD被坑了4万',
|
|
@@ -82,7 +97,7 @@ describe('xiaohongshu search', () => {
|
|
|
82
97
|
const page = createPageMock([
|
|
83
98
|
// First evaluate: MutationObserver wait (content appeared)
|
|
84
99
|
'content',
|
|
85
|
-
// Second evaluate:
|
|
100
|
+
// Second evaluate: initial DOM extraction (already enough valid rows)
|
|
86
101
|
[
|
|
87
102
|
{
|
|
88
103
|
title: 'Result A',
|
|
@@ -118,15 +133,36 @@ describe('xiaohongshu search', () => {
|
|
|
118
133
|
const page = createPageMock([
|
|
119
134
|
// First evaluate: MutationObserver wait (content appeared)
|
|
120
135
|
'content',
|
|
121
|
-
// Second evaluate: extraction (
|
|
136
|
+
// Second evaluate: initial extraction (no rows rendered)
|
|
122
137
|
[],
|
|
123
138
|
]);
|
|
124
139
|
const result = (await cmd.func(page, { query: '测试等待', limit: 5 }));
|
|
125
140
|
expect(result).toHaveLength(0);
|
|
126
141
|
// Only one navigation, no retry
|
|
127
142
|
expect(page.goto).toHaveBeenCalledTimes(1);
|
|
128
|
-
//
|
|
129
|
-
expect(page.evaluate).toHaveBeenCalledTimes(
|
|
143
|
+
// Four evaluate calls: wait, initial extraction, scroll-until, post-scroll extraction.
|
|
144
|
+
expect(page.evaluate).toHaveBeenCalledTimes(4);
|
|
145
|
+
});
|
|
146
|
+
it('scrolls only when the initial extraction has fewer rows than requested', async () => {
|
|
147
|
+
const cmd = getRegistry().get('xiaohongshu/search');
|
|
148
|
+
expect(cmd?.func).toBeTypeOf('function');
|
|
149
|
+
const page = createPageMock([
|
|
150
|
+
'content',
|
|
151
|
+
[
|
|
152
|
+
{ title: 'Result A', author: 'UserA', likes: '10', url: 'https://www.xiaohongshu.com/search_result/aaa', author_url: '' },
|
|
153
|
+
],
|
|
154
|
+
3,
|
|
155
|
+
[
|
|
156
|
+
{ title: 'Result A', author: 'UserA', likes: '10', url: 'https://www.xiaohongshu.com/search_result/aaa', author_url: '' },
|
|
157
|
+
{ title: 'Result B', author: 'UserB', likes: '5', url: 'https://www.xiaohongshu.com/search_result/bbb', author_url: '' },
|
|
158
|
+
],
|
|
159
|
+
]);
|
|
160
|
+
|
|
161
|
+
const result = (await cmd.func(page, { query: '测试等待', limit: 2 }));
|
|
162
|
+
|
|
163
|
+
expect(result).toHaveLength(2);
|
|
164
|
+
expect(result.map((item) => item.title)).toEqual(['Result A', 'Result B']);
|
|
165
|
+
expect(page.evaluate).toHaveBeenCalledTimes(4);
|
|
130
166
|
});
|
|
131
167
|
it('separates fallback author text from appended relative date', async () => {
|
|
132
168
|
const cmd = getRegistry().get('xiaohongshu/search');
|
|
@@ -141,9 +177,10 @@ describe('xiaohongshu search', () => {
|
|
|
141
177
|
<span class="count">8</span>
|
|
142
178
|
</section>
|
|
143
179
|
`, { url: 'https://www.xiaohongshu.com/search_result?keyword=test' });
|
|
180
|
+
markVisible(dom.window.document.querySelector('section.note-item'));
|
|
144
181
|
const page = createPageMock([]);
|
|
145
182
|
page.evaluate.mockImplementationOnce(async () => 'content');
|
|
146
|
-
page.evaluate.mockImplementationOnce(async (script) => Function('document', `return (${script})`)(dom.window.document));
|
|
183
|
+
page.evaluate.mockImplementationOnce(async (script) => Function('document', 'getComputedStyle', `return (${script})`)(dom.window.document, dom.window.getComputedStyle.bind(dom.window)));
|
|
147
184
|
|
|
148
185
|
const result = await cmd.func(page, { query: '测试', limit: 1 });
|
|
149
186
|
|
|
@@ -155,6 +192,43 @@ describe('xiaohongshu search', () => {
|
|
|
155
192
|
});
|
|
156
193
|
});
|
|
157
194
|
});
|
|
195
|
+
describe('buildScrollUntilJs', () => {
|
|
196
|
+
it('inlines the target count and default maxScrolls into the generated IIFE', () => {
|
|
197
|
+
const js = buildScrollUntilJs(40);
|
|
198
|
+
// Target count must drive the early-exit check (#1471: --limit > 13 was capped).
|
|
199
|
+
expect(js).toContain('countItems() >= 40');
|
|
200
|
+
// Default safety cap of 15 to bound runtime on infinite-scroll pages.
|
|
201
|
+
expect(js).toContain('i < 15');
|
|
202
|
+
// Plateau detection so the loop exits early when XHS stops lazy-loading
|
|
203
|
+
// instead of spinning all 15 iterations against an exhausted feed.
|
|
204
|
+
expect(js).toContain('plateauRounds');
|
|
205
|
+
// Related-search rows must not count toward the target.
|
|
206
|
+
expect(js).toContain("classList.contains('query-note-item')");
|
|
207
|
+
});
|
|
208
|
+
it('respects a custom maxScrolls override', () => {
|
|
209
|
+
const js = buildScrollUntilJs(100, 5);
|
|
210
|
+
expect(js).toContain('countItems() >= 100');
|
|
211
|
+
expect(js).toContain('i < 5');
|
|
212
|
+
});
|
|
213
|
+
it('counts only visible real note rows', async () => {
|
|
214
|
+
const dom = new JSDOM(`
|
|
215
|
+
<section class="note-item" id="visible"></section>
|
|
216
|
+
<section class="note-item query-note-item" id="query"></section>
|
|
217
|
+
<section class="note-item" id="hidden" style="display:none"></section>
|
|
218
|
+
`, { url: 'https://www.xiaohongshu.com/search_result?keyword=test' });
|
|
219
|
+
markVisible(dom.window.document.querySelector('#visible'));
|
|
220
|
+
markVisible(dom.window.document.querySelector('#query'));
|
|
221
|
+
markVisible(dom.window.document.querySelector('#hidden'));
|
|
222
|
+
|
|
223
|
+
const result = await Function('document', 'window', 'MutationObserver', 'getComputedStyle', `return (${buildScrollUntilJs(1)})`)(dom.window.document, dom.window, dom.window.MutationObserver, dom.window.getComputedStyle.bind(dom.window));
|
|
224
|
+
|
|
225
|
+
expect(result).toBe(1);
|
|
226
|
+
});
|
|
227
|
+
it('rejects unsafe helper arguments instead of interpolating them into code', () => {
|
|
228
|
+
expect(() => buildScrollUntilJs(0)).toThrow(/targetCount/);
|
|
229
|
+
expect(() => buildScrollUntilJs(10, 0)).toThrow(/maxScrolls/);
|
|
230
|
+
});
|
|
231
|
+
});
|
|
158
232
|
describe('stripXhsAuthorDateSuffix', () => {
|
|
159
233
|
it('only strips trailing date suffixes and preserves date-like author text', () => {
|
|
160
234
|
expect(__test__.stripXhsAuthorDateSuffix('作者名 3天前')).toBe('作者名');
|
|
@@ -27,12 +27,17 @@ export function flattenXhsNoteGroups(noteGroups) {
|
|
|
27
27
|
}
|
|
28
28
|
return notes;
|
|
29
29
|
}
|
|
30
|
-
|
|
30
|
+
/**
|
|
31
|
+
* Build a signed user-profile note URL on the given web host (defaults to
|
|
32
|
+
* `www.xiaohongshu.com`). The rednote adapter passes `'www.rednote.com'` so
|
|
33
|
+
* the same builder works for both sites.
|
|
34
|
+
*/
|
|
35
|
+
export function buildXhsNoteUrl(userId, noteId, xsecToken, webHost = 'www.xiaohongshu.com') {
|
|
31
36
|
const cleanUserId = toCleanString(userId);
|
|
32
37
|
const cleanNoteId = toCleanString(noteId);
|
|
33
38
|
if (!cleanUserId || !cleanNoteId)
|
|
34
39
|
return '';
|
|
35
|
-
const url = new URL(`https://www.xiaohongshu.com/user/profile/${cleanUserId}/${cleanNoteId}`);
|
|
40
|
+
const url = new URL(`https://${webHost}/user/profile/${cleanUserId}/${cleanNoteId}`);
|
|
36
41
|
const cleanToken = toCleanString(xsecToken);
|
|
37
42
|
if (cleanToken) {
|
|
38
43
|
url.searchParams.set('xsec_token', cleanToken);
|
|
@@ -40,7 +45,11 @@ export function buildXhsNoteUrl(userId, noteId, xsecToken) {
|
|
|
40
45
|
}
|
|
41
46
|
return url.toString();
|
|
42
47
|
}
|
|
43
|
-
|
|
48
|
+
/**
|
|
49
|
+
* Normalise a Pinia user-store snapshot into CLI rows. `webHost` is forwarded
|
|
50
|
+
* to `buildXhsNoteUrl` so the resulting URLs point at the calling site.
|
|
51
|
+
*/
|
|
52
|
+
export function extractXhsUserNotes(snapshot, fallbackUserId, webHost = 'www.xiaohongshu.com') {
|
|
44
53
|
const notes = flattenXhsNoteGroups(snapshot.noteGroups);
|
|
45
54
|
const rows = [];
|
|
46
55
|
const seen = new Set();
|
|
@@ -62,7 +71,7 @@ export function extractXhsUserNotes(snapshot, fallbackUserId) {
|
|
|
62
71
|
type: toCleanString(noteCard.type),
|
|
63
72
|
likes,
|
|
64
73
|
cover,
|
|
65
|
-
url: buildXhsNoteUrl(userId || fallbackUserId, noteId, xsecToken),
|
|
74
|
+
url: buildXhsNoteUrl(userId || fallbackUserId, noteId, xsecToken, webHost),
|
|
66
75
|
});
|
|
67
76
|
}
|
|
68
77
|
return rows;
|
|
@@ -20,6 +20,9 @@ describe('buildXhsNoteUrl', () => {
|
|
|
20
20
|
it('includes xsec token when available', () => {
|
|
21
21
|
expect(buildXhsNoteUrl('user123', 'note456', 'token789')).toBe('https://www.xiaohongshu.com/user/profile/user123/note456?xsec_token=token789&xsec_source=pc_user');
|
|
22
22
|
});
|
|
23
|
+
it('emits a rednote URL when webHost is overridden', () => {
|
|
24
|
+
expect(buildXhsNoteUrl('user123', 'note456', 'token789', 'www.rednote.com')).toBe('https://www.rednote.com/user/profile/user123/note456?xsec_token=token789&xsec_source=pc_user');
|
|
25
|
+
});
|
|
23
26
|
});
|
|
24
27
|
describe('extractXhsUserNotes', () => {
|
|
25
28
|
it('normalizes grouped note cards into CLI rows', () => {
|
|
@@ -96,4 +99,21 @@ describe('extractXhsUserNotes', () => {
|
|
|
96
99
|
expect(rows).toHaveLength(1);
|
|
97
100
|
expect(rows[0]?.title).toBe('keep me');
|
|
98
101
|
});
|
|
102
|
+
it('emits rednote-hosted URLs when webHost is overridden', () => {
|
|
103
|
+
const rows = extractXhsUserNotes({
|
|
104
|
+
noteGroups: [
|
|
105
|
+
[
|
|
106
|
+
{
|
|
107
|
+
xsecToken: 'tok',
|
|
108
|
+
noteCard: {
|
|
109
|
+
noteId: 'note-red',
|
|
110
|
+
displayTitle: 'rednote note',
|
|
111
|
+
user: { userId: 'user-red' },
|
|
112
|
+
},
|
|
113
|
+
},
|
|
114
|
+
],
|
|
115
|
+
],
|
|
116
|
+
}, 'fallback-user', 'www.rednote.com');
|
|
117
|
+
expect(rows[0]?.url).toBe('https://www.rednote.com/user/profile/user-red/note-red?xsec_token=tok&xsec_source=pc_user');
|
|
118
|
+
});
|
|
99
119
|
});
|