@jackwener/opencli 1.7.17 → 1.7.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118) hide show
  1. package/README.md +10 -8
  2. package/README.zh-CN.md +9 -8
  3. package/cli-manifest.json +585 -9
  4. package/clis/ctrip/ctrip.test.js +486 -1
  5. package/clis/ctrip/flight.js +136 -0
  6. package/clis/ctrip/hotel-search.js +132 -0
  7. package/clis/ctrip/utils.js +298 -0
  8. package/clis/doubao/utils.js +17 -0
  9. package/clis/doubao/utils.test.js +61 -0
  10. package/clis/google/search.js +16 -6
  11. package/clis/google-scholar/search.js +20 -5
  12. package/clis/google-scholar/search.test.js +35 -2
  13. package/clis/reddit/home.js +117 -0
  14. package/clis/reddit/home.test.js +127 -0
  15. package/clis/reddit/read.js +400 -54
  16. package/clis/reddit/read.test.js +315 -12
  17. package/clis/reddit/reply.js +182 -0
  18. package/clis/reddit/reply.test.js +89 -0
  19. package/clis/reddit/subreddit-info.js +117 -0
  20. package/clis/reddit/subreddit-info.test.js +163 -0
  21. package/clis/reddit/whoami.js +84 -0
  22. package/clis/reddit/whoami.test.js +105 -0
  23. package/clis/rednote/comments.js +76 -0
  24. package/clis/rednote/download.js +59 -0
  25. package/clis/rednote/feed.js +95 -0
  26. package/clis/rednote/navigation.test.js +26 -0
  27. package/clis/rednote/note.js +68 -0
  28. package/clis/rednote/notifications.js +139 -0
  29. package/clis/rednote/rednote.test.js +157 -0
  30. package/clis/rednote/search.js +101 -0
  31. package/clis/rednote/user.js +55 -0
  32. package/clis/twitter/bookmark-folder.js +3 -1
  33. package/clis/twitter/bookmarks.js +3 -1
  34. package/clis/twitter/followers.js +20 -5
  35. package/clis/twitter/followers.test.js +44 -0
  36. package/clis/twitter/following.js +36 -20
  37. package/clis/twitter/following.test.js +60 -8
  38. package/clis/twitter/likes.js +28 -13
  39. package/clis/twitter/likes.test.js +111 -1
  40. package/clis/twitter/list-add.js +128 -204
  41. package/clis/twitter/list-add.test.js +97 -1
  42. package/clis/twitter/list-tweets.js +13 -4
  43. package/clis/twitter/list-tweets.test.js +48 -0
  44. package/clis/twitter/lists.js +5 -2
  45. package/clis/twitter/post.js +23 -4
  46. package/clis/twitter/post.test.js +30 -0
  47. package/clis/twitter/profile.js +16 -8
  48. package/clis/twitter/profile.test.js +39 -0
  49. package/clis/twitter/reply.js +133 -10
  50. package/clis/twitter/reply.test.js +55 -0
  51. package/clis/twitter/search.js +188 -170
  52. package/clis/twitter/search.test.js +96 -258
  53. package/clis/twitter/shared.js +167 -16
  54. package/clis/twitter/shared.test.js +102 -1
  55. package/clis/twitter/timeline.js +3 -1
  56. package/clis/twitter/tweets.js +147 -51
  57. package/clis/twitter/tweets.test.js +238 -1
  58. package/clis/xiaohongshu/comments.js +57 -26
  59. package/clis/xiaohongshu/comments.test.js +63 -1
  60. package/clis/xiaohongshu/download.js +32 -23
  61. package/clis/xiaohongshu/feed.js +23 -15
  62. package/clis/xiaohongshu/note-helpers.js +16 -6
  63. package/clis/xiaohongshu/note.js +26 -20
  64. package/clis/xiaohongshu/notifications.js +26 -19
  65. package/clis/xiaohongshu/search.js +201 -37
  66. package/clis/xiaohongshu/search.test.js +82 -8
  67. package/clis/xiaohongshu/user-helpers.js +13 -4
  68. package/clis/xiaohongshu/user-helpers.test.js +20 -0
  69. package/clis/xiaohongshu/user.js +9 -4
  70. package/clis/xueqiu/earnings-date.js +2 -2
  71. package/clis/xueqiu/kline.js +2 -2
  72. package/clis/xueqiu/utils.js +19 -0
  73. package/clis/xueqiu/utils.test.js +26 -0
  74. package/clis/youtube/transcript.js +28 -3
  75. package/clis/youtube/transcript.test.js +90 -1
  76. package/clis/zhihu/answer-detail.js +233 -0
  77. package/clis/zhihu/answer-detail.test.js +330 -0
  78. package/clis/zhihu/question.js +44 -10
  79. package/clis/zhihu/question.test.js +78 -1
  80. package/clis/zhihu/recommend.js +103 -0
  81. package/clis/zhihu/recommend.test.js +143 -0
  82. package/dist/src/browser/base-page.d.ts +3 -2
  83. package/dist/src/browser/base-page.test.js +2 -2
  84. package/dist/src/browser/cdp.js +3 -3
  85. package/dist/src/browser/page.d.ts +3 -2
  86. package/dist/src/browser/page.js +4 -4
  87. package/dist/src/browser/page.test.js +31 -0
  88. package/dist/src/browser/utils.d.ts +10 -0
  89. package/dist/src/browser/utils.js +37 -0
  90. package/dist/src/browser/utils.test.d.ts +1 -0
  91. package/dist/src/browser/utils.test.js +29 -0
  92. package/dist/src/cli-argv-preprocess.d.ts +37 -0
  93. package/dist/src/cli-argv-preprocess.js +131 -0
  94. package/dist/src/cli-argv-preprocess.test.d.ts +1 -0
  95. package/dist/src/cli-argv-preprocess.test.js +130 -0
  96. package/dist/src/cli.js +123 -86
  97. package/dist/src/cli.test.js +32 -22
  98. package/dist/src/commands/daemon.js +6 -7
  99. package/dist/src/doctor.js +21 -17
  100. package/dist/src/doctor.test.js +2 -0
  101. package/dist/src/download/progress.js +15 -11
  102. package/dist/src/download/progress.test.d.ts +1 -0
  103. package/dist/src/download/progress.test.js +25 -0
  104. package/dist/src/execution.js +1 -3
  105. package/dist/src/execution.test.js +4 -16
  106. package/dist/src/help.d.ts +11 -0
  107. package/dist/src/help.js +46 -5
  108. package/dist/src/logger.js +8 -9
  109. package/dist/src/main.js +16 -0
  110. package/dist/src/output.js +4 -5
  111. package/dist/src/runtime-detect.d.ts +1 -1
  112. package/dist/src/runtime-detect.js +1 -1
  113. package/dist/src/runtime-detect.test.js +3 -2
  114. package/dist/src/tui.d.ts +0 -1
  115. package/dist/src/tui.js +9 -22
  116. package/dist/src/types.d.ts +3 -1
  117. package/dist/src/update-check.js +4 -5
  118. package/package.json +5 -4
@@ -9,25 +9,12 @@
9
9
  import { cli, Strategy } from '@jackwener/opencli/registry';
10
10
  import { AuthRequiredError, CliError, EmptyResultError } from '@jackwener/opencli/errors';
11
11
  import { parseNoteId, buildNoteUrl } from './note-helpers.js';
12
- cli({
13
- site: 'xiaohongshu',
14
- name: 'note',
15
- access: 'read',
16
- description: '获取小红书笔记正文和互动数据',
17
- domain: 'www.xiaohongshu.com',
18
- strategy: Strategy.COOKIE,
19
- navigateBefore: false,
20
- args: [
21
- { name: 'note-id', required: true, positional: true, help: 'Full Xiaohongshu note URL with xsec_token' },
22
- ],
23
- columns: ['field', 'value'],
24
- func: async (page, kwargs) => {
25
- const raw = String(kwargs['note-id']);
26
- const noteId = parseNoteId(raw);
27
- const url = buildNoteUrl(raw, { commandName: 'xiaohongshu note' });
28
- await page.goto(url);
29
- await page.wait({ time: 2 + Math.random() * 3 });
30
- const data = await page.evaluate(`
12
+ /**
13
+ * Host-agnostic IIFE that scrapes note title / author / counts / tags from a
14
+ * rendered note detail page. Exported so the rednote adapter can reuse the
15
+ * exact same selector set without copying it.
16
+ */
17
+ export const NOTE_EXTRACT_JS = `
31
18
  (() => {
32
19
  const bodyText = document.body?.innerText || ''
33
20
  const loginWall = /登录后查看|请登录/.test(bodyText)
@@ -58,7 +45,26 @@ cli({
58
45
 
59
46
  return { pageUrl: location.href, securityBlock, loginWall, notFound, title, desc, author, likes, collects, comments, tags }
60
47
  })()
61
- `);
48
+ `;
49
+ export const command = cli({
50
+ site: 'xiaohongshu',
51
+ name: 'note',
52
+ access: 'read',
53
+ description: '获取小红书笔记正文和互动数据',
54
+ domain: 'www.xiaohongshu.com',
55
+ strategy: Strategy.COOKIE,
56
+ navigateBefore: false,
57
+ args: [
58
+ { name: 'note-id', required: true, positional: true, help: 'Full Xiaohongshu note URL with xsec_token' },
59
+ ],
60
+ columns: ['field', 'value'],
61
+ func: async (page, kwargs) => {
62
+ const raw = String(kwargs['note-id']);
63
+ const noteId = parseNoteId(raw);
64
+ const url = buildNoteUrl(raw, { commandName: 'xiaohongshu note' });
65
+ await page.goto(url);
66
+ await page.wait({ time: 2 + Math.random() * 3 });
67
+ const data = await page.evaluate(NOTE_EXTRACT_JS);
62
68
  if (!data || typeof data !== 'object') {
63
69
  throw new EmptyResultError('xiaohongshu/note', 'Unexpected evaluate response');
64
70
  }
@@ -1,23 +1,11 @@
1
1
  import { cli, Strategy } from '@jackwener/opencli/registry';
2
- cli({
3
- site: 'xiaohongshu',
4
- name: 'notifications',
5
- access: 'read',
6
- description: '小红书通知 (mentions/likes/connections)',
7
- domain: 'www.xiaohongshu.com',
8
- strategy: Strategy.INTERCEPT,
9
- browser: true,
10
- args: [
11
- {
12
- name: 'type',
13
- default: 'mentions',
14
- help: 'Notification type: mentions, likes, or connections',
15
- },
16
- { name: 'limit', type: 'int', default: 20, help: 'Number of notifications to return' },
17
- ],
18
- columns: ['rank', 'user', 'action', 'content', 'note', 'time'],
19
- pipeline: [
20
- { navigate: 'https://www.xiaohongshu.com/notification' },
2
+ /**
3
+ * Build the notifications pipeline for the given web host. Exported so the
4
+ * rednote adapter can register the same pipeline against www.rednote.com.
5
+ */
6
+ export function buildNotificationsPipeline(webHost) {
7
+ return [
8
+ { navigate: `https://${webHost}/notification` },
21
9
  { tap: {
22
10
  store: 'notification',
23
11
  action: 'getNotification',
@@ -35,5 +23,24 @@ cli({
35
23
  time: '${{ item.time }}',
36
24
  } },
37
25
  { limit: '${{ args.limit | default(20) }}' },
26
+ ];
27
+ }
28
+ export const command = cli({
29
+ site: 'xiaohongshu',
30
+ name: 'notifications',
31
+ access: 'read',
32
+ description: '小红书通知 (mentions/likes/connections)',
33
+ domain: 'www.xiaohongshu.com',
34
+ strategy: Strategy.INTERCEPT,
35
+ browser: true,
36
+ args: [
37
+ {
38
+ name: 'type',
39
+ default: 'mentions',
40
+ help: 'Notification type: mentions, likes, or connections',
41
+ },
42
+ { name: 'limit', type: 'int', default: 20, help: 'Number of notifications to return' },
38
43
  ],
44
+ columns: ['rank', 'user', 'action', 'content', 'note', 'time'],
45
+ pipeline: buildNotificationsPipeline('www.xiaohongshu.com'),
39
46
  });
@@ -6,16 +6,24 @@
6
6
  * Ref: https://github.com/jackwener/opencli/issues/10
7
7
  */
8
8
  import { cli, Strategy } from '@jackwener/opencli/registry';
9
- import { AuthRequiredError } from '@jackwener/opencli/errors';
9
+ import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors';
10
10
  /**
11
11
  * Wait for search results or login wall using MutationObserver (max 5s).
12
12
  * Returns 'content' if note items appeared, 'login_wall' if login gate
13
13
  * detected, or 'timeout' if neither appeared within the deadline.
14
+ *
15
+ * Note-item detection tries the legacy `section.note-item` class first
16
+ * (still observed in many sessions, including rednote) and falls back to
17
+ * a `<section>` element containing a `/search_result/` or `/explore/`
18
+ * link. Issue #1506 reports the class being dropped on some xhs renders.
14
19
  */
15
20
  const WAIT_FOR_CONTENT_JS = `
16
21
  new Promise((resolve) => {
22
+ const findNoteCard = () => document.querySelector(
23
+ 'section.note-item, section:has(a[href*="/search_result/"]), section:has(a[href*="/explore/"])'
24
+ );
17
25
  const detect = () => {
18
- if (document.querySelector('section.note-item')) return 'content';
26
+ if (findNoteCard()) return 'content';
19
27
  if (/登录后查看搜索结果/.test(document.body?.innerText || '')) return 'login_wall';
20
28
  return null;
21
29
  };
@@ -52,49 +60,146 @@ export function stripXhsAuthorDateSuffix(value) {
52
60
  const stripped = text.replace(/\s*(?:\d{1,2}天前|\d+小时前|\d+分钟前|\d+秒前|刚刚|昨天|前天|\d+周前|\d+个月前|\d{1,2}-\d{1,2}|\d{4}-\d{1,2}-\d{1,2})$/u, '').trim();
53
61
  return stripped || text;
54
62
  }
55
- cli({
56
- site: 'xiaohongshu',
57
- name: 'search',
58
- access: 'read',
59
- description: '搜索小红书笔记',
60
- domain: 'www.xiaohongshu.com',
61
- strategy: Strategy.COOKIE,
62
- navigateBefore: false,
63
- args: [
64
- { name: 'query', required: true, positional: true, help: 'Search keyword' },
65
- { name: 'limit', type: 'int', default: 20, help: 'Number of results' },
66
- ],
67
- columns: ['rank', 'title', 'author', 'likes', 'published_at', 'url'],
68
- func: async (page, kwargs) => {
69
- const keyword = encodeURIComponent(kwargs.query);
70
- await page.goto(`https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`);
71
- // Wait for search results to render (or login wall to appear).
72
- // Uses MutationObserver to resolve as soon as content appears,
73
- // instead of a fixed delay + blind retry.
74
- const waitResult = await page.evaluate(WAIT_FOR_CONTENT_JS);
75
- if (waitResult === 'login_wall') {
76
- throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall');
63
+ export function parseLimit(raw) {
64
+ const parsed = Number(raw ?? 20);
65
+ if (!Number.isFinite(parsed) || !Number.isInteger(parsed)) {
66
+ throw new ArgumentError(`--limit must be an integer between 1 and 100, got ${JSON.stringify(raw)}`);
67
+ }
68
+ if (parsed < 1 || parsed > 100) {
69
+ throw new ArgumentError(`--limit must be between 1 and 100, got ${parsed}`);
70
+ }
71
+ return parsed;
72
+ }
73
+ /**
74
+ * Build a "scroll until enough or plateaued" IIFE used in place of a fixed
75
+ * `autoScroll({ times: N })`. Xiaohongshu's search results page lazy-loads
76
+ * ~5-7 notes per scroll, so the previous `times: 2` capped extraction at
77
+ * ~13 items regardless of `--limit` (see #1471). This helper drives scrolls
78
+ * dynamically:
79
+ *
80
+ * - count visible `section.note-item` rows (excluding related-search
81
+ * `.query-note-item` rows)
82
+ * - if count >= targetCount → break (got enough)
83
+ * - if two consecutive scrolls add no new rows → break (DOM plateaued,
84
+ * no more lazy-load available)
85
+ * - hard cap at `maxScrolls` iterations (default 15) to bound runtime
86
+ *
87
+ * Exported so the rednote adapter (same DOM shape) can reuse it.
88
+ */
89
+ export function buildScrollUntilJs(targetCount, maxScrolls = 15) {
90
+ if (!Number.isSafeInteger(targetCount) || targetCount < 1) {
91
+ throw new ArgumentError(`targetCount must be a positive integer, got ${JSON.stringify(targetCount)}`);
92
+ }
93
+ if (!Number.isSafeInteger(maxScrolls) || maxScrolls < 1) {
94
+ throw new ArgumentError(`maxScrolls must be a positive integer, got ${JSON.stringify(maxScrolls)}`);
95
+ }
96
+ return `
97
+ (async () => {
98
+ const isVisibleNote = (el) => {
99
+ if (el.classList.contains('query-note-item')) return false;
100
+ const rect = el.getBoundingClientRect();
101
+ if (rect.width <= 0 || rect.height <= 0) return false;
102
+ const style = getComputedStyle(el);
103
+ return style.display !== 'none' && style.visibility !== 'hidden';
104
+ };
105
+ // Note containers: legacy \`section.note-item\` first, fallback to
106
+ // any \`<section>\` that wraps a search-result/explore note link
107
+ // (#1506 reports the class being dropped on some xhs renders).
108
+ const collectNoteCards = () => {
109
+ const classMatches = document.querySelectorAll('section.note-item');
110
+ if (classMatches.length > 0) return classMatches;
111
+ const sections = new Set();
112
+ for (const a of document.querySelectorAll('a[href*="/search_result/"], a[href*="/explore/"]')) {
113
+ const section = a.closest('section');
114
+ if (section) sections.add(section);
115
+ }
116
+ return sections;
117
+ };
118
+ const countItems = () => {
119
+ let count = 0;
120
+ for (const el of collectNoteCards()) {
121
+ if (isVisibleNote(el)) count++;
122
+ }
123
+ return count;
124
+ };
125
+
126
+ let lastCount = countItems();
127
+ let plateauRounds = 0;
128
+ for (let i = 0; i < ${maxScrolls}; i++) {
129
+ if (countItems() >= ${targetCount}) break;
130
+ const lastHeight = document.body.scrollHeight;
131
+ window.scrollTo(0, lastHeight);
132
+ await new Promise((resolve) => {
133
+ let to;
134
+ const ob = new MutationObserver(() => {
135
+ if (document.body.scrollHeight > lastHeight) {
136
+ clearTimeout(to);
137
+ ob.disconnect();
138
+ setTimeout(resolve, 200);
139
+ }
140
+ });
141
+ ob.observe(document.body, { childList: true, subtree: true });
142
+ to = setTimeout(() => { ob.disconnect(); resolve(null); }, 2500);
143
+ });
144
+ const newCount = countItems();
145
+ if (newCount === lastCount) {
146
+ plateauRounds++;
147
+ if (plateauRounds >= 2) break;
148
+ } else {
149
+ plateauRounds = 0;
150
+ lastCount = newCount;
151
+ }
77
152
  }
78
- // Scroll a couple of times to load more results
79
- await page.autoScroll({ times: 2 });
80
- const payload = await page.evaluate(`
153
+ return countItems();
154
+ })()
155
+ `;
156
+ }
157
+ /**
158
+ * Build the search-result extraction IIFE. The web host is baked into the
159
+ * `normalizeUrl` fallback so relative `/explore/...` hrefs resolve to a full
160
+ * URL on the calling site. Exported so the rednote adapter can call it with
161
+ * `www.rednote.com` without duplicating the selector logic.
162
+ */
163
+ export function buildSearchExtractJs(webHost) {
164
+ return `
81
165
  (() => {
82
166
  const normalizeUrl = (href) => {
83
167
  if (!href) return '';
84
168
  if (href.startsWith('http://') || href.startsWith('https://')) return href;
85
- if (href.startsWith('/')) return 'https://www.xiaohongshu.com' + href;
169
+ if (href.startsWith('/')) return 'https://${webHost}' + href;
86
170
  return '';
87
171
  };
88
172
 
89
173
  const cleanText = (value) => (value || '').replace(/\\s+/g, ' ').trim();
90
174
  const stripXhsAuthorDateSuffix = ${stripXhsAuthorDateSuffix.toString()};
175
+ const isVisibleNote = (el) => {
176
+ const rect = el.getBoundingClientRect();
177
+ if (rect.width <= 0 || rect.height <= 0) return false;
178
+ const style = getComputedStyle(el);
179
+ return style.display !== 'none' && style.visibility !== 'hidden';
180
+ };
91
181
 
92
182
  const results = [];
93
183
  const seen = new Set();
94
184
 
95
- document.querySelectorAll('section.note-item').forEach(el => {
185
+ // Note containers: legacy \`section.note-item\` first, fallback to any
186
+ // \`<section>\` wrapping a search-result/explore link (#1506 reports the
187
+ // class being dropped on some xhs renders).
188
+ const collectNoteCards = () => {
189
+ const classMatches = document.querySelectorAll('section.note-item');
190
+ if (classMatches.length > 0) return classMatches;
191
+ const sections = new Set();
192
+ for (const a of document.querySelectorAll('a[href*="/search_result/"], a[href*="/explore/"]')) {
193
+ const section = a.closest('section');
194
+ if (section) sections.add(section);
195
+ }
196
+ return sections;
197
+ };
198
+
199
+ for (const el of collectNoteCards()) {
96
200
  // Skip "related searches" sections
97
- if (el.classList.contains('query-note-item')) return;
201
+ if (el.classList?.contains('query-note-item')) continue;
202
+ if (!isVisibleNote(el)) continue;
98
203
 
99
204
  const titleEl = el.querySelector('.title, .note-title, a.title, .footer .title span');
100
205
  const nameEl = el.querySelector('a.author .name, .author-name, .nick-name, .name');
@@ -114,28 +219,87 @@ cli({
114
219
  const authorLinkEl = el.querySelector('a.author, a[href*="/user/profile/"]');
115
220
 
116
221
  const url = normalizeUrl(detailLinkEl?.getAttribute('href') || '');
117
- if (!url) return;
222
+ if (!url) continue;
118
223
 
119
224
  const key = url;
120
- if (seen.has(key)) return;
225
+ if (seen.has(key)) continue;
121
226
  seen.add(key);
122
227
 
228
+ // Fallback title: the new bare-section render keeps the note caption
229
+ // inside the search_result anchor's first span, not in a class-named
230
+ // .title element. Pull from there when the class-based pick is empty.
231
+ let title = cleanText(titleEl?.textContent || '');
232
+ if (!title) {
233
+ const captionSpan = detailLinkEl?.querySelector('span');
234
+ title = cleanText(captionSpan?.textContent || '');
235
+ }
236
+
123
237
  results.push({
124
- title: cleanText(titleEl?.textContent || ''),
238
+ title,
125
239
  author,
126
240
  likes: cleanText(likesEl?.textContent || '0'),
127
241
  url,
128
242
  author_url: normalizeUrl(authorLinkEl?.getAttribute('href') || ''),
129
243
  });
130
- });
244
+ }
131
245
 
132
246
  return results;
133
247
  })()
134
- `);
135
- const data = Array.isArray(payload) ? payload : [];
248
+ `;
249
+ }
250
+ export const command = cli({
251
+ site: 'xiaohongshu',
252
+ name: 'search',
253
+ access: 'read',
254
+ description: '搜索小红书笔记',
255
+ domain: 'www.xiaohongshu.com',
256
+ strategy: Strategy.COOKIE,
257
+ navigateBefore: false,
258
+ args: [
259
+ { name: 'query', required: true, positional: true, help: 'Search keyword' },
260
+ { name: 'limit', type: 'int', default: 20, help: 'Number of results' },
261
+ ],
262
+ columns: ['rank', 'title', 'author', 'likes', 'published_at', 'url'],
263
+ func: async (page, kwargs) => {
264
+ const limit = parseLimit(kwargs.limit);
265
+ const keyword = encodeURIComponent(kwargs.query);
266
+ await page.goto(`https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`);
267
+ // Wait for search results to render (or login wall to appear).
268
+ // Uses MutationObserver to resolve as soon as content appears,
269
+ // instead of a fixed delay + blind retry.
270
+ const waitResult = await page.evaluate(WAIT_FOR_CONTENT_JS);
271
+ if (waitResult === 'login_wall') {
272
+ throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall');
273
+ }
274
+ // Extract before scrolling. Xiaohongshu uses a virtualized masonry
275
+ // layout, so scrolling to the bottom can evict the initially visible
276
+ // note cards from the DOM and make extraction return [] even though the
277
+ // browser rendered results correctly.
278
+ const initialPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
279
+ let payload = Array.isArray(initialPayload) ? initialPayload : [];
280
+ if (payload.length < limit) {
281
+ // Scroll until enough rows are rendered or the lazy-load plateaus.
282
+ // Replaces the previous fixed `autoScroll({ times: 2 })` which capped
283
+ // extraction at ~13 notes regardless of `--limit` (#1471).
284
+ await page.evaluate(buildScrollUntilJs(limit));
285
+ const scrolledPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
286
+ if (Array.isArray(scrolledPayload)) {
287
+ const seen = new Set(payload.map((item) => item.url).filter(Boolean));
288
+ for (const item of scrolledPayload) {
289
+ if (item?.url && seen.has(item.url))
290
+ continue;
291
+ if (item?.url)
292
+ seen.add(item.url);
293
+ payload.push(item);
294
+ if (payload.length >= limit)
295
+ break;
296
+ }
297
+ }
298
+ }
299
+ const data = payload;
136
300
  return data
137
301
  .filter((item) => item.title)
138
- .slice(0, kwargs.limit)
302
+ .slice(0, limit)
139
303
  .map((item, i) => ({
140
304
  rank: i + 1,
141
305
  ...item,
@@ -1,7 +1,11 @@
1
1
  import { describe, expect, it, vi } from 'vitest';
2
2
  import { getRegistry } from '@jackwener/opencli/registry';
3
3
  import { JSDOM } from 'jsdom';
4
- import { __test__, noteIdToDate } from './search.js';
4
+ import { __test__, buildScrollUntilJs, noteIdToDate } from './search.js';
5
+
6
+ function markVisible(el) {
7
+ el.getBoundingClientRect = () => ({ width: 100, height: 100 });
8
+ }
5
9
  function createPageMock(evaluateResults) {
6
10
  const evaluate = vi.fn();
7
11
  for (const result of evaluateResults) {
@@ -31,6 +35,16 @@ function createPageMock(evaluateResults) {
31
35
  };
32
36
  }
33
37
  describe('xiaohongshu search', () => {
38
+ it('rejects invalid limit before browser navigation', async () => {
39
+ const cmd = getRegistry().get('xiaohongshu/search');
40
+ const page = createPageMock([]);
41
+
42
+ await expect(cmd.func(page, { query: '特斯拉', limit: 0 })).rejects.toMatchObject({
43
+ code: 'ARGUMENT',
44
+ message: expect.stringContaining('--limit'),
45
+ });
46
+ expect(page.goto).not.toHaveBeenCalled();
47
+ });
34
48
  it('throws a clear error when the search page is blocked by a login wall', async () => {
35
49
  const cmd = getRegistry().get('xiaohongshu/search');
36
50
  expect(cmd?.func).toBeTypeOf('function');
@@ -39,7 +53,8 @@ describe('xiaohongshu search', () => {
39
53
  'login_wall',
40
54
  ]);
41
55
  await expect(cmd.func(page, { query: '特斯拉', limit: 5 })).rejects.toThrow('Xiaohongshu search results are blocked behind a login wall');
42
- // autoScroll must NOT be called when a login wall is detected early
56
+ // No scroll-until / autoScroll call when a login wall is detected early.
57
+ expect(page.evaluate).toHaveBeenCalledTimes(1);
43
58
  expect(page.autoScroll).not.toHaveBeenCalled();
44
59
  });
45
60
  it('returns ranked results with search_result url and author_url preserved', async () => {
@@ -50,7 +65,7 @@ describe('xiaohongshu search', () => {
50
65
  const page = createPageMock([
51
66
  // First evaluate: MutationObserver wait (content appeared)
52
67
  'content',
53
- // Second evaluate: main DOM extraction (returns array directly)
68
+ // Second evaluate: initial DOM extraction (already enough results)
54
69
  [
55
70
  {
56
71
  title: '某鱼买FSD被坑了4万',
@@ -82,7 +97,7 @@ describe('xiaohongshu search', () => {
82
97
  const page = createPageMock([
83
98
  // First evaluate: MutationObserver wait (content appeared)
84
99
  'content',
85
- // Second evaluate: main DOM extraction (returns array directly)
100
+ // Second evaluate: initial DOM extraction (already enough valid rows)
86
101
  [
87
102
  {
88
103
  title: 'Result A',
@@ -118,15 +133,36 @@ describe('xiaohongshu search', () => {
118
133
  const page = createPageMock([
119
134
  // First evaluate: MutationObserver wait (content appeared)
120
135
  'content',
121
- // Second evaluate: extraction (returns empty array)
136
+ // Second evaluate: initial extraction (no rows rendered)
122
137
  [],
123
138
  ]);
124
139
  const result = (await cmd.func(page, { query: '测试等待', limit: 5 }));
125
140
  expect(result).toHaveLength(0);
126
141
  // Only one navigation, no retry
127
142
  expect(page.goto).toHaveBeenCalledTimes(1);
128
- // Two evaluate calls: wait + extraction
129
- expect(page.evaluate).toHaveBeenCalledTimes(2);
143
+ // Four evaluate calls: wait, initial extraction, scroll-until, post-scroll extraction.
144
+ expect(page.evaluate).toHaveBeenCalledTimes(4);
145
+ });
146
+ it('scrolls only when the initial extraction has fewer rows than requested', async () => {
147
+ const cmd = getRegistry().get('xiaohongshu/search');
148
+ expect(cmd?.func).toBeTypeOf('function');
149
+ const page = createPageMock([
150
+ 'content',
151
+ [
152
+ { title: 'Result A', author: 'UserA', likes: '10', url: 'https://www.xiaohongshu.com/search_result/aaa', author_url: '' },
153
+ ],
154
+ 3,
155
+ [
156
+ { title: 'Result A', author: 'UserA', likes: '10', url: 'https://www.xiaohongshu.com/search_result/aaa', author_url: '' },
157
+ { title: 'Result B', author: 'UserB', likes: '5', url: 'https://www.xiaohongshu.com/search_result/bbb', author_url: '' },
158
+ ],
159
+ ]);
160
+
161
+ const result = (await cmd.func(page, { query: '测试等待', limit: 2 }));
162
+
163
+ expect(result).toHaveLength(2);
164
+ expect(result.map((item) => item.title)).toEqual(['Result A', 'Result B']);
165
+ expect(page.evaluate).toHaveBeenCalledTimes(4);
130
166
  });
131
167
  it('separates fallback author text from appended relative date', async () => {
132
168
  const cmd = getRegistry().get('xiaohongshu/search');
@@ -141,9 +177,10 @@ describe('xiaohongshu search', () => {
141
177
  <span class="count">8</span>
142
178
  </section>
143
179
  `, { url: 'https://www.xiaohongshu.com/search_result?keyword=test' });
180
+ markVisible(dom.window.document.querySelector('section.note-item'));
144
181
  const page = createPageMock([]);
145
182
  page.evaluate.mockImplementationOnce(async () => 'content');
146
- page.evaluate.mockImplementationOnce(async (script) => Function('document', `return (${script})`)(dom.window.document));
183
+ page.evaluate.mockImplementationOnce(async (script) => Function('document', 'getComputedStyle', `return (${script})`)(dom.window.document, dom.window.getComputedStyle.bind(dom.window)));
147
184
 
148
185
  const result = await cmd.func(page, { query: '测试', limit: 1 });
149
186
 
@@ -155,6 +192,43 @@ describe('xiaohongshu search', () => {
155
192
  });
156
193
  });
157
194
  });
195
+ describe('buildScrollUntilJs', () => {
196
+ it('inlines the target count and default maxScrolls into the generated IIFE', () => {
197
+ const js = buildScrollUntilJs(40);
198
+ // Target count must drive the early-exit check (#1471: --limit > 13 was capped).
199
+ expect(js).toContain('countItems() >= 40');
200
+ // Default safety cap of 15 to bound runtime on infinite-scroll pages.
201
+ expect(js).toContain('i < 15');
202
+ // Plateau detection so the loop exits early when XHS stops lazy-loading
203
+ // instead of spinning all 15 iterations against an exhausted feed.
204
+ expect(js).toContain('plateauRounds');
205
+ // Related-search rows must not count toward the target.
206
+ expect(js).toContain("classList.contains('query-note-item')");
207
+ });
208
+ it('respects a custom maxScrolls override', () => {
209
+ const js = buildScrollUntilJs(100, 5);
210
+ expect(js).toContain('countItems() >= 100');
211
+ expect(js).toContain('i < 5');
212
+ });
213
+ it('counts only visible real note rows', async () => {
214
+ const dom = new JSDOM(`
215
+ <section class="note-item" id="visible"></section>
216
+ <section class="note-item query-note-item" id="query"></section>
217
+ <section class="note-item" id="hidden" style="display:none"></section>
218
+ `, { url: 'https://www.xiaohongshu.com/search_result?keyword=test' });
219
+ markVisible(dom.window.document.querySelector('#visible'));
220
+ markVisible(dom.window.document.querySelector('#query'));
221
+ markVisible(dom.window.document.querySelector('#hidden'));
222
+
223
+ const result = await Function('document', 'window', 'MutationObserver', 'getComputedStyle', `return (${buildScrollUntilJs(1)})`)(dom.window.document, dom.window, dom.window.MutationObserver, dom.window.getComputedStyle.bind(dom.window));
224
+
225
+ expect(result).toBe(1);
226
+ });
227
+ it('rejects unsafe helper arguments instead of interpolating them into code', () => {
228
+ expect(() => buildScrollUntilJs(0)).toThrow(/targetCount/);
229
+ expect(() => buildScrollUntilJs(10, 0)).toThrow(/maxScrolls/);
230
+ });
231
+ });
158
232
  describe('stripXhsAuthorDateSuffix', () => {
159
233
  it('only strips trailing date suffixes and preserves date-like author text', () => {
160
234
  expect(__test__.stripXhsAuthorDateSuffix('作者名 3天前')).toBe('作者名');
@@ -27,12 +27,17 @@ export function flattenXhsNoteGroups(noteGroups) {
27
27
  }
28
28
  return notes;
29
29
  }
30
- export function buildXhsNoteUrl(userId, noteId, xsecToken) {
30
+ /**
31
+ * Build a signed user-profile note URL on the given web host (defaults to
32
+ * `www.xiaohongshu.com`). The rednote adapter passes `'www.rednote.com'` so
33
+ * the same builder works for both sites.
34
+ */
35
+ export function buildXhsNoteUrl(userId, noteId, xsecToken, webHost = 'www.xiaohongshu.com') {
31
36
  const cleanUserId = toCleanString(userId);
32
37
  const cleanNoteId = toCleanString(noteId);
33
38
  if (!cleanUserId || !cleanNoteId)
34
39
  return '';
35
- const url = new URL(`https://www.xiaohongshu.com/user/profile/${cleanUserId}/${cleanNoteId}`);
40
+ const url = new URL(`https://${webHost}/user/profile/${cleanUserId}/${cleanNoteId}`);
36
41
  const cleanToken = toCleanString(xsecToken);
37
42
  if (cleanToken) {
38
43
  url.searchParams.set('xsec_token', cleanToken);
@@ -40,7 +45,11 @@ export function buildXhsNoteUrl(userId, noteId, xsecToken) {
40
45
  }
41
46
  return url.toString();
42
47
  }
43
- export function extractXhsUserNotes(snapshot, fallbackUserId) {
48
+ /**
49
+ * Normalise a Pinia user-store snapshot into CLI rows. `webHost` is forwarded
50
+ * to `buildXhsNoteUrl` so the resulting URLs point at the calling site.
51
+ */
52
+ export function extractXhsUserNotes(snapshot, fallbackUserId, webHost = 'www.xiaohongshu.com') {
44
53
  const notes = flattenXhsNoteGroups(snapshot.noteGroups);
45
54
  const rows = [];
46
55
  const seen = new Set();
@@ -62,7 +71,7 @@ export function extractXhsUserNotes(snapshot, fallbackUserId) {
62
71
  type: toCleanString(noteCard.type),
63
72
  likes,
64
73
  cover,
65
- url: buildXhsNoteUrl(userId || fallbackUserId, noteId, xsecToken),
74
+ url: buildXhsNoteUrl(userId || fallbackUserId, noteId, xsecToken, webHost),
66
75
  });
67
76
  }
68
77
  return rows;
@@ -20,6 +20,9 @@ describe('buildXhsNoteUrl', () => {
20
20
  it('includes xsec token when available', () => {
21
21
  expect(buildXhsNoteUrl('user123', 'note456', 'token789')).toBe('https://www.xiaohongshu.com/user/profile/user123/note456?xsec_token=token789&xsec_source=pc_user');
22
22
  });
23
+ it('emits a rednote URL when webHost is overridden', () => {
24
+ expect(buildXhsNoteUrl('user123', 'note456', 'token789', 'www.rednote.com')).toBe('https://www.rednote.com/user/profile/user123/note456?xsec_token=token789&xsec_source=pc_user');
25
+ });
23
26
  });
24
27
  describe('extractXhsUserNotes', () => {
25
28
  it('normalizes grouped note cards into CLI rows', () => {
@@ -96,4 +99,21 @@ describe('extractXhsUserNotes', () => {
96
99
  expect(rows).toHaveLength(1);
97
100
  expect(rows[0]?.title).toBe('keep me');
98
101
  });
102
+ it('emits rednote-hosted URLs when webHost is overridden', () => {
103
+ const rows = extractXhsUserNotes({
104
+ noteGroups: [
105
+ [
106
+ {
107
+ xsecToken: 'tok',
108
+ noteCard: {
109
+ noteId: 'note-red',
110
+ displayTitle: 'rednote note',
111
+ user: { userId: 'user-red' },
112
+ },
113
+ },
114
+ ],
115
+ ],
116
+ }, 'fallback-user', 'www.rednote.com');
117
+ expect(rows[0]?.url).toBe('https://www.rednote.com/user/profile/user-red/note-red?xsec_token=tok&xsec_source=pc_user');
118
+ });
99
119
  });