@jackwener/opencli 1.7.18 → 1.7.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. package/README.md +7 -8
  2. package/README.zh-CN.md +7 -8
  3. package/cli-manifest.json +305 -9
  4. package/clis/ctrip/ctrip.test.js +486 -1
  5. package/clis/ctrip/flight.js +136 -0
  6. package/clis/ctrip/hotel-search.js +132 -0
  7. package/clis/ctrip/utils.js +298 -0
  8. package/clis/google/search.js +16 -6
  9. package/clis/google-scholar/search.js +20 -5
  10. package/clis/google-scholar/search.test.js +35 -2
  11. package/clis/reddit/home.js +117 -0
  12. package/clis/reddit/home.test.js +127 -0
  13. package/clis/reddit/read.js +400 -54
  14. package/clis/reddit/read.test.js +315 -12
  15. package/clis/reddit/subreddit-info.js +117 -0
  16. package/clis/reddit/subreddit-info.test.js +163 -0
  17. package/clis/reddit/whoami.js +84 -0
  18. package/clis/reddit/whoami.test.js +105 -0
  19. package/clis/rednote/search.js +6 -2
  20. package/clis/twitter/bookmark-folder.js +3 -1
  21. package/clis/twitter/bookmarks.js +3 -1
  22. package/clis/twitter/followers.js +20 -5
  23. package/clis/twitter/followers.test.js +44 -0
  24. package/clis/twitter/following.js +36 -20
  25. package/clis/twitter/following.test.js +60 -8
  26. package/clis/twitter/likes.js +28 -13
  27. package/clis/twitter/likes.test.js +111 -1
  28. package/clis/twitter/list-add.js +128 -204
  29. package/clis/twitter/list-add.test.js +97 -1
  30. package/clis/twitter/list-tweets.js +13 -4
  31. package/clis/twitter/list-tweets.test.js +48 -0
  32. package/clis/twitter/lists.js +5 -2
  33. package/clis/twitter/post.js +23 -4
  34. package/clis/twitter/post.test.js +30 -0
  35. package/clis/twitter/profile.js +16 -8
  36. package/clis/twitter/profile.test.js +39 -0
  37. package/clis/twitter/reply.js +133 -10
  38. package/clis/twitter/reply.test.js +55 -0
  39. package/clis/twitter/search.js +188 -170
  40. package/clis/twitter/search.test.js +96 -258
  41. package/clis/twitter/shared.js +167 -16
  42. package/clis/twitter/shared.test.js +102 -1
  43. package/clis/twitter/timeline.js +3 -1
  44. package/clis/twitter/tweets.js +147 -51
  45. package/clis/twitter/tweets.test.js +238 -1
  46. package/clis/xiaohongshu/comments.js +23 -2
  47. package/clis/xiaohongshu/comments.test.js +63 -1
  48. package/clis/xiaohongshu/search.js +168 -13
  49. package/clis/xiaohongshu/search.test.js +82 -8
  50. package/clis/xueqiu/earnings-date.js +2 -2
  51. package/clis/xueqiu/kline.js +2 -2
  52. package/clis/xueqiu/utils.js +19 -0
  53. package/clis/xueqiu/utils.test.js +26 -0
  54. package/clis/zhihu/answer-detail.js +233 -0
  55. package/clis/zhihu/answer-detail.test.js +330 -0
  56. package/clis/zhihu/question.js +44 -10
  57. package/clis/zhihu/question.test.js +78 -1
  58. package/clis/zhihu/recommend.js +103 -0
  59. package/clis/zhihu/recommend.test.js +143 -0
  60. package/dist/src/browser/base-page.d.ts +3 -2
  61. package/dist/src/browser/base-page.test.js +2 -2
  62. package/dist/src/browser/cdp.js +3 -3
  63. package/dist/src/browser/page.d.ts +3 -2
  64. package/dist/src/browser/page.js +4 -4
  65. package/dist/src/browser/page.test.js +31 -0
  66. package/dist/src/browser/utils.d.ts +10 -0
  67. package/dist/src/browser/utils.js +37 -0
  68. package/dist/src/browser/utils.test.d.ts +1 -0
  69. package/dist/src/browser/utils.test.js +29 -0
  70. package/dist/src/cli-argv-preprocess.d.ts +37 -0
  71. package/dist/src/cli-argv-preprocess.js +131 -0
  72. package/dist/src/cli-argv-preprocess.test.d.ts +1 -0
  73. package/dist/src/cli-argv-preprocess.test.js +130 -0
  74. package/dist/src/cli.js +123 -86
  75. package/dist/src/cli.test.js +33 -28
  76. package/dist/src/commands/daemon.js +6 -7
  77. package/dist/src/doctor.js +15 -16
  78. package/dist/src/download/progress.js +15 -11
  79. package/dist/src/download/progress.test.d.ts +1 -0
  80. package/dist/src/download/progress.test.js +25 -0
  81. package/dist/src/execution.js +1 -3
  82. package/dist/src/execution.test.js +4 -16
  83. package/dist/src/help.d.ts +11 -0
  84. package/dist/src/help.js +46 -5
  85. package/dist/src/logger.js +8 -9
  86. package/dist/src/main.js +16 -0
  87. package/dist/src/output.js +4 -5
  88. package/dist/src/runtime-detect.d.ts +1 -1
  89. package/dist/src/runtime-detect.js +1 -1
  90. package/dist/src/runtime-detect.test.js +3 -2
  91. package/dist/src/tui.d.ts +0 -1
  92. package/dist/src/tui.js +9 -22
  93. package/dist/src/types.d.ts +3 -1
  94. package/dist/src/update-check.js +4 -5
  95. package/package.json +5 -4
@@ -1,5 +1,6 @@
1
- import { describe, expect, it } from 'vitest';
1
+ import { describe, expect, it, vi } from 'vitest';
2
2
  import { getRegistry } from '@jackwener/opencli/registry';
3
+ import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors';
3
4
  import { __test__ } from './tweets.js';
4
5
 
5
6
  describe('twitter tweets helpers', () => {
@@ -8,6 +9,140 @@ describe('twitter tweets helpers', () => {
8
9
  expect(cmd?.columns).toEqual(['id', 'author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url', 'has_media', 'media_urls']);
9
10
  });
10
11
 
12
+ it('makes the username argument optional so it can default to the logged-in user', () => {
13
+ const cmd = getRegistry().get('twitter/tweets');
14
+ const usernameArg = cmd?.args?.find((arg) => arg.name === 'username');
15
+ expect(usernameArg).toBeDefined();
16
+ expect(usernameArg?.required).not.toBe(true);
17
+ expect(usernameArg?.help || '').toMatch(/default/i);
18
+ expect(cmd?.description || '').toMatch(/default/i);
19
+ });
20
+
21
+ it('detects the logged-in user via AppTabBar_Profile_Link when no username is given', async () => {
22
+ const cmd = getRegistry().get('twitter/tweets');
23
+ const evaluatedScripts = [];
24
+ const page = {
25
+ goto: vi.fn().mockResolvedValue(undefined),
26
+ wait: vi.fn().mockResolvedValue(undefined),
27
+ getCookies: vi.fn(async () => [{ name: 'ct0', value: 'token' }]),
28
+ evaluate: vi.fn(async (script) => {
29
+ const text = typeof script === 'function' ? script.toString() : String(script);
30
+ evaluatedScripts.push(text);
31
+ if (text.includes('AppTabBar_Profile_Link')) return '/viewer';
32
+ if (text.includes('operationName')) return null; // operation metadata resolver
33
+ if (text.includes('/UserByScreenName')) return '42';
34
+ if (text.includes('/UserTweets')) {
35
+ return {
36
+ data: {
37
+ user: {
38
+ result: {
39
+ timeline_v2: {
40
+ timeline: {
41
+ instructions: [
42
+ {
43
+ entries: [
44
+ {
45
+ entryId: 'tweet-1',
46
+ content: {
47
+ itemContent: {
48
+ tweet_results: {
49
+ result: {
50
+ rest_id: '1',
51
+ legacy: {
52
+ full_text: 'own post',
53
+ favorite_count: 0,
54
+ retweet_count: 0,
55
+ reply_count: 0,
56
+ created_at: 'now',
57
+ },
58
+ core: {
59
+ user_results: {
60
+ result: {
61
+ legacy: { screen_name: 'viewer', name: 'Viewer' },
62
+ },
63
+ },
64
+ },
65
+ },
66
+ },
67
+ },
68
+ },
69
+ },
70
+ ],
71
+ },
72
+ ],
73
+ },
74
+ },
75
+ },
76
+ },
77
+ },
78
+ };
79
+ }
80
+ return null;
81
+ }),
82
+ };
83
+ const rows = await cmd.func(page, { limit: 1 });
84
+ // Navigated home to read the logged-in user
85
+ expect(page.goto).toHaveBeenCalledWith('https://x.com/home');
86
+ // AppTabBar_Profile_Link probe happened before any GraphQL fetch
87
+ const probeIdx = evaluatedScripts.findIndex((t) => t.includes('AppTabBar_Profile_Link'));
88
+ const graphqlIdx = evaluatedScripts.findIndex((t) => t.includes('/UserByScreenName'));
89
+ expect(probeIdx).toBeGreaterThanOrEqual(0);
90
+ expect(graphqlIdx).toBeGreaterThan(probeIdx);
91
+ // The detected handle ('viewer') was used for the UserByScreenName lookup
92
+ const lookup = evaluatedScripts.find((t) => t.includes('/UserByScreenName')) || '';
93
+ expect(decodeURIComponent(lookup)).toContain('"screen_name":"viewer"');
94
+ expect(rows).toHaveLength(1);
95
+ expect(rows[0]).toMatchObject({ id: '1', author: 'viewer', url: 'https://x.com/viewer/status/1' });
96
+ });
97
+
98
+ it('throws AuthRequiredError when no username is given and the logged-in user cannot be detected', async () => {
99
+ const cmd = getRegistry().get('twitter/tweets');
100
+ const page = {
101
+ goto: vi.fn().mockResolvedValue(undefined),
102
+ wait: vi.fn().mockResolvedValue(undefined),
103
+ getCookies: vi.fn(async () => []),
104
+ evaluate: vi.fn(async (script) => {
105
+ const text = typeof script === 'function' ? script.toString() : String(script);
106
+ if (text.includes('AppTabBar_Profile_Link')) return null;
107
+ return null;
108
+ }),
109
+ };
110
+ await expect(cmd.func(page, {})).rejects.toBeInstanceOf(AuthRequiredError);
111
+ });
112
+
113
+ it('rejects invalid explicit username before navigation', async () => {
114
+ const cmd = getRegistry().get('twitter/tweets');
115
+ const page = {
116
+ goto: vi.fn().mockResolvedValue(undefined),
117
+ wait: vi.fn().mockResolvedValue(undefined),
118
+ getCookies: vi.fn(async () => [{ name: 'ct0', value: 'token' }]),
119
+ evaluate: vi.fn(),
120
+ };
121
+
122
+ await expect(cmd.func(page, { username: 'viewer/extra' })).rejects.toBeInstanceOf(ArgumentError);
123
+ expect(page.goto).not.toHaveBeenCalled();
124
+ expect(page.getCookies).not.toHaveBeenCalled();
125
+ expect(page.evaluate).not.toHaveBeenCalled();
126
+ });
127
+
128
+ it('rejects non-profile AppTabBar hrefs instead of querying route names as users', async () => {
129
+ const cmd = getRegistry().get('twitter/tweets');
130
+ const page = {
131
+ goto: vi.fn().mockResolvedValue(undefined),
132
+ wait: vi.fn().mockResolvedValue(undefined),
133
+ getCookies: vi.fn(async () => [{ name: 'ct0', value: 'token' }]),
134
+ evaluate: vi.fn(async (script) => {
135
+ const text = typeof script === 'function' ? script.toString() : String(script);
136
+ if (text.includes('AppTabBar_Profile_Link')) return '/home';
137
+ throw new Error(`Unexpected evaluate: ${text.slice(0, 80)}`);
138
+ }),
139
+ };
140
+
141
+ await expect(cmd.func(page, {})).rejects.toBeInstanceOf(AuthRequiredError);
142
+ expect(page.goto).toHaveBeenCalledWith('https://x.com/home');
143
+ expect(page.evaluate).toHaveBeenCalledTimes(1);
144
+ });
145
+
11
146
  it('falls back when queryId contains unsafe characters', () => {
12
147
  expect(__test__.sanitizeQueryId('safe_Query-123', 'fallback')).toBe('safe_Query-123');
13
148
  expect(__test__.sanitizeQueryId('bad"id', 'fallback')).toBe('fallback');
@@ -60,6 +195,18 @@ describe('twitter tweets helpers', () => {
60
195
  expect(b.is_retweet).toBe(true);
61
196
  });
62
197
 
198
+ it('unwraps TweetWithVisibilityResults', () => {
199
+ const tweet = __test__.extractTweet({
200
+ __typename: 'TweetWithVisibilityResults',
201
+ tweet: {
202
+ rest_id: '42',
203
+ legacy: { full_text: 'visible post', favorite_count: 2, retweet_count: 0, reply_count: 0, created_at: 'now' },
204
+ core: { user_results: { result: { legacy: { screen_name: 'alice', name: 'Alice' } } } },
205
+ },
206
+ }, new Set());
207
+ expect(tweet).toMatchObject({ id: '42', author: 'alice', text: 'visible post' });
208
+ });
209
+
63
210
  it('parses chronological tweets and skips pinned instruction', () => {
64
211
  const chronEntry = {
65
212
  entryId: 'tweet-1',
@@ -122,4 +269,94 @@ describe('twitter tweets helpers', () => {
122
269
  url: 'https://x.com/alice/status/1',
123
270
  });
124
271
  });
272
+
273
+ it('recursively parses tweets nested in timeline modules', () => {
274
+ const payload = {
275
+ data: {
276
+ user: {
277
+ result: {
278
+ timeline_v2: {
279
+ timeline: {
280
+ instructions: [
281
+ {
282
+ type: 'TimelineAddEntries',
283
+ entries: [
284
+ {
285
+ entryId: 'profile-conversation-1',
286
+ content: {
287
+ entryType: 'TimelineTimelineModule',
288
+ items: [
289
+ {
290
+ item: {
291
+ itemContent: {
292
+ tweet_results: {
293
+ result: {
294
+ rest_id: '2',
295
+ legacy: { full_text: 'nested post', favorite_count: 1, retweet_count: 0, reply_count: 0, created_at: 'now' },
296
+ core: { user_results: { result: { legacy: { screen_name: 'alice', name: 'Alice' } } } },
297
+ },
298
+ },
299
+ },
300
+ },
301
+ },
302
+ ],
303
+ },
304
+ },
305
+ {
306
+ entryId: 'cursor-bottom-2',
307
+ content: { entryType: 'TimelineTimelineCursor', cursorType: 'Bottom', value: 'next' },
308
+ },
309
+ ],
310
+ },
311
+ ],
312
+ },
313
+ },
314
+ },
315
+ },
316
+ },
317
+ };
318
+ const result = __test__.parseUserTweets(payload, new Set());
319
+ expect(result.nextCursor).toBe('next');
320
+ expect(result.tweets).toHaveLength(1);
321
+ expect(result.tweets[0]).toMatchObject({ id: '2', text: 'nested post' });
322
+ });
323
+
324
+ it('uses populated timeline instructions when timeline_v2 is present but empty', () => {
325
+ const payload = {
326
+ data: {
327
+ user: {
328
+ result: {
329
+ timeline_v2: { timeline: { instructions: [] } },
330
+ timeline: {
331
+ timeline: {
332
+ instructions: [
333
+ {
334
+ type: 'TimelineAddEntries',
335
+ entries: [
336
+ {
337
+ content: {
338
+ itemContent: {
339
+ tweet_results: {
340
+ result: {
341
+ rest_id: '3',
342
+ legacy: { full_text: 'fallback timeline post', favorite_count: 0, retweet_count: 0, reply_count: 0, created_at: 'now' },
343
+ core: { user_results: { result: { legacy: { screen_name: 'alice', name: 'Alice' } } } },
344
+ },
345
+ },
346
+ },
347
+ },
348
+ },
349
+ ],
350
+ },
351
+ ],
352
+ },
353
+ },
354
+ },
355
+ },
356
+ },
357
+ };
358
+ const result = __test__.parseUserTweets(payload, new Set());
359
+ expect(result.tweets).toHaveLength(1);
360
+ expect(result.tweets[0]).toMatchObject({ id: '3', text: 'fallback timeline post' });
361
+ });
125
362
  });
@@ -14,12 +14,33 @@ export function parseCommentLimit(raw, fallback = 20) {
14
14
  return fallback;
15
15
  return Math.max(1, Math.min(Math.floor(n), 50));
16
16
  }
17
+
18
+ export function parseXhsLikeCountText(value) {
19
+ const integerRe = /^(?:\d+|\d{1,3}(?:[,,]\d{3})+)\+?$/u;
20
+ const shortformRe = /^((?:\d+|\d{1,3}(?:[,,]\d{3})+)(?:\.\d+)?)([wWkK万千])\+?$/u;
21
+ const raw = String(value ?? '').replace(/\s+/g, '');
22
+ if (!raw)
23
+ return 0;
24
+ if (integerRe.test(raw))
25
+ return Number(raw.replace(/[,+,]/g, ''));
26
+ const short = raw.match(shortformRe);
27
+ if (!short)
28
+ return 0;
29
+ const numeric = Number(short[1].replace(/[,,]/g, ''));
30
+ if (!Number.isFinite(numeric))
31
+ return 0;
32
+ const unit = short[2].toLowerCase();
33
+ const multiplier = unit === 'w' || unit === '万' ? 10000 : 1000;
34
+ return Math.round(numeric * multiplier);
35
+ }
36
+
17
37
  /**
18
38
  * Host-agnostic IIFE that scrolls a note's comment list and extracts
19
39
  * top-level comments (and optionally nested 楼中楼 replies). Exported so
20
40
  * the rednote adapter can reuse the exact same selector chain.
21
41
  */
22
42
  export function buildCommentsExtractJs(withReplies) {
43
+ const parseLikeCountText = parseXhsLikeCountText.toString();
23
44
  return `
24
45
  (async () => {
25
46
  const wait = (ms) => new Promise(r => setTimeout(r, ms))
@@ -44,9 +65,9 @@ export function buildCommentsExtractJs(withReplies) {
44
65
  }
45
66
 
46
67
  const clean = (el) => (el?.textContent || '').replace(/\\s+/g, ' ').trim()
68
+ const parseLikeCountText = ${parseLikeCountText}
47
69
  const parseLikes = (el) => {
48
- const raw = clean(el)
49
- return /^\\d+$/.test(raw) ? Number(raw) : 0
70
+ return parseLikeCountText(clean(el))
50
71
  }
51
72
  const expandReplyThreads = async (root) => {
52
73
  if (!withReplies || !root) return
@@ -1,6 +1,7 @@
1
1
  import { describe, expect, it, vi } from 'vitest';
2
+ import { JSDOM } from 'jsdom';
2
3
  import { getRegistry } from '@jackwener/opencli/registry';
3
- import './comments.js';
4
+ import { buildCommentsExtractJs, parseXhsLikeCountText } from './comments.js';
4
5
  function createPageMock(evaluateResult) {
5
6
  return {
6
7
  goto: vi.fn().mockResolvedValue(undefined),
@@ -25,6 +26,41 @@ function createPageMock(evaluateResult) {
25
26
  waitForCapture: vi.fn().mockResolvedValue(undefined),
26
27
  };
27
28
  }
29
+
30
+ async function runCommentsExtract(html) {
31
+ const dom = new JSDOM(html, { url: 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok' });
32
+ const previousDocument = globalThis.document;
33
+ const previousLocation = globalThis.location;
34
+ globalThis.document = dom.window.document;
35
+ globalThis.location = dom.window.location;
36
+ try {
37
+ return await eval(buildCommentsExtractJs(false));
38
+ } finally {
39
+ globalThis.document = previousDocument;
40
+ globalThis.location = previousLocation;
41
+ }
42
+ }
43
+
44
+ describe('parseXhsLikeCountText', () => {
45
+ it('parses exact integer and shortform like counts', () => {
46
+ expect(parseXhsLikeCountText('0')).toBe(0);
47
+ expect(parseXhsLikeCountText('42')).toBe(42);
48
+ expect(parseXhsLikeCountText('1,234')).toBe(1234);
49
+ expect(parseXhsLikeCountText('1,234+')).toBe(1234);
50
+ expect(parseXhsLikeCountText('2.1w')).toBe(21000);
51
+ expect(parseXhsLikeCountText('1.5万')).toBe(15000);
52
+ expect(parseXhsLikeCountText('1.2k')).toBe(1200);
53
+ expect(parseXhsLikeCountText('3千')).toBe(3000);
54
+ expect(parseXhsLikeCountText(' 2.1 w + ')).toBe(21000);
55
+ });
56
+
57
+ it('returns 0 for unknown shapes without overparsing arbitrary text', () => {
58
+ for (const raw of ['', null, undefined, '赞', 'likes 2.1w', '2w人', '1,23', '1.2.3k', '.', '1.5']) {
59
+ expect(parseXhsLikeCountText(raw)).toBe(0);
60
+ }
61
+ });
62
+ });
63
+
28
64
  describe('xiaohongshu comments', () => {
29
65
  const command = getRegistry().get('xiaohongshu/comments');
30
66
  it('returns ranked comment rows for signed full URLs', async () => {
@@ -120,6 +156,32 @@ describe('xiaohongshu comments', () => {
120
156
  expect(script).toContain("const afterCount = scroller.querySelectorAll('.parent-comment').length");
121
157
  expect(script).toContain('if (afterCount <= beforeCount) break');
122
158
  });
159
+ it('extracts shortform like counts from the shared xiaohongshu/rednote DOM script', async () => {
160
+ const data = await runCommentsExtract(`
161
+ <main>
162
+ <section class="parent-comment">
163
+ <div class="comment-item">
164
+ <div class="author-wrapper"><span class="name">Alice</span></div>
165
+ <div class="content">Great note</div>
166
+ <span class="count">2.1w</span>
167
+ <span class="date">today</span>
168
+ </div>
169
+ </section>
170
+ <section class="parent-comment">
171
+ <div class="comment-item">
172
+ <span class="user-name">Bob</span>
173
+ <div class="note-text">Malformed count</div>
174
+ <span class="count">likes 2.1w</span>
175
+ </div>
176
+ </section>
177
+ </main>
178
+ `);
179
+
180
+ expect(data.results).toEqual([
181
+ { author: 'Alice', text: 'Great note', likes: 21000, time: 'today', is_reply: false, reply_to: '' },
182
+ { author: 'Bob', text: 'Malformed count', likes: 0, time: '', is_reply: false, reply_to: '' },
183
+ ]);
184
+ });
123
185
  it('respects the limit for top-level comments', async () => {
124
186
  const manyComments = Array.from({ length: 10 }, (_, i) => ({
125
187
  author: `User${i}`,
@@ -6,16 +6,24 @@
6
6
  * Ref: https://github.com/jackwener/opencli/issues/10
7
7
  */
8
8
  import { cli, Strategy } from '@jackwener/opencli/registry';
9
- import { AuthRequiredError } from '@jackwener/opencli/errors';
9
+ import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors';
10
10
  /**
11
11
  * Wait for search results or login wall using MutationObserver (max 5s).
12
12
  * Returns 'content' if note items appeared, 'login_wall' if login gate
13
13
  * detected, or 'timeout' if neither appeared within the deadline.
14
+ *
15
+ * Note-item detection tries the legacy `section.note-item` class first
16
+ * (still observed in many sessions, including rednote) and falls back to
17
+ * a `<section>` element containing a `/search_result/` or `/explore/`
18
+ * link. Issue #1506 reports the class being dropped on some xhs renders.
14
19
  */
15
20
  const WAIT_FOR_CONTENT_JS = `
16
21
  new Promise((resolve) => {
22
+ const findNoteCard = () => document.querySelector(
23
+ 'section.note-item, section:has(a[href*="/search_result/"]), section:has(a[href*="/explore/"])'
24
+ );
17
25
  const detect = () => {
18
- if (document.querySelector('section.note-item')) return 'content';
26
+ if (findNoteCard()) return 'content';
19
27
  if (/登录后查看搜索结果/.test(document.body?.innerText || '')) return 'login_wall';
20
28
  return null;
21
29
  };
@@ -52,6 +60,100 @@ export function stripXhsAuthorDateSuffix(value) {
52
60
  const stripped = text.replace(/\s*(?:\d{1,2}天前|\d+小时前|\d+分钟前|\d+秒前|刚刚|昨天|前天|\d+周前|\d+个月前|\d{1,2}-\d{1,2}|\d{4}-\d{1,2}-\d{1,2})$/u, '').trim();
53
61
  return stripped || text;
54
62
  }
63
+ export function parseLimit(raw) {
64
+ const parsed = Number(raw ?? 20);
65
+ if (!Number.isFinite(parsed) || !Number.isInteger(parsed)) {
66
+ throw new ArgumentError(`--limit must be an integer between 1 and 100, got ${JSON.stringify(raw)}`);
67
+ }
68
+ if (parsed < 1 || parsed > 100) {
69
+ throw new ArgumentError(`--limit must be between 1 and 100, got ${parsed}`);
70
+ }
71
+ return parsed;
72
+ }
73
+ /**
74
+ * Build a "scroll until enough or plateaued" IIFE used in place of a fixed
75
+ * `autoScroll({ times: N })`. Xiaohongshu's search results page lazy-loads
76
+ * ~5-7 notes per scroll, so the previous `times: 2` capped extraction at
77
+ * ~13 items regardless of `--limit` (see #1471). This helper drives scrolls
78
+ * dynamically:
79
+ *
80
+ * - count visible `section.note-item` rows (excluding related-search
81
+ * `.query-note-item` rows)
82
+ * - if count >= targetCount → break (got enough)
83
+ * - if two consecutive scrolls add no new rows → break (DOM plateaued,
84
+ * no more lazy-load available)
85
+ * - hard cap at `maxScrolls` iterations (default 15) to bound runtime
86
+ *
87
+ * Exported so the rednote adapter (same DOM shape) can reuse it.
88
+ */
89
+ export function buildScrollUntilJs(targetCount, maxScrolls = 15) {
90
+ if (!Number.isSafeInteger(targetCount) || targetCount < 1) {
91
+ throw new ArgumentError(`targetCount must be a positive integer, got ${JSON.stringify(targetCount)}`);
92
+ }
93
+ if (!Number.isSafeInteger(maxScrolls) || maxScrolls < 1) {
94
+ throw new ArgumentError(`maxScrolls must be a positive integer, got ${JSON.stringify(maxScrolls)}`);
95
+ }
96
+ return `
97
+ (async () => {
98
+ const isVisibleNote = (el) => {
99
+ if (el.classList.contains('query-note-item')) return false;
100
+ const rect = el.getBoundingClientRect();
101
+ if (rect.width <= 0 || rect.height <= 0) return false;
102
+ const style = getComputedStyle(el);
103
+ return style.display !== 'none' && style.visibility !== 'hidden';
104
+ };
105
+ // Note containers: legacy \`section.note-item\` first, fallback to
106
+ // any \`<section>\` that wraps a search-result/explore note link
107
+ // (#1506 reports the class being dropped on some xhs renders).
108
+ const collectNoteCards = () => {
109
+ const classMatches = document.querySelectorAll('section.note-item');
110
+ if (classMatches.length > 0) return classMatches;
111
+ const sections = new Set();
112
+ for (const a of document.querySelectorAll('a[href*="/search_result/"], a[href*="/explore/"]')) {
113
+ const section = a.closest('section');
114
+ if (section) sections.add(section);
115
+ }
116
+ return sections;
117
+ };
118
+ const countItems = () => {
119
+ let count = 0;
120
+ for (const el of collectNoteCards()) {
121
+ if (isVisibleNote(el)) count++;
122
+ }
123
+ return count;
124
+ };
125
+
126
+ let lastCount = countItems();
127
+ let plateauRounds = 0;
128
+ for (let i = 0; i < ${maxScrolls}; i++) {
129
+ if (countItems() >= ${targetCount}) break;
130
+ const lastHeight = document.body.scrollHeight;
131
+ window.scrollTo(0, lastHeight);
132
+ await new Promise((resolve) => {
133
+ let to;
134
+ const ob = new MutationObserver(() => {
135
+ if (document.body.scrollHeight > lastHeight) {
136
+ clearTimeout(to);
137
+ ob.disconnect();
138
+ setTimeout(resolve, 200);
139
+ }
140
+ });
141
+ ob.observe(document.body, { childList: true, subtree: true });
142
+ to = setTimeout(() => { ob.disconnect(); resolve(null); }, 2500);
143
+ });
144
+ const newCount = countItems();
145
+ if (newCount === lastCount) {
146
+ plateauRounds++;
147
+ if (plateauRounds >= 2) break;
148
+ } else {
149
+ plateauRounds = 0;
150
+ lastCount = newCount;
151
+ }
152
+ }
153
+ return countItems();
154
+ })()
155
+ `;
156
+ }
55
157
  /**
56
158
  * Build the search-result extraction IIFE. The web host is baked into the
57
159
  * `normalizeUrl` fallback so relative `/explore/...` hrefs resolve to a full
@@ -70,13 +172,34 @@ export function buildSearchExtractJs(webHost) {
70
172
 
71
173
  const cleanText = (value) => (value || '').replace(/\\s+/g, ' ').trim();
72
174
  const stripXhsAuthorDateSuffix = ${stripXhsAuthorDateSuffix.toString()};
175
+ const isVisibleNote = (el) => {
176
+ const rect = el.getBoundingClientRect();
177
+ if (rect.width <= 0 || rect.height <= 0) return false;
178
+ const style = getComputedStyle(el);
179
+ return style.display !== 'none' && style.visibility !== 'hidden';
180
+ };
73
181
 
74
182
  const results = [];
75
183
  const seen = new Set();
76
184
 
77
- document.querySelectorAll('section.note-item').forEach(el => {
185
+ // Note containers: legacy \`section.note-item\` first, fallback to any
186
+ // \`<section>\` wrapping a search-result/explore link (#1506 reports the
187
+ // class being dropped on some xhs renders).
188
+ const collectNoteCards = () => {
189
+ const classMatches = document.querySelectorAll('section.note-item');
190
+ if (classMatches.length > 0) return classMatches;
191
+ const sections = new Set();
192
+ for (const a of document.querySelectorAll('a[href*="/search_result/"], a[href*="/explore/"]')) {
193
+ const section = a.closest('section');
194
+ if (section) sections.add(section);
195
+ }
196
+ return sections;
197
+ };
198
+
199
+ for (const el of collectNoteCards()) {
78
200
  // Skip "related searches" sections
79
- if (el.classList.contains('query-note-item')) return;
201
+ if (el.classList?.contains('query-note-item')) continue;
202
+ if (!isVisibleNote(el)) continue;
80
203
 
81
204
  const titleEl = el.querySelector('.title, .note-title, a.title, .footer .title span');
82
205
  const nameEl = el.querySelector('a.author .name, .author-name, .nick-name, .name');
@@ -96,20 +219,29 @@ export function buildSearchExtractJs(webHost) {
96
219
  const authorLinkEl = el.querySelector('a.author, a[href*="/user/profile/"]');
97
220
 
98
221
  const url = normalizeUrl(detailLinkEl?.getAttribute('href') || '');
99
- if (!url) return;
222
+ if (!url) continue;
100
223
 
101
224
  const key = url;
102
- if (seen.has(key)) return;
225
+ if (seen.has(key)) continue;
103
226
  seen.add(key);
104
227
 
228
+ // Fallback title: the new bare-section render keeps the note caption
229
+ // inside the search_result anchor's first span, not in a class-named
230
+ // .title element. Pull from there when the class-based pick is empty.
231
+ let title = cleanText(titleEl?.textContent || '');
232
+ if (!title) {
233
+ const captionSpan = detailLinkEl?.querySelector('span');
234
+ title = cleanText(captionSpan?.textContent || '');
235
+ }
236
+
105
237
  results.push({
106
- title: cleanText(titleEl?.textContent || ''),
238
+ title,
107
239
  author,
108
240
  likes: cleanText(likesEl?.textContent || '0'),
109
241
  url,
110
242
  author_url: normalizeUrl(authorLinkEl?.getAttribute('href') || ''),
111
243
  });
112
- });
244
+ }
113
245
 
114
246
  return results;
115
247
  })()
@@ -129,6 +261,7 @@ export const command = cli({
129
261
  ],
130
262
  columns: ['rank', 'title', 'author', 'likes', 'published_at', 'url'],
131
263
  func: async (page, kwargs) => {
264
+ const limit = parseLimit(kwargs.limit);
132
265
  const keyword = encodeURIComponent(kwargs.query);
133
266
  await page.goto(`https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`);
134
267
  // Wait for search results to render (or login wall to appear).
@@ -138,13 +271,35 @@ export const command = cli({
138
271
  if (waitResult === 'login_wall') {
139
272
  throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall');
140
273
  }
141
- // Scroll a couple of times to load more results
142
- await page.autoScroll({ times: 2 });
143
- const payload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
144
- const data = Array.isArray(payload) ? payload : [];
274
+ // Extract before scrolling. Xiaohongshu uses a virtualized masonry
275
+ // layout, so scrolling to the bottom can evict the initially visible
276
+ // note cards from the DOM and make extraction return [] even though the
277
+ // browser rendered results correctly.
278
+ const initialPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
279
+ let payload = Array.isArray(initialPayload) ? initialPayload : [];
280
+ if (payload.length < limit) {
281
+ // Scroll until enough rows are rendered or the lazy-load plateaus.
282
+ // Replaces the previous fixed `autoScroll({ times: 2 })` which capped
283
+ // extraction at ~13 notes regardless of `--limit` (#1471).
284
+ await page.evaluate(buildScrollUntilJs(limit));
285
+ const scrolledPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
286
+ if (Array.isArray(scrolledPayload)) {
287
+ const seen = new Set(payload.map((item) => item.url).filter(Boolean));
288
+ for (const item of scrolledPayload) {
289
+ if (item?.url && seen.has(item.url))
290
+ continue;
291
+ if (item?.url)
292
+ seen.add(item.url);
293
+ payload.push(item);
294
+ if (payload.length >= limit)
295
+ break;
296
+ }
297
+ }
298
+ }
299
+ const data = payload;
145
300
  return data
146
301
  .filter((item) => item.title)
147
- .slice(0, kwargs.limit)
302
+ .slice(0, limit)
148
303
  .map((item, i) => ({
149
304
  rank: i + 1,
150
305
  ...item,