@jackwener/opencli 1.7.20 → 1.7.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. package/cli-manifest.json +233 -72
  2. package/clis/_shared/search-adapter.js +70 -0
  3. package/clis/boss/chatlist.js +96 -14
  4. package/clis/boss/chatlist.test.js +211 -0
  5. package/clis/boss/chatmsg.js +98 -24
  6. package/clis/boss/chatmsg.test.js +230 -0
  7. package/clis/boss/utils.js +240 -11
  8. package/clis/brave/search.js +80 -0
  9. package/clis/brave/search.test.js +76 -0
  10. package/clis/duckduckgo/search.js +131 -0
  11. package/clis/duckduckgo/search.test.js +128 -0
  12. package/clis/duckduckgo/suggest.js +45 -0
  13. package/clis/duckduckgo/suggest.test.js +66 -0
  14. package/clis/facebook/feed.js +301 -56
  15. package/clis/facebook/feed.test.js +169 -0
  16. package/clis/reddit/comment.js +0 -1
  17. package/clis/reddit/frontpage.js +0 -1
  18. package/clis/reddit/home.js +0 -1
  19. package/clis/reddit/popular.js +0 -1
  20. package/clis/reddit/read.js +0 -1
  21. package/clis/reddit/read.test.js +2 -2
  22. package/clis/reddit/save.js +0 -1
  23. package/clis/reddit/saved.js +0 -1
  24. package/clis/reddit/search.js +0 -1
  25. package/clis/reddit/subreddit-info.js +0 -1
  26. package/clis/reddit/subreddit.js +0 -1
  27. package/clis/reddit/subscribe.js +0 -1
  28. package/clis/reddit/upvote.js +0 -1
  29. package/clis/reddit/upvoted.js +0 -1
  30. package/clis/reddit/user-comments.js +0 -1
  31. package/clis/reddit/user-posts.js +0 -1
  32. package/clis/reddit/user.js +0 -1
  33. package/clis/reddit/whoami.js +0 -1
  34. package/clis/rednote/rednote.test.js +65 -0
  35. package/clis/rednote/search.js +11 -5
  36. package/clis/twitter/article.js +0 -1
  37. package/clis/twitter/bookmark-folder.js +0 -1
  38. package/clis/twitter/bookmark-folders.js +0 -1
  39. package/clis/twitter/bookmarks.js +0 -1
  40. package/clis/twitter/download.js +0 -1
  41. package/clis/twitter/followers.js +0 -1
  42. package/clis/twitter/following.js +0 -1
  43. package/clis/twitter/likes.js +0 -1
  44. package/clis/twitter/list-tweets.js +0 -1
  45. package/clis/twitter/lists.js +0 -1
  46. package/clis/twitter/notifications.js +0 -1
  47. package/clis/twitter/profile.js +0 -1
  48. package/clis/twitter/search.js +0 -1
  49. package/clis/twitter/thread.js +0 -1
  50. package/clis/twitter/timeline.js +0 -1
  51. package/clis/twitter/trending.js +0 -1
  52. package/clis/twitter/tweets.js +0 -1
  53. package/clis/xiaohongshu/search.js +34 -16
  54. package/clis/xiaohongshu/search.test.js +66 -11
  55. package/clis/yahoo/search.js +92 -0
  56. package/clis/yahoo/search.test.js +94 -0
  57. package/package.json +1 -1
@@ -7,7 +7,6 @@ cli({
7
7
  domain: 'reddit.com',
8
8
  strategy: Strategy.COOKIE,
9
9
  browser: true,
10
- siteSession: 'persistent',
11
10
  args: [
12
11
  { name: 'username', type: 'string', required: true, positional: true, help: 'Reddit username (no `u/` prefix needed)' },
13
12
  { name: 'limit', type: 'int', default: 15 },
@@ -7,7 +7,6 @@ cli({
7
7
  domain: 'reddit.com',
8
8
  strategy: Strategy.COOKIE,
9
9
  browser: true,
10
- siteSession: 'persistent',
11
10
  args: [
12
11
  { name: 'username', type: 'string', required: true, positional: true, help: 'Reddit username (no `u/` prefix needed)' },
13
12
  { name: 'limit', type: 'int', default: 15 },
@@ -7,7 +7,6 @@ cli({
7
7
  domain: 'reddit.com',
8
8
  strategy: Strategy.COOKIE,
9
9
  browser: true,
10
- siteSession: 'persistent',
11
10
  args: [
12
11
  { name: 'username', type: 'string', required: true, positional: true, help: 'Reddit username (no `u/` prefix needed)' },
13
12
  ],
@@ -9,7 +9,6 @@ cli({
9
9
  domain: 'reddit.com',
10
10
  strategy: Strategy.COOKIE,
11
11
  browser: true,
12
- siteSession: 'persistent',
13
12
  args: [],
14
13
  columns: ['field', 'value'],
15
14
  func: async (page) => {
@@ -31,6 +31,14 @@ function createPageMock(evaluateResult) {
31
31
  getCookies: vi.fn().mockResolvedValue([{ name: 'sid', value: 'secret', domain: 'www.rednote.com' }]),
32
32
  };
33
33
  }
34
+ function createSearchPageMock(evaluateResults) {
35
+ const page = createPageMock(undefined);
36
+ page.evaluate = vi.fn();
37
+ for (const result of evaluateResults) {
38
+ page.evaluate.mockResolvedValueOnce(result);
39
+ }
40
+ return page;
41
+ }
34
42
 
35
43
  describe('rednote note URL identity', () => {
36
44
  const download = getRegistry().get('rednote/download');
@@ -130,6 +138,63 @@ describe('rednote argument validation', () => {
130
138
  });
131
139
  });
132
140
 
141
+ describe('rednote search browser-bridge envelopes', () => {
142
+ const search = getRegistry().get('rednote/search');
143
+
144
+ it('unwraps login-wall wait result envelopes before auth handling', async () => {
145
+ const page = createSearchPageMock([
146
+ { session: 'site:rednote', data: 'login_wall' },
147
+ ]);
148
+
149
+ await expect(search.func(page, { query: 'tesla', limit: 5 })).rejects.toMatchObject({
150
+ code: 'AUTH_REQUIRED',
151
+ message: expect.stringContaining('blocked behind a login wall'),
152
+ });
153
+ expect(page.evaluate).toHaveBeenCalledTimes(1);
154
+ });
155
+
156
+ it('unwraps search extraction envelopes and preserves rednote row shape', async () => {
157
+ const url = 'https://www.rednote.com/search_result/68e90be80000000004022e66?xsec_token=test-token';
158
+ const page = createSearchPageMock([
159
+ 'content',
160
+ 1,
161
+ {
162
+ session: 'site:rednote',
163
+ data: [{
164
+ title: 'rednote result',
165
+ author: 'author',
166
+ likes: '12',
167
+ url,
168
+ author_url: 'https://www.rednote.com/user/profile/u1',
169
+ }],
170
+ },
171
+ ]);
172
+
173
+ await expect(search.func(page, { query: 'tesla', limit: 1 })).resolves.toEqual([{
174
+ rank: 1,
175
+ title: 'rednote result',
176
+ author: 'author',
177
+ likes: '12',
178
+ published_at: '2025-10-10',
179
+ url,
180
+ author_url: 'https://www.rednote.com/user/profile/u1',
181
+ }]);
182
+ });
183
+
184
+ it('fails typed instead of silently returning [] for malformed extraction payloads', async () => {
185
+ const page = createSearchPageMock([
186
+ 'content',
187
+ 1,
188
+ { session: 'site:rednote', data: { rows: [] } },
189
+ ]);
190
+
191
+ await expect(search.func(page, { query: 'tesla', limit: 1 })).rejects.toMatchObject({
192
+ code: 'COMMAND_EXEC',
193
+ message: expect.stringContaining('payload shape'),
194
+ });
195
+ });
196
+ });
197
+
133
198
  describe('rednote Pinia store failures', () => {
134
199
  it('maps feed store read failure to CommandExecutionError', async () => {
135
200
  const command = getRegistry().get('rednote/feed');
@@ -6,8 +6,8 @@
6
6
  * 1:1 comparison between the two frontends.
7
7
  */
8
8
  import { cli, Strategy } from '@jackwener/opencli/registry';
9
- import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors';
10
- import { buildScrollUntilJs, buildSearchExtractJs, noteIdToDate } from '../xiaohongshu/search.js';
9
+ import { ArgumentError, AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
10
+ import { buildScrollUntilJs, buildSearchExtractJs, noteIdToDate, unwrapEvaluateResult } from '../xiaohongshu/search.js';
11
11
 
12
12
  function parseLimit(raw) {
13
13
  const parsed = Number(raw);
@@ -19,6 +19,13 @@ function parseLimit(raw) {
19
19
  }
20
20
  return parsed;
21
21
  }
22
+ function requireSearchRows(payload) {
23
+ const rows = unwrapEvaluateResult(payload);
24
+ if (!Array.isArray(rows)) {
25
+ throw new CommandExecutionError('Unexpected Rednote search extraction payload shape; expected an array of rows.');
26
+ }
27
+ return rows;
28
+ }
22
29
 
23
30
  /**
24
31
  * Wait for search results or login wall using MutationObserver (max 5s).
@@ -78,7 +85,7 @@ cli({
78
85
  const limit = parseLimit(kwargs.limit ?? 20);
79
86
  const keyword = encodeURIComponent(kwargs.query);
80
87
  await page.goto(`https://www.rednote.com/search_result?keyword=${keyword}&source=web_search_result_notes`);
81
- const waitResult = await page.evaluate(WAIT_FOR_CONTENT_JS);
88
+ const waitResult = unwrapEvaluateResult(await page.evaluate(WAIT_FOR_CONTENT_JS));
82
89
  if (waitResult === 'login_wall') {
83
90
  throw new AuthRequiredError('www.rednote.com', 'Rednote search results are blocked behind a login wall');
84
91
  }
@@ -87,8 +94,7 @@ cli({
87
94
  // `autoScroll({ times: 2 })` capped extraction at ~13 notes regardless
88
95
  // of `--limit`.
89
96
  await page.evaluate(buildScrollUntilJs(limit));
90
- const payload = await page.evaluate(buildSearchExtractJs('www.rednote.com'));
91
- const data = Array.isArray(payload) ? payload : [];
97
+ const data = requireSearchRows(await page.evaluate(buildSearchExtractJs('www.rednote.com')));
92
98
  return data
93
99
  .filter((item) => item.title)
94
100
  .slice(0, limit)
@@ -11,7 +11,6 @@ cli({
11
11
  domain: 'x.com',
12
12
  strategy: Strategy.COOKIE,
13
13
  browser: true,
14
- siteSession: 'persistent',
15
14
  args: [
16
15
  { name: 'tweet-id', type: 'string', positional: true, required: true, help: 'Tweet ID or URL containing the article' },
17
16
  ],
@@ -124,7 +124,6 @@ cli({
124
124
  domain: 'x.com',
125
125
  strategy: Strategy.COOKIE,
126
126
  browser: true,
127
- siteSession: 'persistent',
128
127
  args: [
129
128
  { name: 'folder-id', positional: true, type: 'string', required: true, help: 'Folder id from `opencli twitter bookmark-folders`.' },
130
129
  { name: 'limit', type: 'int', default: 20, help: 'Maximum number of bookmarks to return (default 20).' },
@@ -77,7 +77,6 @@ cli({
77
77
  domain: 'x.com',
78
78
  strategy: Strategy.COOKIE,
79
79
  browser: true,
80
- siteSession: 'persistent',
81
80
  args: [],
82
81
  columns: ['id', 'name', 'items', 'created_at'],
83
82
  func: async (page) => {
@@ -108,7 +108,6 @@ cli({
108
108
  domain: 'x.com',
109
109
  strategy: Strategy.COOKIE,
110
110
  browser: true,
111
- siteSession: 'persistent',
112
111
  args: [
113
112
  { name: 'limit', type: 'int', default: 20, help: 'Maximum number of bookmarks to return (default 20).' },
114
113
  { name: 'top-by-engagement', type: 'int', default: 0, help: 'When set to N>0, re-rank the bookmarks by weighted engagement (likes×1 + retweets×3 + replies×2 + bookmarks×5 + log10(views+1)×0.5) and return the top N. Default 0 keeps the API\'s native (saved-time) ordering.' },
@@ -15,7 +15,6 @@ cli({
15
15
  description: 'Download Twitter/X media (images and videos). Provide either <username> to scan a profile\'s media tab, or --tweet-url to download a single tweet.',
16
16
  domain: 'x.com',
17
17
  strategy: Strategy.COOKIE,
18
- siteSession: 'persistent',
19
18
  args: [
20
19
  { name: 'username', positional: true, help: 'Twitter username (with or without @) to scan their /media tab. Either <username> or --tweet-url is required.' },
21
20
  { name: 'tweet-url', help: 'Single tweet URL to download. Use this OR <username>, not both required at once.' },
@@ -84,7 +84,6 @@ cli({
84
84
  domain: 'x.com',
85
85
  strategy: Strategy.UI,
86
86
  browser: true,
87
- siteSession: 'persistent',
88
87
  args: [
89
88
  {
90
89
  name: 'user',
@@ -140,7 +140,6 @@ cli({
140
140
  domain: 'x.com',
141
141
  strategy: Strategy.COOKIE,
142
142
  browser: true,
143
- siteSession: 'persistent',
144
143
  args: [
145
144
  {
146
145
  name: 'user',
@@ -143,7 +143,6 @@ cli({
143
143
  domain: 'x.com',
144
144
  strategy: Strategy.COOKIE,
145
145
  browser: true,
146
- siteSession: 'persistent',
147
146
  args: [
148
147
  { name: 'username', type: 'string', positional: true, help: 'Twitter screen name (with or without @). Defaults to the logged-in user when omitted.' },
149
148
  { name: 'limit', type: 'int', default: 20, help: 'Maximum number of liked tweets to return (default 20).' },
@@ -115,7 +115,6 @@ cli({
115
115
  domain: 'x.com',
116
116
  strategy: Strategy.COOKIE,
117
117
  browser: true,
118
- siteSession: 'persistent',
119
118
  args: [
120
119
  { name: 'listId', positional: true, type: 'string', required: true, help: 'Numeric ID of a Twitter/X list (e.g. from `opencli twitter lists`)' },
121
120
  { name: 'limit', type: 'int', default: 50 },
@@ -92,7 +92,6 @@ export const command = cli({
92
92
  domain: 'x.com',
93
93
  strategy: Strategy.COOKIE,
94
94
  browser: true,
95
- siteSession: 'persistent',
96
95
  args: [
97
96
  { name: 'limit', type: 'int', default: 50, help: 'Maximum number of lists to return (default 50).' },
98
97
  ],
@@ -8,7 +8,6 @@ cli({
8
8
  domain: 'x.com',
9
9
  strategy: Strategy.INTERCEPT,
10
10
  browser: true,
11
- siteSession: 'persistent',
12
11
  args: [
13
12
  { name: 'limit', type: 'int', default: 20, help: 'Maximum number of notifications to return (default 20).' },
14
13
  ],
@@ -11,7 +11,6 @@ cli({
11
11
  domain: 'x.com',
12
12
  strategy: Strategy.COOKIE,
13
13
  browser: true,
14
- siteSession: 'persistent',
15
14
  args: [
16
15
  { name: 'username', type: 'string', positional: true, help: 'Twitter screen name (with or without @). Defaults to the logged-in user when omitted.' },
17
16
  ],
@@ -261,7 +261,6 @@ cli({
261
261
  domain: 'x.com',
262
262
  strategy: Strategy.COOKIE,
263
263
  browser: true,
264
- siteSession: 'persistent',
265
264
  args: [
266
265
  { name: 'query', type: 'string', required: true, positional: true, help: 'Search query. Raw X operators (e.g. "exact phrase", #tag, OR, lang:en, since:YYYY-MM-DD, from:, since:) are passed through unchanged.' },
267
266
  { name: 'filter', type: 'string', default: 'top', choices: ['top', 'live'], help: 'Legacy alias for --product. Kept for backwards compatibility; if --product is set it wins.' },
@@ -100,7 +100,6 @@ cli({
100
100
  domain: 'x.com',
101
101
  strategy: Strategy.COOKIE,
102
102
  browser: true,
103
- siteSession: 'persistent',
104
103
  args: [
105
104
  { name: 'tweet-id', positional: true, type: 'string', required: true, help: 'Tweet numeric ID (e.g. 1234567890) or full status URL' },
106
105
  { name: 'limit', type: 'int', default: 50 },
@@ -142,7 +142,6 @@ cli({
142
142
  domain: 'x.com',
143
143
  strategy: Strategy.COOKIE,
144
144
  browser: true,
145
- siteSession: 'persistent',
146
145
  args: [
147
146
  {
148
147
  name: 'type',
@@ -17,7 +17,6 @@ cli({
17
17
  domain: 'x.com',
18
18
  strategy: Strategy.COOKIE,
19
19
  browser: true,
20
- siteSession: 'persistent',
21
20
  args: [
22
21
  { name: 'limit', type: 'int', default: 20, help: 'Number of trends to show' },
23
22
  ],
@@ -221,7 +221,6 @@ cli({
221
221
  domain: 'x.com',
222
222
  strategy: Strategy.COOKIE,
223
223
  browser: true,
224
- siteSession: 'persistent',
225
224
  args: [
226
225
  { name: 'username', type: 'string', positional: true, help: 'Twitter screen name (with or without @). Defaults to the logged-in user when omitted.' },
227
226
  { name: 'limit', type: 'int', default: 20, help: 'Max tweets to return' },
@@ -6,7 +6,7 @@
6
6
  * Ref: https://github.com/jackwener/opencli/issues/10
7
7
  */
8
8
  import { cli, Strategy } from '@jackwener/opencli/registry';
9
- import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors';
9
+ import { ArgumentError, AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
10
10
  /**
11
11
  * Wait for search results or login wall using MutationObserver (max 5s).
12
12
  * Returns 'content' if note items appeared, 'login_wall' if login gate
@@ -60,6 +60,26 @@ export function stripXhsAuthorDateSuffix(value) {
60
60
  const stripped = text.replace(/\s*(?:\d{1,2}天前|\d+小时前|\d+分钟前|\d+秒前|刚刚|昨天|前天|\d+周前|\d+个月前|\d{1,2}-\d{1,2}|\d{4}-\d{1,2}-\d{1,2})$/u, '').trim();
61
61
  return stripped || text;
62
62
  }
63
+ /**
64
+ * `page.evaluate` may return either the raw IIFE value or a
65
+ * `{ session, data }` envelope depending on the browser-bridge version.
66
+ * Adapter code that called `Array.isArray(payload)` directly on the
67
+ * envelope silently received [] for every search. This helper normalizes
68
+ * both shapes so callers can keep their Array.isArray checks unchanged.
69
+ */
70
+ export function unwrapEvaluateResult(payload) {
71
+ if (payload && !Array.isArray(payload) && typeof payload === 'object' && 'session' in payload && 'data' in payload) {
72
+ return payload.data;
73
+ }
74
+ return payload;
75
+ }
76
+ function requireSearchRows(payload, phase) {
77
+ const rows = unwrapEvaluateResult(payload);
78
+ if (!Array.isArray(rows)) {
79
+ throw new CommandExecutionError(`Unexpected Xiaohongshu search ${phase} payload shape; expected an array of rows.`);
80
+ }
81
+ return rows;
82
+ }
63
83
  export function parseLimit(raw) {
64
84
  const parsed = Number(raw ?? 20);
65
85
  if (!Number.isFinite(parsed) || !Number.isInteger(parsed)) {
@@ -267,7 +287,7 @@ export const command = cli({
267
287
  // Wait for search results to render (or login wall to appear).
268
288
  // Uses MutationObserver to resolve as soon as content appears,
269
289
  // instead of a fixed delay + blind retry.
270
- const waitResult = await page.evaluate(WAIT_FOR_CONTENT_JS);
290
+ const waitResult = unwrapEvaluateResult(await page.evaluate(WAIT_FOR_CONTENT_JS));
271
291
  if (waitResult === 'login_wall') {
272
292
  throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall');
273
293
  }
@@ -275,25 +295,23 @@ export const command = cli({
275
295
  // layout, so scrolling to the bottom can evict the initially visible
276
296
  // note cards from the DOM and make extraction return [] even though the
277
297
  // browser rendered results correctly.
278
- const initialPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
279
- let payload = Array.isArray(initialPayload) ? initialPayload : [];
298
+ const initialPayload = requireSearchRows(await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com')), 'initial extraction');
299
+ const payload = [...initialPayload];
280
300
  if (payload.length < limit) {
281
301
  // Scroll until enough rows are rendered or the lazy-load plateaus.
282
302
  // Replaces the previous fixed `autoScroll({ times: 2 })` which capped
283
303
  // extraction at ~13 notes regardless of `--limit` (#1471).
284
304
  await page.evaluate(buildScrollUntilJs(limit));
285
- const scrolledPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
286
- if (Array.isArray(scrolledPayload)) {
287
- const seen = new Set(payload.map((item) => item.url).filter(Boolean));
288
- for (const item of scrolledPayload) {
289
- if (item?.url && seen.has(item.url))
290
- continue;
291
- if (item?.url)
292
- seen.add(item.url);
293
- payload.push(item);
294
- if (payload.length >= limit)
295
- break;
296
- }
305
+ const scrolledPayload = requireSearchRows(await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com')), 'post-scroll extraction');
306
+ const seen = new Set(payload.map((item) => item.url).filter(Boolean));
307
+ for (const item of scrolledPayload) {
308
+ if (item?.url && seen.has(item.url))
309
+ continue;
310
+ if (item?.url)
311
+ seen.add(item.url);
312
+ payload.push(item);
313
+ if (payload.length >= limit)
314
+ break;
297
315
  }
298
316
  }
299
317
  const data = payload;
@@ -1,7 +1,7 @@
1
1
  import { describe, expect, it, vi } from 'vitest';
2
2
  import { getRegistry } from '@jackwener/opencli/registry';
3
3
  import { JSDOM } from 'jsdom';
4
- import { __test__, buildScrollUntilJs, noteIdToDate } from './search.js';
4
+ import { __test__, buildScrollUntilJs, noteIdToDate, unwrapEvaluateResult } from './search.js';
5
5
 
6
6
  function markVisible(el) {
7
7
  el.getBoundingClientRect = () => ({ width: 100, height: 100 });
@@ -57,24 +57,37 @@ describe('xiaohongshu search', () => {
57
57
  expect(page.evaluate).toHaveBeenCalledTimes(1);
58
58
  expect(page.autoScroll).not.toHaveBeenCalled();
59
59
  });
60
+ it('unwraps a browser-bridge envelope before handling login-wall wait result', async () => {
61
+ const cmd = getRegistry().get('xiaohongshu/search');
62
+ const page = createPageMock([
63
+ { session: 'site:xiaohongshu', data: 'login_wall' },
64
+ ]);
65
+
66
+ await expect(cmd.func(page, { query: '特斯拉', limit: 5 })).rejects.toMatchObject({
67
+ code: 'AUTH_REQUIRED',
68
+ message: expect.stringContaining('blocked behind a login wall'),
69
+ });
70
+ expect(page.evaluate).toHaveBeenCalledTimes(1);
71
+ });
60
72
  it('returns ranked results with search_result url and author_url preserved', async () => {
61
73
  const cmd = getRegistry().get('xiaohongshu/search');
62
74
  expect(cmd?.func).toBeTypeOf('function');
63
75
  const detailUrl = 'https://www.xiaohongshu.com/search_result/68e90be80000000004022e66?xsec_token=test-token&xsec_source=';
64
76
  const authorUrl = 'https://www.xiaohongshu.com/user/profile/635a9c720000000018028b40?xsec_token=user-token&xsec_source=pc_search';
77
+ const rows = [
78
+ {
79
+ title: '某鱼买FSD被坑了4万',
80
+ author: '随风',
81
+ likes: '261',
82
+ url: detailUrl,
83
+ author_url: authorUrl,
84
+ },
85
+ ];
65
86
  const page = createPageMock([
66
87
  // First evaluate: MutationObserver wait (content appeared)
67
88
  'content',
68
- // Second evaluate: initial DOM extraction (already enough results)
69
- [
70
- {
71
- title: '某鱼买FSD被坑了4万',
72
- author: '随风',
73
- likes: '261',
74
- url: detailUrl,
75
- author_url: authorUrl,
76
- },
77
- ],
89
+ // Second evaluate: initial DOM extraction (already enough results) through Browser Bridge envelope.
90
+ { session: 'site:xiaohongshu', data: rows },
78
91
  ]);
79
92
  const result = await cmd.func(page, { query: '特斯拉', limit: 1 });
80
93
  // Should only do one goto (the search page itself), no per-note detail navigation
@@ -91,6 +104,18 @@ describe('xiaohongshu search', () => {
91
104
  },
92
105
  ]);
93
106
  });
107
+ it('fails typed instead of silently returning [] for malformed extraction payloads', async () => {
108
+ const cmd = getRegistry().get('xiaohongshu/search');
109
+ const page = createPageMock([
110
+ 'content',
111
+ { session: 'site:xiaohongshu', data: { rows: [] } },
112
+ ]);
113
+
114
+ await expect(cmd.func(page, { query: '测试', limit: 1 })).rejects.toMatchObject({
115
+ code: 'COMMAND_EXEC',
116
+ message: expect.stringContaining('payload shape'),
117
+ });
118
+ });
94
119
  it('filters out results with no title and respects the limit', async () => {
95
120
  const cmd = getRegistry().get('xiaohongshu/search');
96
121
  expect(cmd?.func).toBeTypeOf('function');
@@ -135,6 +160,10 @@ describe('xiaohongshu search', () => {
135
160
  'content',
136
161
  // Second evaluate: initial extraction (no rows rendered)
137
162
  [],
163
+ // Third evaluate: scroll-until row count
164
+ 0,
165
+ // Fourth evaluate: post-scroll extraction (still no rows)
166
+ [],
138
167
  ]);
139
168
  const result = (await cmd.func(page, { query: '测试等待', limit: 5 }));
140
169
  expect(result).toHaveLength(0);
@@ -268,3 +297,29 @@ describe('noteIdToDate (ObjectID timestamp parsing)', () => {
268
297
  expect(noteIdToDate('https://www.xiaohongshu.com/search_result/000000000000000000000000')).toBe('');
269
298
  });
270
299
  });
300
+ describe('unwrapEvaluateResult (browser-bridge envelope normalization)', () => {
301
+ it('returns the raw array unchanged when payload is already an array', () => {
302
+ const arr = [{ title: 'a' }, { title: 'b' }];
303
+ expect(unwrapEvaluateResult(arr)).toBe(arr);
304
+ });
305
+ it('unwraps { session, data: [...] } envelope to the inner array', () => {
306
+ const arr = [{ title: 'a' }];
307
+ const env = { session: 'site:xiaohongshu:abc', data: arr };
308
+ expect(unwrapEvaluateResult(env)).toBe(arr);
309
+ });
310
+ it('unwraps primitive data from Browser Bridge envelopes', () => {
311
+ expect(unwrapEvaluateResult({ session: 'site:xiaohongshu:abc', data: 'login_wall' })).toBe('login_wall');
312
+ });
313
+ it('passes non-envelope objects through unchanged', () => {
314
+ const obj = { results: [], loginWall: true };
315
+ expect(unwrapEvaluateResult(obj)).toBe(obj);
316
+ });
317
+ it('handles null and undefined safely', () => {
318
+ expect(unwrapEvaluateResult(null)).toBe(null);
319
+ expect(unwrapEvaluateResult(undefined)).toBe(undefined);
320
+ });
321
+ it('unwraps non-array envelope data so callers can validate the payload shape', () => {
322
+ const env = { session: 'x', data: { not: 'an array' } };
323
+ expect(unwrapEvaluateResult(env)).toEqual({ not: 'an array' });
324
+ });
325
+ });
@@ -0,0 +1,92 @@
1
+ import { cli, Strategy } from '@jackwener/opencli/registry';
2
+ import {
3
+ emptySearchResults,
4
+ requireBoundedInteger,
5
+ requireRows,
6
+ requireSearchQuery,
7
+ runBrowserStep,
8
+ toHttpsUrl,
9
+ } from '../_shared/search-adapter.js';
10
+
11
+ function decodeYahooUrl(href) {
12
+ if (!href) return '';
13
+ if (href.indexOf('RU=') !== -1 && href.indexOf('/RK=') !== -1) {
14
+ var match = href.match(/RU=([^/]+)\/RK=/);
15
+ if (match && match[1]) {
16
+ try {
17
+ return toHttpsUrl(decodeURIComponent(match[1]), 'https://search.yahoo.com');
18
+ } catch {
19
+ return toHttpsUrl(href, 'https://search.yahoo.com');
20
+ }
21
+ }
22
+ }
23
+ return toHttpsUrl(href, 'https://search.yahoo.com');
24
+ }
25
+
26
+ function buildExtractorJs(limit) {
27
+ return `
28
+ (function() {
29
+ var results = [];
30
+ var seen = {};
31
+ var items = document.querySelectorAll('.algo');
32
+ for (var i = 0; i < items.length; i++) {
33
+ if (results.length >= ${limit}) break;
34
+ var el = items[i];
35
+ var h3 = el.querySelector('h3');
36
+ var linkEl = el.querySelector('.compTitle a');
37
+ var snippetEl = el.querySelector('.compText');
38
+ if (!h3 || !linkEl) continue;
39
+ var title = h3.textContent.trim();
40
+ var href = linkEl.getAttribute('href') || '';
41
+ var snippet = snippetEl ? snippetEl.textContent.trim() : '';
42
+ if (!title || !href || seen[href]) continue;
43
+ seen[href] = true;
44
+ results.push([title, href, snippet]);
45
+ }
46
+ return results;
47
+ })()`;
48
+ }
49
+
50
+ const command = cli({
51
+ site: 'yahoo',
52
+ name: 'search',
53
+ access: 'read',
54
+ description: 'Search Yahoo (powered by Bing)',
55
+ domain: 'search.yahoo.com',
56
+ strategy: Strategy.PUBLIC,
57
+ browser: true,
58
+ args: [
59
+ { name: 'keyword', positional: true, required: true, help: 'Search query' },
60
+ { name: 'limit', type: 'int', default: 7, help: 'Number of results per page (max 7)' },
61
+ { name: 'page', type: 'int', default: 1, help: 'Page number (1, 2, 3...). Yahoo returns ~7 results per page' },
62
+ ],
63
+ columns: ['rank', 'title', 'url', 'snippet'],
64
+ func: async (page, kwargs) => {
65
+ const limit = requireBoundedInteger(kwargs.limit, 7, 1, 7, '--limit');
66
+ const query = requireSearchQuery(kwargs.keyword);
67
+ const keyword = encodeURIComponent(query);
68
+ const pageNum = requireBoundedInteger(kwargs.page, 1, 1, 100, '--page');
69
+ var url = `https://search.yahoo.com/search?p=${keyword}`;
70
+ if (pageNum > 1) url += `&b=${(pageNum - 1) * 7 + 1}`;
71
+ await runBrowserStep('yahoo search navigation', () => page.goto(url));
72
+ try {
73
+ await page.wait({ selector: '.algo', timeout: 10 });
74
+ } catch {
75
+ await page.wait(3).catch(function() {});
76
+ }
77
+ const raw = await runBrowserStep('yahoo search extraction', () => page.evaluate(buildExtractorJs(limit)));
78
+ const results = requireRows(raw, 'yahoo search');
79
+ if (results.length === 0) {
80
+ throw emptySearchResults('Yahoo', query);
81
+ }
82
+ const rows = results
83
+ .map(function(r, index) {
84
+ return { rank: index + 1 + (pageNum - 1) * 7, title: r[0], url: decodeYahooUrl(r[1]), snippet: r[2] };
85
+ })
86
+ .filter((row) => row.url);
87
+ if (rows.length === 0) throw emptySearchResults('Yahoo', query);
88
+ return rows;
89
+ },
90
+ });
91
+
92
+ export const __test__ = { command };