@jackwener/opencli 1.7.20 → 1.7.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli-manifest.json +233 -72
- package/clis/_shared/search-adapter.js +70 -0
- package/clis/boss/chatlist.js +96 -14
- package/clis/boss/chatlist.test.js +211 -0
- package/clis/boss/chatmsg.js +98 -24
- package/clis/boss/chatmsg.test.js +230 -0
- package/clis/boss/utils.js +240 -11
- package/clis/brave/search.js +80 -0
- package/clis/brave/search.test.js +76 -0
- package/clis/duckduckgo/search.js +131 -0
- package/clis/duckduckgo/search.test.js +128 -0
- package/clis/duckduckgo/suggest.js +45 -0
- package/clis/duckduckgo/suggest.test.js +66 -0
- package/clis/facebook/feed.js +301 -56
- package/clis/facebook/feed.test.js +169 -0
- package/clis/reddit/comment.js +0 -1
- package/clis/reddit/frontpage.js +0 -1
- package/clis/reddit/home.js +0 -1
- package/clis/reddit/popular.js +0 -1
- package/clis/reddit/read.js +0 -1
- package/clis/reddit/read.test.js +2 -2
- package/clis/reddit/save.js +0 -1
- package/clis/reddit/saved.js +0 -1
- package/clis/reddit/search.js +0 -1
- package/clis/reddit/subreddit-info.js +0 -1
- package/clis/reddit/subreddit.js +0 -1
- package/clis/reddit/subscribe.js +0 -1
- package/clis/reddit/upvote.js +0 -1
- package/clis/reddit/upvoted.js +0 -1
- package/clis/reddit/user-comments.js +0 -1
- package/clis/reddit/user-posts.js +0 -1
- package/clis/reddit/user.js +0 -1
- package/clis/reddit/whoami.js +0 -1
- package/clis/rednote/rednote.test.js +65 -0
- package/clis/rednote/search.js +11 -5
- package/clis/twitter/article.js +0 -1
- package/clis/twitter/bookmark-folder.js +0 -1
- package/clis/twitter/bookmark-folders.js +0 -1
- package/clis/twitter/bookmarks.js +0 -1
- package/clis/twitter/download.js +0 -1
- package/clis/twitter/followers.js +0 -1
- package/clis/twitter/following.js +0 -1
- package/clis/twitter/likes.js +0 -1
- package/clis/twitter/list-tweets.js +0 -1
- package/clis/twitter/lists.js +0 -1
- package/clis/twitter/notifications.js +0 -1
- package/clis/twitter/profile.js +0 -1
- package/clis/twitter/search.js +0 -1
- package/clis/twitter/thread.js +0 -1
- package/clis/twitter/timeline.js +0 -1
- package/clis/twitter/trending.js +0 -1
- package/clis/twitter/tweets.js +0 -1
- package/clis/xiaohongshu/search.js +34 -16
- package/clis/xiaohongshu/search.test.js +66 -11
- package/clis/yahoo/search.js +92 -0
- package/clis/yahoo/search.test.js +94 -0
- package/package.json +1 -1
|
@@ -7,7 +7,6 @@ cli({
|
|
|
7
7
|
domain: 'reddit.com',
|
|
8
8
|
strategy: Strategy.COOKIE,
|
|
9
9
|
browser: true,
|
|
10
|
-
siteSession: 'persistent',
|
|
11
10
|
args: [
|
|
12
11
|
{ name: 'username', type: 'string', required: true, positional: true, help: 'Reddit username (no `u/` prefix needed)' },
|
|
13
12
|
{ name: 'limit', type: 'int', default: 15 },
|
|
@@ -7,7 +7,6 @@ cli({
|
|
|
7
7
|
domain: 'reddit.com',
|
|
8
8
|
strategy: Strategy.COOKIE,
|
|
9
9
|
browser: true,
|
|
10
|
-
siteSession: 'persistent',
|
|
11
10
|
args: [
|
|
12
11
|
{ name: 'username', type: 'string', required: true, positional: true, help: 'Reddit username (no `u/` prefix needed)' },
|
|
13
12
|
{ name: 'limit', type: 'int', default: 15 },
|
package/clis/reddit/user.js
CHANGED
package/clis/reddit/whoami.js
CHANGED
|
@@ -31,6 +31,14 @@ function createPageMock(evaluateResult) {
|
|
|
31
31
|
getCookies: vi.fn().mockResolvedValue([{ name: 'sid', value: 'secret', domain: 'www.rednote.com' }]),
|
|
32
32
|
};
|
|
33
33
|
}
|
|
34
|
+
function createSearchPageMock(evaluateResults) {
|
|
35
|
+
const page = createPageMock(undefined);
|
|
36
|
+
page.evaluate = vi.fn();
|
|
37
|
+
for (const result of evaluateResults) {
|
|
38
|
+
page.evaluate.mockResolvedValueOnce(result);
|
|
39
|
+
}
|
|
40
|
+
return page;
|
|
41
|
+
}
|
|
34
42
|
|
|
35
43
|
describe('rednote note URL identity', () => {
|
|
36
44
|
const download = getRegistry().get('rednote/download');
|
|
@@ -130,6 +138,63 @@ describe('rednote argument validation', () => {
|
|
|
130
138
|
});
|
|
131
139
|
});
|
|
132
140
|
|
|
141
|
+
describe('rednote search browser-bridge envelopes', () => {
|
|
142
|
+
const search = getRegistry().get('rednote/search');
|
|
143
|
+
|
|
144
|
+
it('unwraps login-wall wait result envelopes before auth handling', async () => {
|
|
145
|
+
const page = createSearchPageMock([
|
|
146
|
+
{ session: 'site:rednote', data: 'login_wall' },
|
|
147
|
+
]);
|
|
148
|
+
|
|
149
|
+
await expect(search.func(page, { query: 'tesla', limit: 5 })).rejects.toMatchObject({
|
|
150
|
+
code: 'AUTH_REQUIRED',
|
|
151
|
+
message: expect.stringContaining('blocked behind a login wall'),
|
|
152
|
+
});
|
|
153
|
+
expect(page.evaluate).toHaveBeenCalledTimes(1);
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
it('unwraps search extraction envelopes and preserves rednote row shape', async () => {
|
|
157
|
+
const url = 'https://www.rednote.com/search_result/68e90be80000000004022e66?xsec_token=test-token';
|
|
158
|
+
const page = createSearchPageMock([
|
|
159
|
+
'content',
|
|
160
|
+
1,
|
|
161
|
+
{
|
|
162
|
+
session: 'site:rednote',
|
|
163
|
+
data: [{
|
|
164
|
+
title: 'rednote result',
|
|
165
|
+
author: 'author',
|
|
166
|
+
likes: '12',
|
|
167
|
+
url,
|
|
168
|
+
author_url: 'https://www.rednote.com/user/profile/u1',
|
|
169
|
+
}],
|
|
170
|
+
},
|
|
171
|
+
]);
|
|
172
|
+
|
|
173
|
+
await expect(search.func(page, { query: 'tesla', limit: 1 })).resolves.toEqual([{
|
|
174
|
+
rank: 1,
|
|
175
|
+
title: 'rednote result',
|
|
176
|
+
author: 'author',
|
|
177
|
+
likes: '12',
|
|
178
|
+
published_at: '2025-10-10',
|
|
179
|
+
url,
|
|
180
|
+
author_url: 'https://www.rednote.com/user/profile/u1',
|
|
181
|
+
}]);
|
|
182
|
+
});
|
|
183
|
+
|
|
184
|
+
it('fails typed instead of silently returning [] for malformed extraction payloads', async () => {
|
|
185
|
+
const page = createSearchPageMock([
|
|
186
|
+
'content',
|
|
187
|
+
1,
|
|
188
|
+
{ session: 'site:rednote', data: { rows: [] } },
|
|
189
|
+
]);
|
|
190
|
+
|
|
191
|
+
await expect(search.func(page, { query: 'tesla', limit: 1 })).rejects.toMatchObject({
|
|
192
|
+
code: 'COMMAND_EXEC',
|
|
193
|
+
message: expect.stringContaining('payload shape'),
|
|
194
|
+
});
|
|
195
|
+
});
|
|
196
|
+
});
|
|
197
|
+
|
|
133
198
|
describe('rednote Pinia store failures', () => {
|
|
134
199
|
it('maps feed store read failure to CommandExecutionError', async () => {
|
|
135
200
|
const command = getRegistry().get('rednote/feed');
|
package/clis/rednote/search.js
CHANGED
|
@@ -6,8 +6,8 @@
|
|
|
6
6
|
* 1:1 comparison between the two frontends.
|
|
7
7
|
*/
|
|
8
8
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
9
|
-
import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors';
|
|
10
|
-
import { buildScrollUntilJs, buildSearchExtractJs, noteIdToDate } from '../xiaohongshu/search.js';
|
|
9
|
+
import { ArgumentError, AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
|
|
10
|
+
import { buildScrollUntilJs, buildSearchExtractJs, noteIdToDate, unwrapEvaluateResult } from '../xiaohongshu/search.js';
|
|
11
11
|
|
|
12
12
|
function parseLimit(raw) {
|
|
13
13
|
const parsed = Number(raw);
|
|
@@ -19,6 +19,13 @@ function parseLimit(raw) {
|
|
|
19
19
|
}
|
|
20
20
|
return parsed;
|
|
21
21
|
}
|
|
22
|
+
function requireSearchRows(payload) {
|
|
23
|
+
const rows = unwrapEvaluateResult(payload);
|
|
24
|
+
if (!Array.isArray(rows)) {
|
|
25
|
+
throw new CommandExecutionError('Unexpected Rednote search extraction payload shape; expected an array of rows.');
|
|
26
|
+
}
|
|
27
|
+
return rows;
|
|
28
|
+
}
|
|
22
29
|
|
|
23
30
|
/**
|
|
24
31
|
* Wait for search results or login wall using MutationObserver (max 5s).
|
|
@@ -78,7 +85,7 @@ cli({
|
|
|
78
85
|
const limit = parseLimit(kwargs.limit ?? 20);
|
|
79
86
|
const keyword = encodeURIComponent(kwargs.query);
|
|
80
87
|
await page.goto(`https://www.rednote.com/search_result?keyword=${keyword}&source=web_search_result_notes`);
|
|
81
|
-
const waitResult = await page.evaluate(WAIT_FOR_CONTENT_JS);
|
|
88
|
+
const waitResult = unwrapEvaluateResult(await page.evaluate(WAIT_FOR_CONTENT_JS));
|
|
82
89
|
if (waitResult === 'login_wall') {
|
|
83
90
|
throw new AuthRequiredError('www.rednote.com', 'Rednote search results are blocked behind a login wall');
|
|
84
91
|
}
|
|
@@ -87,8 +94,7 @@ cli({
|
|
|
87
94
|
// `autoScroll({ times: 2 })` capped extraction at ~13 notes regardless
|
|
88
95
|
// of `--limit`.
|
|
89
96
|
await page.evaluate(buildScrollUntilJs(limit));
|
|
90
|
-
const
|
|
91
|
-
const data = Array.isArray(payload) ? payload : [];
|
|
97
|
+
const data = requireSearchRows(await page.evaluate(buildSearchExtractJs('www.rednote.com')));
|
|
92
98
|
return data
|
|
93
99
|
.filter((item) => item.title)
|
|
94
100
|
.slice(0, limit)
|
package/clis/twitter/article.js
CHANGED
|
@@ -124,7 +124,6 @@ cli({
|
|
|
124
124
|
domain: 'x.com',
|
|
125
125
|
strategy: Strategy.COOKIE,
|
|
126
126
|
browser: true,
|
|
127
|
-
siteSession: 'persistent',
|
|
128
127
|
args: [
|
|
129
128
|
{ name: 'folder-id', positional: true, type: 'string', required: true, help: 'Folder id from `opencli twitter bookmark-folders`.' },
|
|
130
129
|
{ name: 'limit', type: 'int', default: 20, help: 'Maximum number of bookmarks to return (default 20).' },
|
|
@@ -108,7 +108,6 @@ cli({
|
|
|
108
108
|
domain: 'x.com',
|
|
109
109
|
strategy: Strategy.COOKIE,
|
|
110
110
|
browser: true,
|
|
111
|
-
siteSession: 'persistent',
|
|
112
111
|
args: [
|
|
113
112
|
{ name: 'limit', type: 'int', default: 20, help: 'Maximum number of bookmarks to return (default 20).' },
|
|
114
113
|
{ name: 'top-by-engagement', type: 'int', default: 0, help: 'When set to N>0, re-rank the bookmarks by weighted engagement (likes×1 + retweets×3 + replies×2 + bookmarks×5 + log10(views+1)×0.5) and return the top N. Default 0 keeps the API\'s native (saved-time) ordering.' },
|
package/clis/twitter/download.js
CHANGED
|
@@ -15,7 +15,6 @@ cli({
|
|
|
15
15
|
description: 'Download Twitter/X media (images and videos). Provide either <username> to scan a profile\'s media tab, or --tweet-url to download a single tweet.',
|
|
16
16
|
domain: 'x.com',
|
|
17
17
|
strategy: Strategy.COOKIE,
|
|
18
|
-
siteSession: 'persistent',
|
|
19
18
|
args: [
|
|
20
19
|
{ name: 'username', positional: true, help: 'Twitter username (with or without @) to scan their /media tab. Either <username> or --tweet-url is required.' },
|
|
21
20
|
{ name: 'tweet-url', help: 'Single tweet URL to download. Use this OR <username>, not both required at once.' },
|
package/clis/twitter/likes.js
CHANGED
|
@@ -143,7 +143,6 @@ cli({
|
|
|
143
143
|
domain: 'x.com',
|
|
144
144
|
strategy: Strategy.COOKIE,
|
|
145
145
|
browser: true,
|
|
146
|
-
siteSession: 'persistent',
|
|
147
146
|
args: [
|
|
148
147
|
{ name: 'username', type: 'string', positional: true, help: 'Twitter screen name (with or without @). Defaults to the logged-in user when omitted.' },
|
|
149
148
|
{ name: 'limit', type: 'int', default: 20, help: 'Maximum number of liked tweets to return (default 20).' },
|
|
@@ -115,7 +115,6 @@ cli({
|
|
|
115
115
|
domain: 'x.com',
|
|
116
116
|
strategy: Strategy.COOKIE,
|
|
117
117
|
browser: true,
|
|
118
|
-
siteSession: 'persistent',
|
|
119
118
|
args: [
|
|
120
119
|
{ name: 'listId', positional: true, type: 'string', required: true, help: 'Numeric ID of a Twitter/X list (e.g. from `opencli twitter lists`)' },
|
|
121
120
|
{ name: 'limit', type: 'int', default: 50 },
|
package/clis/twitter/lists.js
CHANGED
package/clis/twitter/profile.js
CHANGED
|
@@ -11,7 +11,6 @@ cli({
|
|
|
11
11
|
domain: 'x.com',
|
|
12
12
|
strategy: Strategy.COOKIE,
|
|
13
13
|
browser: true,
|
|
14
|
-
siteSession: 'persistent',
|
|
15
14
|
args: [
|
|
16
15
|
{ name: 'username', type: 'string', positional: true, help: 'Twitter screen name (with or without @). Defaults to the logged-in user when omitted.' },
|
|
17
16
|
],
|
package/clis/twitter/search.js
CHANGED
|
@@ -261,7 +261,6 @@ cli({
|
|
|
261
261
|
domain: 'x.com',
|
|
262
262
|
strategy: Strategy.COOKIE,
|
|
263
263
|
browser: true,
|
|
264
|
-
siteSession: 'persistent',
|
|
265
264
|
args: [
|
|
266
265
|
{ name: 'query', type: 'string', required: true, positional: true, help: 'Search query. Raw X operators (e.g. "exact phrase", #tag, OR, lang:en, since:YYYY-MM-DD, from:, since:) are passed through unchanged.' },
|
|
267
266
|
{ name: 'filter', type: 'string', default: 'top', choices: ['top', 'live'], help: 'Legacy alias for --product. Kept for backwards compatibility; if --product is set it wins.' },
|
package/clis/twitter/thread.js
CHANGED
|
@@ -100,7 +100,6 @@ cli({
|
|
|
100
100
|
domain: 'x.com',
|
|
101
101
|
strategy: Strategy.COOKIE,
|
|
102
102
|
browser: true,
|
|
103
|
-
siteSession: 'persistent',
|
|
104
103
|
args: [
|
|
105
104
|
{ name: 'tweet-id', positional: true, type: 'string', required: true, help: 'Tweet numeric ID (e.g. 1234567890) or full status URL' },
|
|
106
105
|
{ name: 'limit', type: 'int', default: 50 },
|
package/clis/twitter/timeline.js
CHANGED
package/clis/twitter/trending.js
CHANGED
package/clis/twitter/tweets.js
CHANGED
|
@@ -221,7 +221,6 @@ cli({
|
|
|
221
221
|
domain: 'x.com',
|
|
222
222
|
strategy: Strategy.COOKIE,
|
|
223
223
|
browser: true,
|
|
224
|
-
siteSession: 'persistent',
|
|
225
224
|
args: [
|
|
226
225
|
{ name: 'username', type: 'string', positional: true, help: 'Twitter screen name (with or without @). Defaults to the logged-in user when omitted.' },
|
|
227
226
|
{ name: 'limit', type: 'int', default: 20, help: 'Max tweets to return' },
|
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* Ref: https://github.com/jackwener/opencli/issues/10
|
|
7
7
|
*/
|
|
8
8
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
9
|
-
import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors';
|
|
9
|
+
import { ArgumentError, AuthRequiredError, CommandExecutionError } from '@jackwener/opencli/errors';
|
|
10
10
|
/**
|
|
11
11
|
* Wait for search results or login wall using MutationObserver (max 5s).
|
|
12
12
|
* Returns 'content' if note items appeared, 'login_wall' if login gate
|
|
@@ -60,6 +60,26 @@ export function stripXhsAuthorDateSuffix(value) {
|
|
|
60
60
|
const stripped = text.replace(/\s*(?:\d{1,2}天前|\d+小时前|\d+分钟前|\d+秒前|刚刚|昨天|前天|\d+周前|\d+个月前|\d{1,2}-\d{1,2}|\d{4}-\d{1,2}-\d{1,2})$/u, '').trim();
|
|
61
61
|
return stripped || text;
|
|
62
62
|
}
|
|
63
|
+
/**
|
|
64
|
+
* `page.evaluate` may return either the raw IIFE value or a
|
|
65
|
+
* `{ session, data }` envelope depending on the browser-bridge version.
|
|
66
|
+
* Adapter code that called `Array.isArray(payload)` directly on the
|
|
67
|
+
* envelope silently received [] for every search. This helper normalizes
|
|
68
|
+
* both shapes so callers can keep their Array.isArray checks unchanged.
|
|
69
|
+
*/
|
|
70
|
+
export function unwrapEvaluateResult(payload) {
|
|
71
|
+
if (payload && !Array.isArray(payload) && typeof payload === 'object' && 'session' in payload && 'data' in payload) {
|
|
72
|
+
return payload.data;
|
|
73
|
+
}
|
|
74
|
+
return payload;
|
|
75
|
+
}
|
|
76
|
+
function requireSearchRows(payload, phase) {
|
|
77
|
+
const rows = unwrapEvaluateResult(payload);
|
|
78
|
+
if (!Array.isArray(rows)) {
|
|
79
|
+
throw new CommandExecutionError(`Unexpected Xiaohongshu search ${phase} payload shape; expected an array of rows.`);
|
|
80
|
+
}
|
|
81
|
+
return rows;
|
|
82
|
+
}
|
|
63
83
|
export function parseLimit(raw) {
|
|
64
84
|
const parsed = Number(raw ?? 20);
|
|
65
85
|
if (!Number.isFinite(parsed) || !Number.isInteger(parsed)) {
|
|
@@ -267,7 +287,7 @@ export const command = cli({
|
|
|
267
287
|
// Wait for search results to render (or login wall to appear).
|
|
268
288
|
// Uses MutationObserver to resolve as soon as content appears,
|
|
269
289
|
// instead of a fixed delay + blind retry.
|
|
270
|
-
const waitResult = await page.evaluate(WAIT_FOR_CONTENT_JS);
|
|
290
|
+
const waitResult = unwrapEvaluateResult(await page.evaluate(WAIT_FOR_CONTENT_JS));
|
|
271
291
|
if (waitResult === 'login_wall') {
|
|
272
292
|
throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall');
|
|
273
293
|
}
|
|
@@ -275,25 +295,23 @@ export const command = cli({
|
|
|
275
295
|
// layout, so scrolling to the bottom can evict the initially visible
|
|
276
296
|
// note cards from the DOM and make extraction return [] even though the
|
|
277
297
|
// browser rendered results correctly.
|
|
278
|
-
const initialPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
|
|
279
|
-
|
|
298
|
+
const initialPayload = requireSearchRows(await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com')), 'initial extraction');
|
|
299
|
+
const payload = [...initialPayload];
|
|
280
300
|
if (payload.length < limit) {
|
|
281
301
|
// Scroll until enough rows are rendered or the lazy-load plateaus.
|
|
282
302
|
// Replaces the previous fixed `autoScroll({ times: 2 })` which capped
|
|
283
303
|
// extraction at ~13 notes regardless of `--limit` (#1471).
|
|
284
304
|
await page.evaluate(buildScrollUntilJs(limit));
|
|
285
|
-
const scrolledPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
break;
|
|
296
|
-
}
|
|
305
|
+
const scrolledPayload = requireSearchRows(await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com')), 'post-scroll extraction');
|
|
306
|
+
const seen = new Set(payload.map((item) => item.url).filter(Boolean));
|
|
307
|
+
for (const item of scrolledPayload) {
|
|
308
|
+
if (item?.url && seen.has(item.url))
|
|
309
|
+
continue;
|
|
310
|
+
if (item?.url)
|
|
311
|
+
seen.add(item.url);
|
|
312
|
+
payload.push(item);
|
|
313
|
+
if (payload.length >= limit)
|
|
314
|
+
break;
|
|
297
315
|
}
|
|
298
316
|
}
|
|
299
317
|
const data = payload;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { describe, expect, it, vi } from 'vitest';
|
|
2
2
|
import { getRegistry } from '@jackwener/opencli/registry';
|
|
3
3
|
import { JSDOM } from 'jsdom';
|
|
4
|
-
import { __test__, buildScrollUntilJs, noteIdToDate } from './search.js';
|
|
4
|
+
import { __test__, buildScrollUntilJs, noteIdToDate, unwrapEvaluateResult } from './search.js';
|
|
5
5
|
|
|
6
6
|
function markVisible(el) {
|
|
7
7
|
el.getBoundingClientRect = () => ({ width: 100, height: 100 });
|
|
@@ -57,24 +57,37 @@ describe('xiaohongshu search', () => {
|
|
|
57
57
|
expect(page.evaluate).toHaveBeenCalledTimes(1);
|
|
58
58
|
expect(page.autoScroll).not.toHaveBeenCalled();
|
|
59
59
|
});
|
|
60
|
+
it('unwraps a browser-bridge envelope before handling login-wall wait result', async () => {
|
|
61
|
+
const cmd = getRegistry().get('xiaohongshu/search');
|
|
62
|
+
const page = createPageMock([
|
|
63
|
+
{ session: 'site:xiaohongshu', data: 'login_wall' },
|
|
64
|
+
]);
|
|
65
|
+
|
|
66
|
+
await expect(cmd.func(page, { query: '特斯拉', limit: 5 })).rejects.toMatchObject({
|
|
67
|
+
code: 'AUTH_REQUIRED',
|
|
68
|
+
message: expect.stringContaining('blocked behind a login wall'),
|
|
69
|
+
});
|
|
70
|
+
expect(page.evaluate).toHaveBeenCalledTimes(1);
|
|
71
|
+
});
|
|
60
72
|
it('returns ranked results with search_result url and author_url preserved', async () => {
|
|
61
73
|
const cmd = getRegistry().get('xiaohongshu/search');
|
|
62
74
|
expect(cmd?.func).toBeTypeOf('function');
|
|
63
75
|
const detailUrl = 'https://www.xiaohongshu.com/search_result/68e90be80000000004022e66?xsec_token=test-token&xsec_source=';
|
|
64
76
|
const authorUrl = 'https://www.xiaohongshu.com/user/profile/635a9c720000000018028b40?xsec_token=user-token&xsec_source=pc_search';
|
|
77
|
+
const rows = [
|
|
78
|
+
{
|
|
79
|
+
title: '某鱼买FSD被坑了4万',
|
|
80
|
+
author: '随风',
|
|
81
|
+
likes: '261',
|
|
82
|
+
url: detailUrl,
|
|
83
|
+
author_url: authorUrl,
|
|
84
|
+
},
|
|
85
|
+
];
|
|
65
86
|
const page = createPageMock([
|
|
66
87
|
// First evaluate: MutationObserver wait (content appeared)
|
|
67
88
|
'content',
|
|
68
|
-
// Second evaluate: initial DOM extraction (already enough results)
|
|
69
|
-
|
|
70
|
-
{
|
|
71
|
-
title: '某鱼买FSD被坑了4万',
|
|
72
|
-
author: '随风',
|
|
73
|
-
likes: '261',
|
|
74
|
-
url: detailUrl,
|
|
75
|
-
author_url: authorUrl,
|
|
76
|
-
},
|
|
77
|
-
],
|
|
89
|
+
// Second evaluate: initial DOM extraction (already enough results) through Browser Bridge envelope.
|
|
90
|
+
{ session: 'site:xiaohongshu', data: rows },
|
|
78
91
|
]);
|
|
79
92
|
const result = await cmd.func(page, { query: '特斯拉', limit: 1 });
|
|
80
93
|
// Should only do one goto (the search page itself), no per-note detail navigation
|
|
@@ -91,6 +104,18 @@ describe('xiaohongshu search', () => {
|
|
|
91
104
|
},
|
|
92
105
|
]);
|
|
93
106
|
});
|
|
107
|
+
it('fails typed instead of silently returning [] for malformed extraction payloads', async () => {
|
|
108
|
+
const cmd = getRegistry().get('xiaohongshu/search');
|
|
109
|
+
const page = createPageMock([
|
|
110
|
+
'content',
|
|
111
|
+
{ session: 'site:xiaohongshu', data: { rows: [] } },
|
|
112
|
+
]);
|
|
113
|
+
|
|
114
|
+
await expect(cmd.func(page, { query: '测试', limit: 1 })).rejects.toMatchObject({
|
|
115
|
+
code: 'COMMAND_EXEC',
|
|
116
|
+
message: expect.stringContaining('payload shape'),
|
|
117
|
+
});
|
|
118
|
+
});
|
|
94
119
|
it('filters out results with no title and respects the limit', async () => {
|
|
95
120
|
const cmd = getRegistry().get('xiaohongshu/search');
|
|
96
121
|
expect(cmd?.func).toBeTypeOf('function');
|
|
@@ -135,6 +160,10 @@ describe('xiaohongshu search', () => {
|
|
|
135
160
|
'content',
|
|
136
161
|
// Second evaluate: initial extraction (no rows rendered)
|
|
137
162
|
[],
|
|
163
|
+
// Third evaluate: scroll-until row count
|
|
164
|
+
0,
|
|
165
|
+
// Fourth evaluate: post-scroll extraction (still no rows)
|
|
166
|
+
[],
|
|
138
167
|
]);
|
|
139
168
|
const result = (await cmd.func(page, { query: '测试等待', limit: 5 }));
|
|
140
169
|
expect(result).toHaveLength(0);
|
|
@@ -268,3 +297,29 @@ describe('noteIdToDate (ObjectID timestamp parsing)', () => {
|
|
|
268
297
|
expect(noteIdToDate('https://www.xiaohongshu.com/search_result/000000000000000000000000')).toBe('');
|
|
269
298
|
});
|
|
270
299
|
});
|
|
300
|
+
describe('unwrapEvaluateResult (browser-bridge envelope normalization)', () => {
|
|
301
|
+
it('returns the raw array unchanged when payload is already an array', () => {
|
|
302
|
+
const arr = [{ title: 'a' }, { title: 'b' }];
|
|
303
|
+
expect(unwrapEvaluateResult(arr)).toBe(arr);
|
|
304
|
+
});
|
|
305
|
+
it('unwraps { session, data: [...] } envelope to the inner array', () => {
|
|
306
|
+
const arr = [{ title: 'a' }];
|
|
307
|
+
const env = { session: 'site:xiaohongshu:abc', data: arr };
|
|
308
|
+
expect(unwrapEvaluateResult(env)).toBe(arr);
|
|
309
|
+
});
|
|
310
|
+
it('unwraps primitive data from Browser Bridge envelopes', () => {
|
|
311
|
+
expect(unwrapEvaluateResult({ session: 'site:xiaohongshu:abc', data: 'login_wall' })).toBe('login_wall');
|
|
312
|
+
});
|
|
313
|
+
it('passes non-envelope objects through unchanged', () => {
|
|
314
|
+
const obj = { results: [], loginWall: true };
|
|
315
|
+
expect(unwrapEvaluateResult(obj)).toBe(obj);
|
|
316
|
+
});
|
|
317
|
+
it('handles null and undefined safely', () => {
|
|
318
|
+
expect(unwrapEvaluateResult(null)).toBe(null);
|
|
319
|
+
expect(unwrapEvaluateResult(undefined)).toBe(undefined);
|
|
320
|
+
});
|
|
321
|
+
it('unwraps non-array envelope data so callers can validate the payload shape', () => {
|
|
322
|
+
const env = { session: 'x', data: { not: 'an array' } };
|
|
323
|
+
expect(unwrapEvaluateResult(env)).toEqual({ not: 'an array' });
|
|
324
|
+
});
|
|
325
|
+
});
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
|
+
import {
|
|
3
|
+
emptySearchResults,
|
|
4
|
+
requireBoundedInteger,
|
|
5
|
+
requireRows,
|
|
6
|
+
requireSearchQuery,
|
|
7
|
+
runBrowserStep,
|
|
8
|
+
toHttpsUrl,
|
|
9
|
+
} from '../_shared/search-adapter.js';
|
|
10
|
+
|
|
11
|
+
function decodeYahooUrl(href) {
|
|
12
|
+
if (!href) return '';
|
|
13
|
+
if (href.indexOf('RU=') !== -1 && href.indexOf('/RK=') !== -1) {
|
|
14
|
+
var match = href.match(/RU=([^/]+)\/RK=/);
|
|
15
|
+
if (match && match[1]) {
|
|
16
|
+
try {
|
|
17
|
+
return toHttpsUrl(decodeURIComponent(match[1]), 'https://search.yahoo.com');
|
|
18
|
+
} catch {
|
|
19
|
+
return toHttpsUrl(href, 'https://search.yahoo.com');
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
return toHttpsUrl(href, 'https://search.yahoo.com');
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
function buildExtractorJs(limit) {
|
|
27
|
+
return `
|
|
28
|
+
(function() {
|
|
29
|
+
var results = [];
|
|
30
|
+
var seen = {};
|
|
31
|
+
var items = document.querySelectorAll('.algo');
|
|
32
|
+
for (var i = 0; i < items.length; i++) {
|
|
33
|
+
if (results.length >= ${limit}) break;
|
|
34
|
+
var el = items[i];
|
|
35
|
+
var h3 = el.querySelector('h3');
|
|
36
|
+
var linkEl = el.querySelector('.compTitle a');
|
|
37
|
+
var snippetEl = el.querySelector('.compText');
|
|
38
|
+
if (!h3 || !linkEl) continue;
|
|
39
|
+
var title = h3.textContent.trim();
|
|
40
|
+
var href = linkEl.getAttribute('href') || '';
|
|
41
|
+
var snippet = snippetEl ? snippetEl.textContent.trim() : '';
|
|
42
|
+
if (!title || !href || seen[href]) continue;
|
|
43
|
+
seen[href] = true;
|
|
44
|
+
results.push([title, href, snippet]);
|
|
45
|
+
}
|
|
46
|
+
return results;
|
|
47
|
+
})()`;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
const command = cli({
|
|
51
|
+
site: 'yahoo',
|
|
52
|
+
name: 'search',
|
|
53
|
+
access: 'read',
|
|
54
|
+
description: 'Search Yahoo (powered by Bing)',
|
|
55
|
+
domain: 'search.yahoo.com',
|
|
56
|
+
strategy: Strategy.PUBLIC,
|
|
57
|
+
browser: true,
|
|
58
|
+
args: [
|
|
59
|
+
{ name: 'keyword', positional: true, required: true, help: 'Search query' },
|
|
60
|
+
{ name: 'limit', type: 'int', default: 7, help: 'Number of results per page (max 7)' },
|
|
61
|
+
{ name: 'page', type: 'int', default: 1, help: 'Page number (1, 2, 3...). Yahoo returns ~7 results per page' },
|
|
62
|
+
],
|
|
63
|
+
columns: ['rank', 'title', 'url', 'snippet'],
|
|
64
|
+
func: async (page, kwargs) => {
|
|
65
|
+
const limit = requireBoundedInteger(kwargs.limit, 7, 1, 7, '--limit');
|
|
66
|
+
const query = requireSearchQuery(kwargs.keyword);
|
|
67
|
+
const keyword = encodeURIComponent(query);
|
|
68
|
+
const pageNum = requireBoundedInteger(kwargs.page, 1, 1, 100, '--page');
|
|
69
|
+
var url = `https://search.yahoo.com/search?p=${keyword}`;
|
|
70
|
+
if (pageNum > 1) url += `&b=${(pageNum - 1) * 7 + 1}`;
|
|
71
|
+
await runBrowserStep('yahoo search navigation', () => page.goto(url));
|
|
72
|
+
try {
|
|
73
|
+
await page.wait({ selector: '.algo', timeout: 10 });
|
|
74
|
+
} catch {
|
|
75
|
+
await page.wait(3).catch(function() {});
|
|
76
|
+
}
|
|
77
|
+
const raw = await runBrowserStep('yahoo search extraction', () => page.evaluate(buildExtractorJs(limit)));
|
|
78
|
+
const results = requireRows(raw, 'yahoo search');
|
|
79
|
+
if (results.length === 0) {
|
|
80
|
+
throw emptySearchResults('Yahoo', query);
|
|
81
|
+
}
|
|
82
|
+
const rows = results
|
|
83
|
+
.map(function(r, index) {
|
|
84
|
+
return { rank: index + 1 + (pageNum - 1) * 7, title: r[0], url: decodeYahooUrl(r[1]), snippet: r[2] };
|
|
85
|
+
})
|
|
86
|
+
.filter((row) => row.url);
|
|
87
|
+
if (rows.length === 0) throw emptySearchResults('Yahoo', query);
|
|
88
|
+
return rows;
|
|
89
|
+
},
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
export const __test__ = { command };
|