@jackwener/opencli 1.7.18 → 1.7.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -17
- package/README.zh-CN.md +16 -18
- package/cli-manifest.json +311 -186
- package/clis/ctrip/ctrip.test.js +486 -1
- package/clis/ctrip/flight.js +136 -0
- package/clis/ctrip/hotel-search.js +132 -0
- package/clis/ctrip/utils.js +298 -0
- package/clis/google/search.js +16 -6
- package/clis/google-scholar/search.js +20 -5
- package/clis/google-scholar/search.test.js +35 -2
- package/clis/reddit/home.js +117 -0
- package/clis/reddit/home.test.js +127 -0
- package/clis/reddit/read.js +400 -54
- package/clis/reddit/read.test.js +315 -12
- package/clis/reddit/subreddit-info.js +117 -0
- package/clis/reddit/subreddit-info.test.js +163 -0
- package/clis/reddit/whoami.js +84 -0
- package/clis/reddit/whoami.test.js +105 -0
- package/clis/rednote/search.js +6 -2
- package/clis/twitter/bookmark-folder.js +8 -4
- package/clis/twitter/bookmark-folder.test.js +59 -1
- package/clis/twitter/bookmarks.js +12 -4
- package/clis/twitter/bookmarks.test.js +205 -0
- package/clis/twitter/followers.js +20 -5
- package/clis/twitter/followers.test.js +44 -0
- package/clis/twitter/following.js +36 -20
- package/clis/twitter/following.test.js +60 -8
- package/clis/twitter/likes.js +28 -13
- package/clis/twitter/likes.test.js +111 -1
- package/clis/twitter/list-add.js +128 -204
- package/clis/twitter/list-add.test.js +97 -1
- package/clis/twitter/list-tweets.js +13 -4
- package/clis/twitter/list-tweets.test.js +48 -0
- package/clis/twitter/lists.js +5 -2
- package/clis/twitter/post.js +23 -4
- package/clis/twitter/post.test.js +30 -0
- package/clis/twitter/profile.js +16 -8
- package/clis/twitter/profile.test.js +39 -0
- package/clis/twitter/reply.js +133 -10
- package/clis/twitter/reply.test.js +55 -0
- package/clis/twitter/search.js +188 -170
- package/clis/twitter/search.test.js +96 -258
- package/clis/twitter/shared.js +167 -16
- package/clis/twitter/shared.test.js +102 -1
- package/clis/twitter/timeline.js +3 -1
- package/clis/twitter/tweets.js +147 -51
- package/clis/twitter/tweets.test.js +238 -1
- package/clis/xiaohongshu/comments.js +23 -2
- package/clis/xiaohongshu/comments.test.js +63 -1
- package/clis/xiaohongshu/search.js +168 -13
- package/clis/xiaohongshu/search.test.js +82 -8
- package/clis/xueqiu/earnings-date.js +2 -2
- package/clis/xueqiu/kline.js +2 -2
- package/clis/xueqiu/utils.js +19 -0
- package/clis/xueqiu/utils.test.js +26 -0
- package/clis/zhihu/answer-detail.js +233 -0
- package/clis/zhihu/answer-detail.test.js +330 -0
- package/clis/zhihu/question.js +44 -10
- package/clis/zhihu/question.test.js +78 -1
- package/clis/zhihu/recommend.js +103 -0
- package/clis/zhihu/recommend.test.js +143 -0
- package/dist/src/browser/base-page.d.ts +3 -2
- package/dist/src/browser/base-page.test.js +2 -2
- package/dist/src/browser/cdp.js +3 -3
- package/dist/src/browser/daemon-client.d.ts +1 -0
- package/dist/src/browser/daemon-client.js +3 -0
- package/dist/src/browser/daemon-client.test.js +20 -0
- package/dist/src/browser/page.d.ts +3 -2
- package/dist/src/browser/page.js +4 -4
- package/dist/src/browser/page.test.js +31 -0
- package/dist/src/browser/utils.d.ts +10 -0
- package/dist/src/browser/utils.js +37 -0
- package/dist/src/browser/utils.test.d.ts +1 -0
- package/dist/src/browser/utils.test.js +29 -0
- package/dist/src/cli-argv-preprocess.d.ts +37 -0
- package/dist/src/cli-argv-preprocess.js +131 -0
- package/dist/src/cli-argv-preprocess.test.d.ts +1 -0
- package/dist/src/cli-argv-preprocess.test.js +130 -0
- package/dist/src/cli.js +131 -89
- package/dist/src/cli.test.js +34 -28
- package/dist/src/commands/daemon.js +6 -7
- package/dist/src/daemon-utils.d.ts +18 -0
- package/dist/src/daemon-utils.js +37 -0
- package/dist/src/daemon.d.ts +1 -1
- package/dist/src/daemon.js +44 -13
- package/dist/src/daemon.test.js +42 -1
- package/dist/src/doctor.js +15 -16
- package/dist/src/download/progress.js +15 -11
- package/dist/src/download/progress.test.d.ts +1 -0
- package/dist/src/download/progress.test.js +25 -0
- package/dist/src/electron-apps.js +0 -1
- package/dist/src/electron-apps.test.js +1 -0
- package/dist/src/execution.js +1 -3
- package/dist/src/execution.test.js +4 -16
- package/dist/src/external-clis.yaml +12 -3
- package/dist/src/external.d.ts +4 -0
- package/dist/src/external.js +3 -0
- package/dist/src/external.test.js +24 -1
- package/dist/src/help.d.ts +16 -1
- package/dist/src/help.js +50 -8
- package/dist/src/help.test.js +5 -1
- package/dist/src/logger.js +8 -9
- package/dist/src/main.js +16 -0
- package/dist/src/output.js +4 -5
- package/dist/src/runtime-detect.d.ts +1 -1
- package/dist/src/runtime-detect.js +1 -1
- package/dist/src/runtime-detect.test.js +3 -2
- package/dist/src/tui.d.ts +0 -1
- package/dist/src/tui.js +9 -22
- package/dist/src/types.d.ts +3 -1
- package/dist/src/update-check.js +4 -5
- package/package.json +5 -4
- package/clis/notion/export.js +0 -32
- package/clis/notion/favorites.js +0 -85
- package/clis/notion/new.js +0 -35
- package/clis/notion/read.js +0 -31
- package/clis/notion/search.js +0 -47
- package/clis/notion/sidebar.js +0 -42
- package/clis/notion/status.js +0 -17
- package/clis/notion/write.js +0 -41
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import { describe, expect, it } from 'vitest';
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
2
2
|
import { getRegistry } from '@jackwener/opencli/registry';
|
|
3
|
+
import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors';
|
|
3
4
|
import { __test__ } from './tweets.js';
|
|
4
5
|
|
|
5
6
|
describe('twitter tweets helpers', () => {
|
|
@@ -8,6 +9,140 @@ describe('twitter tweets helpers', () => {
|
|
|
8
9
|
expect(cmd?.columns).toEqual(['id', 'author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url', 'has_media', 'media_urls']);
|
|
9
10
|
});
|
|
10
11
|
|
|
12
|
+
it('makes the username argument optional so it can default to the logged-in user', () => {
|
|
13
|
+
const cmd = getRegistry().get('twitter/tweets');
|
|
14
|
+
const usernameArg = cmd?.args?.find((arg) => arg.name === 'username');
|
|
15
|
+
expect(usernameArg).toBeDefined();
|
|
16
|
+
expect(usernameArg?.required).not.toBe(true);
|
|
17
|
+
expect(usernameArg?.help || '').toMatch(/default/i);
|
|
18
|
+
expect(cmd?.description || '').toMatch(/default/i);
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
it('detects the logged-in user via AppTabBar_Profile_Link when no username is given', async () => {
|
|
22
|
+
const cmd = getRegistry().get('twitter/tweets');
|
|
23
|
+
const evaluatedScripts = [];
|
|
24
|
+
const page = {
|
|
25
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
26
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
27
|
+
getCookies: vi.fn(async () => [{ name: 'ct0', value: 'token' }]),
|
|
28
|
+
evaluate: vi.fn(async (script) => {
|
|
29
|
+
const text = typeof script === 'function' ? script.toString() : String(script);
|
|
30
|
+
evaluatedScripts.push(text);
|
|
31
|
+
if (text.includes('AppTabBar_Profile_Link')) return '/viewer';
|
|
32
|
+
if (text.includes('operationName')) return null; // operation metadata resolver
|
|
33
|
+
if (text.includes('/UserByScreenName')) return '42';
|
|
34
|
+
if (text.includes('/UserTweets')) {
|
|
35
|
+
return {
|
|
36
|
+
data: {
|
|
37
|
+
user: {
|
|
38
|
+
result: {
|
|
39
|
+
timeline_v2: {
|
|
40
|
+
timeline: {
|
|
41
|
+
instructions: [
|
|
42
|
+
{
|
|
43
|
+
entries: [
|
|
44
|
+
{
|
|
45
|
+
entryId: 'tweet-1',
|
|
46
|
+
content: {
|
|
47
|
+
itemContent: {
|
|
48
|
+
tweet_results: {
|
|
49
|
+
result: {
|
|
50
|
+
rest_id: '1',
|
|
51
|
+
legacy: {
|
|
52
|
+
full_text: 'own post',
|
|
53
|
+
favorite_count: 0,
|
|
54
|
+
retweet_count: 0,
|
|
55
|
+
reply_count: 0,
|
|
56
|
+
created_at: 'now',
|
|
57
|
+
},
|
|
58
|
+
core: {
|
|
59
|
+
user_results: {
|
|
60
|
+
result: {
|
|
61
|
+
legacy: { screen_name: 'viewer', name: 'Viewer' },
|
|
62
|
+
},
|
|
63
|
+
},
|
|
64
|
+
},
|
|
65
|
+
},
|
|
66
|
+
},
|
|
67
|
+
},
|
|
68
|
+
},
|
|
69
|
+
},
|
|
70
|
+
],
|
|
71
|
+
},
|
|
72
|
+
],
|
|
73
|
+
},
|
|
74
|
+
},
|
|
75
|
+
},
|
|
76
|
+
},
|
|
77
|
+
},
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
return null;
|
|
81
|
+
}),
|
|
82
|
+
};
|
|
83
|
+
const rows = await cmd.func(page, { limit: 1 });
|
|
84
|
+
// Navigated home to read the logged-in user
|
|
85
|
+
expect(page.goto).toHaveBeenCalledWith('https://x.com/home');
|
|
86
|
+
// AppTabBar_Profile_Link probe happened before any GraphQL fetch
|
|
87
|
+
const probeIdx = evaluatedScripts.findIndex((t) => t.includes('AppTabBar_Profile_Link'));
|
|
88
|
+
const graphqlIdx = evaluatedScripts.findIndex((t) => t.includes('/UserByScreenName'));
|
|
89
|
+
expect(probeIdx).toBeGreaterThanOrEqual(0);
|
|
90
|
+
expect(graphqlIdx).toBeGreaterThan(probeIdx);
|
|
91
|
+
// The detected handle ('viewer') was used for the UserByScreenName lookup
|
|
92
|
+
const lookup = evaluatedScripts.find((t) => t.includes('/UserByScreenName')) || '';
|
|
93
|
+
expect(decodeURIComponent(lookup)).toContain('"screen_name":"viewer"');
|
|
94
|
+
expect(rows).toHaveLength(1);
|
|
95
|
+
expect(rows[0]).toMatchObject({ id: '1', author: 'viewer', url: 'https://x.com/viewer/status/1' });
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
it('throws AuthRequiredError when no username is given and the logged-in user cannot be detected', async () => {
|
|
99
|
+
const cmd = getRegistry().get('twitter/tweets');
|
|
100
|
+
const page = {
|
|
101
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
102
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
103
|
+
getCookies: vi.fn(async () => []),
|
|
104
|
+
evaluate: vi.fn(async (script) => {
|
|
105
|
+
const text = typeof script === 'function' ? script.toString() : String(script);
|
|
106
|
+
if (text.includes('AppTabBar_Profile_Link')) return null;
|
|
107
|
+
return null;
|
|
108
|
+
}),
|
|
109
|
+
};
|
|
110
|
+
await expect(cmd.func(page, {})).rejects.toBeInstanceOf(AuthRequiredError);
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
it('rejects invalid explicit username before navigation', async () => {
|
|
114
|
+
const cmd = getRegistry().get('twitter/tweets');
|
|
115
|
+
const page = {
|
|
116
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
117
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
118
|
+
getCookies: vi.fn(async () => [{ name: 'ct0', value: 'token' }]),
|
|
119
|
+
evaluate: vi.fn(),
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
await expect(cmd.func(page, { username: 'viewer/extra' })).rejects.toBeInstanceOf(ArgumentError);
|
|
123
|
+
expect(page.goto).not.toHaveBeenCalled();
|
|
124
|
+
expect(page.getCookies).not.toHaveBeenCalled();
|
|
125
|
+
expect(page.evaluate).not.toHaveBeenCalled();
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
it('rejects non-profile AppTabBar hrefs instead of querying route names as users', async () => {
|
|
129
|
+
const cmd = getRegistry().get('twitter/tweets');
|
|
130
|
+
const page = {
|
|
131
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
132
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
133
|
+
getCookies: vi.fn(async () => [{ name: 'ct0', value: 'token' }]),
|
|
134
|
+
evaluate: vi.fn(async (script) => {
|
|
135
|
+
const text = typeof script === 'function' ? script.toString() : String(script);
|
|
136
|
+
if (text.includes('AppTabBar_Profile_Link')) return '/home';
|
|
137
|
+
throw new Error(`Unexpected evaluate: ${text.slice(0, 80)}`);
|
|
138
|
+
}),
|
|
139
|
+
};
|
|
140
|
+
|
|
141
|
+
await expect(cmd.func(page, {})).rejects.toBeInstanceOf(AuthRequiredError);
|
|
142
|
+
expect(page.goto).toHaveBeenCalledWith('https://x.com/home');
|
|
143
|
+
expect(page.evaluate).toHaveBeenCalledTimes(1);
|
|
144
|
+
});
|
|
145
|
+
|
|
11
146
|
it('falls back when queryId contains unsafe characters', () => {
|
|
12
147
|
expect(__test__.sanitizeQueryId('safe_Query-123', 'fallback')).toBe('safe_Query-123');
|
|
13
148
|
expect(__test__.sanitizeQueryId('bad"id', 'fallback')).toBe('fallback');
|
|
@@ -60,6 +195,18 @@ describe('twitter tweets helpers', () => {
|
|
|
60
195
|
expect(b.is_retweet).toBe(true);
|
|
61
196
|
});
|
|
62
197
|
|
|
198
|
+
it('unwraps TweetWithVisibilityResults', () => {
|
|
199
|
+
const tweet = __test__.extractTweet({
|
|
200
|
+
__typename: 'TweetWithVisibilityResults',
|
|
201
|
+
tweet: {
|
|
202
|
+
rest_id: '42',
|
|
203
|
+
legacy: { full_text: 'visible post', favorite_count: 2, retweet_count: 0, reply_count: 0, created_at: 'now' },
|
|
204
|
+
core: { user_results: { result: { legacy: { screen_name: 'alice', name: 'Alice' } } } },
|
|
205
|
+
},
|
|
206
|
+
}, new Set());
|
|
207
|
+
expect(tweet).toMatchObject({ id: '42', author: 'alice', text: 'visible post' });
|
|
208
|
+
});
|
|
209
|
+
|
|
63
210
|
it('parses chronological tweets and skips pinned instruction', () => {
|
|
64
211
|
const chronEntry = {
|
|
65
212
|
entryId: 'tweet-1',
|
|
@@ -122,4 +269,94 @@ describe('twitter tweets helpers', () => {
|
|
|
122
269
|
url: 'https://x.com/alice/status/1',
|
|
123
270
|
});
|
|
124
271
|
});
|
|
272
|
+
|
|
273
|
+
it('recursively parses tweets nested in timeline modules', () => {
|
|
274
|
+
const payload = {
|
|
275
|
+
data: {
|
|
276
|
+
user: {
|
|
277
|
+
result: {
|
|
278
|
+
timeline_v2: {
|
|
279
|
+
timeline: {
|
|
280
|
+
instructions: [
|
|
281
|
+
{
|
|
282
|
+
type: 'TimelineAddEntries',
|
|
283
|
+
entries: [
|
|
284
|
+
{
|
|
285
|
+
entryId: 'profile-conversation-1',
|
|
286
|
+
content: {
|
|
287
|
+
entryType: 'TimelineTimelineModule',
|
|
288
|
+
items: [
|
|
289
|
+
{
|
|
290
|
+
item: {
|
|
291
|
+
itemContent: {
|
|
292
|
+
tweet_results: {
|
|
293
|
+
result: {
|
|
294
|
+
rest_id: '2',
|
|
295
|
+
legacy: { full_text: 'nested post', favorite_count: 1, retweet_count: 0, reply_count: 0, created_at: 'now' },
|
|
296
|
+
core: { user_results: { result: { legacy: { screen_name: 'alice', name: 'Alice' } } } },
|
|
297
|
+
},
|
|
298
|
+
},
|
|
299
|
+
},
|
|
300
|
+
},
|
|
301
|
+
},
|
|
302
|
+
],
|
|
303
|
+
},
|
|
304
|
+
},
|
|
305
|
+
{
|
|
306
|
+
entryId: 'cursor-bottom-2',
|
|
307
|
+
content: { entryType: 'TimelineTimelineCursor', cursorType: 'Bottom', value: 'next' },
|
|
308
|
+
},
|
|
309
|
+
],
|
|
310
|
+
},
|
|
311
|
+
],
|
|
312
|
+
},
|
|
313
|
+
},
|
|
314
|
+
},
|
|
315
|
+
},
|
|
316
|
+
},
|
|
317
|
+
};
|
|
318
|
+
const result = __test__.parseUserTweets(payload, new Set());
|
|
319
|
+
expect(result.nextCursor).toBe('next');
|
|
320
|
+
expect(result.tweets).toHaveLength(1);
|
|
321
|
+
expect(result.tweets[0]).toMatchObject({ id: '2', text: 'nested post' });
|
|
322
|
+
});
|
|
323
|
+
|
|
324
|
+
it('uses populated timeline instructions when timeline_v2 is present but empty', () => {
|
|
325
|
+
const payload = {
|
|
326
|
+
data: {
|
|
327
|
+
user: {
|
|
328
|
+
result: {
|
|
329
|
+
timeline_v2: { timeline: { instructions: [] } },
|
|
330
|
+
timeline: {
|
|
331
|
+
timeline: {
|
|
332
|
+
instructions: [
|
|
333
|
+
{
|
|
334
|
+
type: 'TimelineAddEntries',
|
|
335
|
+
entries: [
|
|
336
|
+
{
|
|
337
|
+
content: {
|
|
338
|
+
itemContent: {
|
|
339
|
+
tweet_results: {
|
|
340
|
+
result: {
|
|
341
|
+
rest_id: '3',
|
|
342
|
+
legacy: { full_text: 'fallback timeline post', favorite_count: 0, retweet_count: 0, reply_count: 0, created_at: 'now' },
|
|
343
|
+
core: { user_results: { result: { legacy: { screen_name: 'alice', name: 'Alice' } } } },
|
|
344
|
+
},
|
|
345
|
+
},
|
|
346
|
+
},
|
|
347
|
+
},
|
|
348
|
+
},
|
|
349
|
+
],
|
|
350
|
+
},
|
|
351
|
+
],
|
|
352
|
+
},
|
|
353
|
+
},
|
|
354
|
+
},
|
|
355
|
+
},
|
|
356
|
+
},
|
|
357
|
+
};
|
|
358
|
+
const result = __test__.parseUserTweets(payload, new Set());
|
|
359
|
+
expect(result.tweets).toHaveLength(1);
|
|
360
|
+
expect(result.tweets[0]).toMatchObject({ id: '3', text: 'fallback timeline post' });
|
|
361
|
+
});
|
|
125
362
|
});
|
|
@@ -14,12 +14,33 @@ export function parseCommentLimit(raw, fallback = 20) {
|
|
|
14
14
|
return fallback;
|
|
15
15
|
return Math.max(1, Math.min(Math.floor(n), 50));
|
|
16
16
|
}
|
|
17
|
+
|
|
18
|
+
/**
 * Parse a Xiaohongshu/rednote like-count label into a plain number.
 *
 * Accepted shapes (all whitespace is ignored, a trailing `+` is allowed):
 *   - plain integers:            `42`, `1234+`
 *   - thousands-grouped integers `1,234` (ASCII or fullwidth comma)
 *   - short forms with a unit:   `2.1w` / `1.5万` (x10000), `1.2k` / `3千` (x1000)
 *
 * Anything else (empty, nullish, bare decimals such as `1.5`, free text such
 * as `likes 2.1w`) yields 0 rather than a best-effort guess.
 *
 * NOTE: buildCommentsExtractJs serializes this function with `.toString()`
 * and injects it into the in-page script, so the body must stay fully
 * self-contained: no module-level constants, helpers or imports.
 *
 * @param {unknown} value - Raw label text (or null/undefined).
 * @returns {number} The parsed count, or 0 when the shape is not recognized.
 */
export function parseXhsLikeCountText(value) {
    // The group separator is written as an explicit escape so the fullwidth
    // comma (U+FF0C) cannot be silently folded into a second ASCII comma by
    // an encoding-unaware tool.
    const integerRe = /^(?:\d+|\d{1,3}(?:[,\uFF0C]\d{3})+)\+?$/u;
    const shortformRe = /^((?:\d+|\d{1,3}(?:[,\uFF0C]\d{3})+)(?:\.\d+)?)([wWkK万千])\+?$/u;
    const raw = String(value ?? '').replace(/\s+/g, '');
    if (!raw)
        return 0;
    if (integerRe.test(raw))
        return Number(raw.replace(/[,\uFF0C+]/g, ''));
    const short = raw.match(shortformRe);
    if (!short)
        return 0;
    const numeric = Number(short[1].replace(/[,\uFF0C]/g, ''));
    if (!Number.isFinite(numeric))
        return 0;
    const unit = short[2].toLowerCase();
    const multiplier = unit === 'w' || unit === '万' ? 10000 : 1000;
    // Round to absorb floating-point noise from products like 2.1 * 10000.
    return Math.round(numeric * multiplier);
}
|
|
36
|
+
|
|
17
37
|
/**
|
|
18
38
|
* Host-agnostic IIFE that scrolls a note's comment list and extracts
|
|
19
39
|
* top-level comments (and optionally nested 楼中楼 replies). Exported so
|
|
20
40
|
* the rednote adapter can reuse the exact same selector chain.
|
|
21
41
|
*/
|
|
22
42
|
export function buildCommentsExtractJs(withReplies) {
|
|
43
|
+
const parseLikeCountText = parseXhsLikeCountText.toString();
|
|
23
44
|
return `
|
|
24
45
|
(async () => {
|
|
25
46
|
const wait = (ms) => new Promise(r => setTimeout(r, ms))
|
|
@@ -44,9 +65,9 @@ export function buildCommentsExtractJs(withReplies) {
|
|
|
44
65
|
}
|
|
45
66
|
|
|
46
67
|
const clean = (el) => (el?.textContent || '').replace(/\\s+/g, ' ').trim()
|
|
68
|
+
const parseLikeCountText = ${parseLikeCountText}
|
|
47
69
|
const parseLikes = (el) => {
|
|
48
|
-
|
|
49
|
-
return /^\\d+$/.test(raw) ? Number(raw) : 0
|
|
70
|
+
return parseLikeCountText(clean(el))
|
|
50
71
|
}
|
|
51
72
|
const expandReplyThreads = async (root) => {
|
|
52
73
|
if (!withReplies || !root) return
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { JSDOM } from 'jsdom';
|
|
2
3
|
import { getRegistry } from '@jackwener/opencli/registry';
|
|
3
|
-
import './comments.js';
|
|
4
|
+
import { buildCommentsExtractJs, parseXhsLikeCountText } from './comments.js';
|
|
4
5
|
function createPageMock(evaluateResult) {
|
|
5
6
|
return {
|
|
6
7
|
goto: vi.fn().mockResolvedValue(undefined),
|
|
@@ -25,6 +26,41 @@ function createPageMock(evaluateResult) {
|
|
|
25
26
|
waitForCapture: vi.fn().mockResolvedValue(undefined),
|
|
26
27
|
};
|
|
27
28
|
}
|
|
29
|
+
|
|
30
|
+
/**
 * Run the shared comments-extraction script against an HTML fixture.
 *
 * A JSDOM instance is created for the fixture and its `document` and
 * `location` are temporarily installed on `globalThis`, because the generated
 * script reads them as globals. The previous values are put back afterwards,
 * even when the script throws.
 *
 * @param {string} html - Markup to load into the JSDOM document.
 * @returns {Promise<unknown>} Whatever the evaluated extraction script resolves to.
 */
async function runCommentsExtract(html) {
    const jsdom = new JSDOM(html, { url: 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok' });
    const savedGlobals = {
        document: globalThis.document,
        location: globalThis.location,
    };
    Object.assign(globalThis, {
        document: jsdom.window.document,
        location: jsdom.window.location,
    });
    try {
        // The script is built with withReplies = false and evaluated in place.
        return await eval(buildCommentsExtractJs(false));
    } finally {
        Object.assign(globalThis, savedGlobals);
    }
}
|
|
43
|
+
|
|
44
|
+
describe('parseXhsLikeCountText', () => {
|
|
45
|
+
it('parses exact integer and shortform like counts', () => {
|
|
46
|
+
expect(parseXhsLikeCountText('0')).toBe(0);
|
|
47
|
+
expect(parseXhsLikeCountText('42')).toBe(42);
|
|
48
|
+
expect(parseXhsLikeCountText('1,234')).toBe(1234);
|
|
49
|
+
expect(parseXhsLikeCountText('1,234+')).toBe(1234);
|
|
50
|
+
expect(parseXhsLikeCountText('2.1w')).toBe(21000);
|
|
51
|
+
expect(parseXhsLikeCountText('1.5万')).toBe(15000);
|
|
52
|
+
expect(parseXhsLikeCountText('1.2k')).toBe(1200);
|
|
53
|
+
expect(parseXhsLikeCountText('3千')).toBe(3000);
|
|
54
|
+
expect(parseXhsLikeCountText(' 2.1 w + ')).toBe(21000);
|
|
55
|
+
});
|
|
56
|
+
|
|
57
|
+
it('returns 0 for unknown shapes without overparsing arbitrary text', () => {
|
|
58
|
+
for (const raw of ['', null, undefined, '赞', 'likes 2.1w', '2w人', '1,23', '1.2.3k', '.', '1.5']) {
|
|
59
|
+
expect(parseXhsLikeCountText(raw)).toBe(0);
|
|
60
|
+
}
|
|
61
|
+
});
|
|
62
|
+
});
|
|
63
|
+
|
|
28
64
|
describe('xiaohongshu comments', () => {
|
|
29
65
|
const command = getRegistry().get('xiaohongshu/comments');
|
|
30
66
|
it('returns ranked comment rows for signed full URLs', async () => {
|
|
@@ -120,6 +156,32 @@ describe('xiaohongshu comments', () => {
|
|
|
120
156
|
expect(script).toContain("const afterCount = scroller.querySelectorAll('.parent-comment').length");
|
|
121
157
|
expect(script).toContain('if (afterCount <= beforeCount) break');
|
|
122
158
|
});
|
|
159
|
+
it('extracts shortform like counts from the shared xiaohongshu/rednote DOM script', async () => {
|
|
160
|
+
const data = await runCommentsExtract(`
|
|
161
|
+
<main>
|
|
162
|
+
<section class="parent-comment">
|
|
163
|
+
<div class="comment-item">
|
|
164
|
+
<div class="author-wrapper"><span class="name">Alice</span></div>
|
|
165
|
+
<div class="content">Great note</div>
|
|
166
|
+
<span class="count">2.1w</span>
|
|
167
|
+
<span class="date">today</span>
|
|
168
|
+
</div>
|
|
169
|
+
</section>
|
|
170
|
+
<section class="parent-comment">
|
|
171
|
+
<div class="comment-item">
|
|
172
|
+
<span class="user-name">Bob</span>
|
|
173
|
+
<div class="note-text">Malformed count</div>
|
|
174
|
+
<span class="count">likes 2.1w</span>
|
|
175
|
+
</div>
|
|
176
|
+
</section>
|
|
177
|
+
</main>
|
|
178
|
+
`);
|
|
179
|
+
|
|
180
|
+
expect(data.results).toEqual([
|
|
181
|
+
{ author: 'Alice', text: 'Great note', likes: 21000, time: 'today', is_reply: false, reply_to: '' },
|
|
182
|
+
{ author: 'Bob', text: 'Malformed count', likes: 0, time: '', is_reply: false, reply_to: '' },
|
|
183
|
+
]);
|
|
184
|
+
});
|
|
123
185
|
it('respects the limit for top-level comments', async () => {
|
|
124
186
|
const manyComments = Array.from({ length: 10 }, (_, i) => ({
|
|
125
187
|
author: `User${i}`,
|
|
@@ -6,16 +6,24 @@
|
|
|
6
6
|
* Ref: https://github.com/jackwener/opencli/issues/10
|
|
7
7
|
*/
|
|
8
8
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
9
|
-
import { AuthRequiredError } from '@jackwener/opencli/errors';
|
|
9
|
+
import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors';
|
|
10
10
|
/**
|
|
11
11
|
* Wait for search results or login wall using MutationObserver (max 5s).
|
|
12
12
|
* Returns 'content' if note items appeared, 'login_wall' if login gate
|
|
13
13
|
* detected, or 'timeout' if neither appeared within the deadline.
|
|
14
|
+
*
|
|
15
|
+
* Note-item detection tries the legacy `section.note-item` class first
|
|
16
|
+
* (still observed in many sessions, including rednote) and falls back to
|
|
17
|
+
* a `<section>` element containing a `/search_result/` or `/explore/`
|
|
18
|
+
* link. Issue #1506 reports the class being dropped on some xhs renders.
|
|
14
19
|
*/
|
|
15
20
|
const WAIT_FOR_CONTENT_JS = `
|
|
16
21
|
new Promise((resolve) => {
|
|
22
|
+
const findNoteCard = () => document.querySelector(
|
|
23
|
+
'section.note-item, section:has(a[href*="/search_result/"]), section:has(a[href*="/explore/"])'
|
|
24
|
+
);
|
|
17
25
|
const detect = () => {
|
|
18
|
-
if (
|
|
26
|
+
if (findNoteCard()) return 'content';
|
|
19
27
|
if (/登录后查看搜索结果/.test(document.body?.innerText || '')) return 'login_wall';
|
|
20
28
|
return null;
|
|
21
29
|
};
|
|
@@ -52,6 +60,100 @@ export function stripXhsAuthorDateSuffix(value) {
|
|
|
52
60
|
const stripped = text.replace(/\s*(?:\d{1,2}天前|\d+小时前|\d+分钟前|\d+秒前|刚刚|昨天|前天|\d+周前|\d+个月前|\d{1,2}-\d{1,2}|\d{4}-\d{1,2}-\d{1,2})$/u, '').trim();
|
|
53
61
|
return stripped || text;
|
|
54
62
|
}
|
|
63
|
+
/**
 * Validate the `--limit` option for the search command.
 *
 * A nullish value falls back to 20; anything else is coerced with `Number()`.
 *
 * @param {unknown} raw - The raw option value as received from the CLI.
 * @returns {number} An integer in the inclusive range 1..100.
 * @throws {ArgumentError} When the value is not an integer or is out of range.
 */
export function parseLimit(raw) {
    const limit = Number(raw ?? 20);
    // Number.isInteger() is already false for NaN, +/-Infinity and fractions.
    if (!Number.isInteger(limit)) {
        throw new ArgumentError(`--limit must be an integer between 1 and 100, got ${JSON.stringify(raw)}`);
    }
    const withinRange = limit >= 1 && limit <= 100;
    if (!withinRange) {
        throw new ArgumentError(`--limit must be between 1 and 100, got ${limit}`);
    }
    return limit;
}
|
|
73
|
+
/**
 * Build a "scroll until enough or plateaued" IIFE used in place of a fixed
 * `autoScroll({ times: N })`. Xiaohongshu's search results page lazy-loads
 * ~5-7 notes per scroll, so the previous `times: 2` capped extraction at
 * ~13 items regardless of `--limit` (see #1471). This helper drives scrolls
 * dynamically:
 *
 *   - count visible note cards: `section.note-item` elements when any exist,
 *     otherwise every `<section>` that wraps a `/search_result/` or
 *     `/explore/` link; related-search `.query-note-item` rows and elements
 *     with no rendered box / hidden style are not counted
 *   - if count >= targetCount → break (got enough)
 *   - after each scroll to the bottom, wait until the body's scrollHeight
 *     grows (plus a 200 ms settle delay) or 2500 ms pass without growth
 *   - if two consecutive scrolls add no new rows → break (DOM plateaued,
 *     no more lazy-load available)
 *   - hard cap at `maxScrolls` iterations (default 15) to bound runtime
 *
 * Exported so the rednote adapter (same DOM shape) can reuse it.
 *
 * @param {number} targetCount - Number of visible note cards to aim for.
 * @param {number} [maxScrolls=15] - Upper bound on scroll iterations.
 * @returns {string} Source of an async IIFE; when evaluated in the page it
 *   resolves to the number of visible note cards after scrolling stops.
 * @throws {ArgumentError} When either argument is not a positive safe integer.
 */
export function buildScrollUntilJs(targetCount, maxScrolls = 15) {
    // Both values are interpolated verbatim into the generated script below,
    // so they are restricted to positive safe integers before being used.
    if (!Number.isSafeInteger(targetCount) || targetCount < 1) {
        throw new ArgumentError(`targetCount must be a positive integer, got ${JSON.stringify(targetCount)}`);
    }
    if (!Number.isSafeInteger(maxScrolls) || maxScrolls < 1) {
        throw new ArgumentError(`maxScrolls must be a positive integer, got ${JSON.stringify(maxScrolls)}`);
    }
    return `
    (async () => {
      const isVisibleNote = (el) => {
        if (el.classList.contains('query-note-item')) return false;
        const rect = el.getBoundingClientRect();
        if (rect.width <= 0 || rect.height <= 0) return false;
        const style = getComputedStyle(el);
        return style.display !== 'none' && style.visibility !== 'hidden';
      };
      // Note containers: legacy \`section.note-item\` first, fallback to
      // any \`<section>\` that wraps a search-result/explore note link
      // (#1506 reports the class being dropped on some xhs renders).
      const collectNoteCards = () => {
        const classMatches = document.querySelectorAll('section.note-item');
        if (classMatches.length > 0) return classMatches;
        const sections = new Set();
        for (const a of document.querySelectorAll('a[href*="/search_result/"], a[href*="/explore/"]')) {
          const section = a.closest('section');
          if (section) sections.add(section);
        }
        return sections;
      };
      const countItems = () => {
        let count = 0;
        for (const el of collectNoteCards()) {
          if (isVisibleNote(el)) count++;
        }
        return count;
      };

      let lastCount = countItems();
      let plateauRounds = 0;
      for (let i = 0; i < ${maxScrolls}; i++) {
        if (countItems() >= ${targetCount}) break;
        const lastHeight = document.body.scrollHeight;
        window.scrollTo(0, lastHeight);
        await new Promise((resolve) => {
          let to;
          const ob = new MutationObserver(() => {
            if (document.body.scrollHeight > lastHeight) {
              clearTimeout(to);
              ob.disconnect();
              setTimeout(resolve, 200);
            }
          });
          ob.observe(document.body, { childList: true, subtree: true });
          to = setTimeout(() => { ob.disconnect(); resolve(null); }, 2500);
        });
        const newCount = countItems();
        if (newCount === lastCount) {
          plateauRounds++;
          if (plateauRounds >= 2) break;
        } else {
          plateauRounds = 0;
          lastCount = newCount;
        }
      }
      return countItems();
    })()
  `;
}
|
|
55
157
|
/**
|
|
56
158
|
* Build the search-result extraction IIFE. The web host is baked into the
|
|
57
159
|
* `normalizeUrl` fallback so relative `/explore/...` hrefs resolve to a full
|
|
@@ -70,13 +172,34 @@ export function buildSearchExtractJs(webHost) {
|
|
|
70
172
|
|
|
71
173
|
const cleanText = (value) => (value || '').replace(/\\s+/g, ' ').trim();
|
|
72
174
|
const stripXhsAuthorDateSuffix = ${stripXhsAuthorDateSuffix.toString()};
|
|
175
|
+
const isVisibleNote = (el) => {
|
|
176
|
+
const rect = el.getBoundingClientRect();
|
|
177
|
+
if (rect.width <= 0 || rect.height <= 0) return false;
|
|
178
|
+
const style = getComputedStyle(el);
|
|
179
|
+
return style.display !== 'none' && style.visibility !== 'hidden';
|
|
180
|
+
};
|
|
73
181
|
|
|
74
182
|
const results = [];
|
|
75
183
|
const seen = new Set();
|
|
76
184
|
|
|
77
|
-
|
|
185
|
+
// Note containers: legacy \`section.note-item\` first, fallback to any
|
|
186
|
+
// \`<section>\` wrapping a search-result/explore link (#1506 reports the
|
|
187
|
+
// class being dropped on some xhs renders).
|
|
188
|
+
const collectNoteCards = () => {
|
|
189
|
+
const classMatches = document.querySelectorAll('section.note-item');
|
|
190
|
+
if (classMatches.length > 0) return classMatches;
|
|
191
|
+
const sections = new Set();
|
|
192
|
+
for (const a of document.querySelectorAll('a[href*="/search_result/"], a[href*="/explore/"]')) {
|
|
193
|
+
const section = a.closest('section');
|
|
194
|
+
if (section) sections.add(section);
|
|
195
|
+
}
|
|
196
|
+
return sections;
|
|
197
|
+
};
|
|
198
|
+
|
|
199
|
+
for (const el of collectNoteCards()) {
|
|
78
200
|
// Skip "related searches" sections
|
|
79
|
-
if (el.classList
|
|
201
|
+
if (el.classList?.contains('query-note-item')) continue;
|
|
202
|
+
if (!isVisibleNote(el)) continue;
|
|
80
203
|
|
|
81
204
|
const titleEl = el.querySelector('.title, .note-title, a.title, .footer .title span');
|
|
82
205
|
const nameEl = el.querySelector('a.author .name, .author-name, .nick-name, .name');
|
|
@@ -96,20 +219,29 @@ export function buildSearchExtractJs(webHost) {
|
|
|
96
219
|
const authorLinkEl = el.querySelector('a.author, a[href*="/user/profile/"]');
|
|
97
220
|
|
|
98
221
|
const url = normalizeUrl(detailLinkEl?.getAttribute('href') || '');
|
|
99
|
-
if (!url)
|
|
222
|
+
if (!url) continue;
|
|
100
223
|
|
|
101
224
|
const key = url;
|
|
102
|
-
if (seen.has(key))
|
|
225
|
+
if (seen.has(key)) continue;
|
|
103
226
|
seen.add(key);
|
|
104
227
|
|
|
228
|
+
// Fallback title: the new bare-section render keeps the note caption
|
|
229
|
+
// inside the search_result anchor's first span, not in a class-named
|
|
230
|
+
// .title element. Pull from there when the class-based pick is empty.
|
|
231
|
+
let title = cleanText(titleEl?.textContent || '');
|
|
232
|
+
if (!title) {
|
|
233
|
+
const captionSpan = detailLinkEl?.querySelector('span');
|
|
234
|
+
title = cleanText(captionSpan?.textContent || '');
|
|
235
|
+
}
|
|
236
|
+
|
|
105
237
|
results.push({
|
|
106
|
-
title
|
|
238
|
+
title,
|
|
107
239
|
author,
|
|
108
240
|
likes: cleanText(likesEl?.textContent || '0'),
|
|
109
241
|
url,
|
|
110
242
|
author_url: normalizeUrl(authorLinkEl?.getAttribute('href') || ''),
|
|
111
243
|
});
|
|
112
|
-
}
|
|
244
|
+
}
|
|
113
245
|
|
|
114
246
|
return results;
|
|
115
247
|
})()
|
|
@@ -129,6 +261,7 @@ export const command = cli({
|
|
|
129
261
|
],
|
|
130
262
|
columns: ['rank', 'title', 'author', 'likes', 'published_at', 'url'],
|
|
131
263
|
func: async (page, kwargs) => {
|
|
264
|
+
const limit = parseLimit(kwargs.limit);
|
|
132
265
|
const keyword = encodeURIComponent(kwargs.query);
|
|
133
266
|
await page.goto(`https://www.xiaohongshu.com/search_result?keyword=${keyword}&source=web_search_result_notes`);
|
|
134
267
|
// Wait for search results to render (or login wall to appear).
|
|
@@ -138,13 +271,35 @@ export const command = cli({
|
|
|
138
271
|
if (waitResult === 'login_wall') {
|
|
139
272
|
throw new AuthRequiredError('www.xiaohongshu.com', 'Xiaohongshu search results are blocked behind a login wall');
|
|
140
273
|
}
|
|
141
|
-
//
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
274
|
+
// Extract before scrolling. Xiaohongshu uses a virtualized masonry
|
|
275
|
+
// layout, so scrolling to the bottom can evict the initially visible
|
|
276
|
+
// note cards from the DOM and make extraction return [] even though the
|
|
277
|
+
// browser rendered results correctly.
|
|
278
|
+
const initialPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
|
|
279
|
+
let payload = Array.isArray(initialPayload) ? initialPayload : [];
|
|
280
|
+
if (payload.length < limit) {
|
|
281
|
+
// Scroll until enough rows are rendered or the lazy-load plateaus.
|
|
282
|
+
// Replaces the previous fixed `autoScroll({ times: 2 })` which capped
|
|
283
|
+
// extraction at ~13 notes regardless of `--limit` (#1471).
|
|
284
|
+
await page.evaluate(buildScrollUntilJs(limit));
|
|
285
|
+
const scrolledPayload = await page.evaluate(buildSearchExtractJs('www.xiaohongshu.com'));
|
|
286
|
+
if (Array.isArray(scrolledPayload)) {
|
|
287
|
+
const seen = new Set(payload.map((item) => item.url).filter(Boolean));
|
|
288
|
+
for (const item of scrolledPayload) {
|
|
289
|
+
if (item?.url && seen.has(item.url))
|
|
290
|
+
continue;
|
|
291
|
+
if (item?.url)
|
|
292
|
+
seen.add(item.url);
|
|
293
|
+
payload.push(item);
|
|
294
|
+
if (payload.length >= limit)
|
|
295
|
+
break;
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
const data = payload;
|
|
145
300
|
return data
|
|
146
301
|
.filter((item) => item.title)
|
|
147
|
-
.slice(0,
|
|
302
|
+
.slice(0, limit)
|
|
148
303
|
.map((item, i) => ({
|
|
149
304
|
rank: i + 1,
|
|
150
305
|
...item,
|