@jackwener/opencli 1.7.17 → 1.7.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -8
- package/README.zh-CN.md +9 -8
- package/cli-manifest.json +585 -9
- package/clis/ctrip/ctrip.test.js +486 -1
- package/clis/ctrip/flight.js +136 -0
- package/clis/ctrip/hotel-search.js +132 -0
- package/clis/ctrip/utils.js +298 -0
- package/clis/doubao/utils.js +17 -0
- package/clis/doubao/utils.test.js +61 -0
- package/clis/google/search.js +16 -6
- package/clis/google-scholar/search.js +20 -5
- package/clis/google-scholar/search.test.js +35 -2
- package/clis/reddit/home.js +117 -0
- package/clis/reddit/home.test.js +127 -0
- package/clis/reddit/read.js +400 -54
- package/clis/reddit/read.test.js +315 -12
- package/clis/reddit/reply.js +182 -0
- package/clis/reddit/reply.test.js +89 -0
- package/clis/reddit/subreddit-info.js +117 -0
- package/clis/reddit/subreddit-info.test.js +163 -0
- package/clis/reddit/whoami.js +84 -0
- package/clis/reddit/whoami.test.js +105 -0
- package/clis/rednote/comments.js +76 -0
- package/clis/rednote/download.js +59 -0
- package/clis/rednote/feed.js +95 -0
- package/clis/rednote/navigation.test.js +26 -0
- package/clis/rednote/note.js +68 -0
- package/clis/rednote/notifications.js +139 -0
- package/clis/rednote/rednote.test.js +157 -0
- package/clis/rednote/search.js +101 -0
- package/clis/rednote/user.js +55 -0
- package/clis/twitter/bookmark-folder.js +3 -1
- package/clis/twitter/bookmarks.js +3 -1
- package/clis/twitter/followers.js +20 -5
- package/clis/twitter/followers.test.js +44 -0
- package/clis/twitter/following.js +36 -20
- package/clis/twitter/following.test.js +60 -8
- package/clis/twitter/likes.js +28 -13
- package/clis/twitter/likes.test.js +111 -1
- package/clis/twitter/list-add.js +128 -204
- package/clis/twitter/list-add.test.js +97 -1
- package/clis/twitter/list-tweets.js +13 -4
- package/clis/twitter/list-tweets.test.js +48 -0
- package/clis/twitter/lists.js +5 -2
- package/clis/twitter/post.js +23 -4
- package/clis/twitter/post.test.js +30 -0
- package/clis/twitter/profile.js +16 -8
- package/clis/twitter/profile.test.js +39 -0
- package/clis/twitter/reply.js +133 -10
- package/clis/twitter/reply.test.js +55 -0
- package/clis/twitter/search.js +188 -170
- package/clis/twitter/search.test.js +96 -258
- package/clis/twitter/shared.js +167 -16
- package/clis/twitter/shared.test.js +102 -1
- package/clis/twitter/timeline.js +3 -1
- package/clis/twitter/tweets.js +147 -51
- package/clis/twitter/tweets.test.js +238 -1
- package/clis/xiaohongshu/comments.js +57 -26
- package/clis/xiaohongshu/comments.test.js +63 -1
- package/clis/xiaohongshu/download.js +32 -23
- package/clis/xiaohongshu/feed.js +23 -15
- package/clis/xiaohongshu/note-helpers.js +16 -6
- package/clis/xiaohongshu/note.js +26 -20
- package/clis/xiaohongshu/notifications.js +26 -19
- package/clis/xiaohongshu/search.js +201 -37
- package/clis/xiaohongshu/search.test.js +82 -8
- package/clis/xiaohongshu/user-helpers.js +13 -4
- package/clis/xiaohongshu/user-helpers.test.js +20 -0
- package/clis/xiaohongshu/user.js +9 -4
- package/clis/xueqiu/earnings-date.js +2 -2
- package/clis/xueqiu/kline.js +2 -2
- package/clis/xueqiu/utils.js +19 -0
- package/clis/xueqiu/utils.test.js +26 -0
- package/clis/youtube/transcript.js +28 -3
- package/clis/youtube/transcript.test.js +90 -1
- package/clis/zhihu/answer-detail.js +233 -0
- package/clis/zhihu/answer-detail.test.js +330 -0
- package/clis/zhihu/question.js +44 -10
- package/clis/zhihu/question.test.js +78 -1
- package/clis/zhihu/recommend.js +103 -0
- package/clis/zhihu/recommend.test.js +143 -0
- package/dist/src/browser/base-page.d.ts +3 -2
- package/dist/src/browser/base-page.test.js +2 -2
- package/dist/src/browser/cdp.js +3 -3
- package/dist/src/browser/page.d.ts +3 -2
- package/dist/src/browser/page.js +4 -4
- package/dist/src/browser/page.test.js +31 -0
- package/dist/src/browser/utils.d.ts +10 -0
- package/dist/src/browser/utils.js +37 -0
- package/dist/src/browser/utils.test.d.ts +1 -0
- package/dist/src/browser/utils.test.js +29 -0
- package/dist/src/cli-argv-preprocess.d.ts +37 -0
- package/dist/src/cli-argv-preprocess.js +131 -0
- package/dist/src/cli-argv-preprocess.test.d.ts +1 -0
- package/dist/src/cli-argv-preprocess.test.js +130 -0
- package/dist/src/cli.js +123 -86
- package/dist/src/cli.test.js +32 -22
- package/dist/src/commands/daemon.js +6 -7
- package/dist/src/doctor.js +21 -17
- package/dist/src/doctor.test.js +2 -0
- package/dist/src/download/progress.js +15 -11
- package/dist/src/download/progress.test.d.ts +1 -0
- package/dist/src/download/progress.test.js +25 -0
- package/dist/src/execution.js +1 -3
- package/dist/src/execution.test.js +4 -16
- package/dist/src/help.d.ts +11 -0
- package/dist/src/help.js +46 -5
- package/dist/src/logger.js +8 -9
- package/dist/src/main.js +16 -0
- package/dist/src/output.js +4 -5
- package/dist/src/runtime-detect.d.ts +1 -1
- package/dist/src/runtime-detect.js +1 -1
- package/dist/src/runtime-detect.test.js +3 -2
- package/dist/src/tui.d.ts +0 -1
- package/dist/src/tui.js +9 -22
- package/dist/src/types.d.ts +3 -1
- package/dist/src/update-check.js +4 -5
- package/package.json +5 -4
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
import { describe, expect, it } from 'vitest';
|
|
1
|
+
import { describe, expect, it, vi } from 'vitest';
|
|
2
2
|
import { getRegistry } from '@jackwener/opencli/registry';
|
|
3
|
+
import { ArgumentError, AuthRequiredError } from '@jackwener/opencli/errors';
|
|
3
4
|
import { __test__ } from './tweets.js';
|
|
4
5
|
|
|
5
6
|
describe('twitter tweets helpers', () => {
|
|
@@ -8,6 +9,140 @@ describe('twitter tweets helpers', () => {
|
|
|
8
9
|
expect(cmd?.columns).toEqual(['id', 'author', 'created_at', 'is_retweet', 'text', 'likes', 'retweets', 'replies', 'views', 'url', 'has_media', 'media_urls']);
|
|
9
10
|
});
|
|
10
11
|
|
|
12
|
+
it('makes the username argument optional so it can default to the logged-in user', () => {
|
|
13
|
+
const cmd = getRegistry().get('twitter/tweets');
|
|
14
|
+
const usernameArg = cmd?.args?.find((arg) => arg.name === 'username');
|
|
15
|
+
expect(usernameArg).toBeDefined();
|
|
16
|
+
expect(usernameArg?.required).not.toBe(true);
|
|
17
|
+
expect(usernameArg?.help || '').toMatch(/default/i);
|
|
18
|
+
expect(cmd?.description || '').toMatch(/default/i);
|
|
19
|
+
});
|
|
20
|
+
|
|
21
|
+
it('detects the logged-in user via AppTabBar_Profile_Link when no username is given', async () => {
|
|
22
|
+
const cmd = getRegistry().get('twitter/tweets');
|
|
23
|
+
const evaluatedScripts = [];
|
|
24
|
+
const page = {
|
|
25
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
26
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
27
|
+
getCookies: vi.fn(async () => [{ name: 'ct0', value: 'token' }]),
|
|
28
|
+
evaluate: vi.fn(async (script) => {
|
|
29
|
+
const text = typeof script === 'function' ? script.toString() : String(script);
|
|
30
|
+
evaluatedScripts.push(text);
|
|
31
|
+
if (text.includes('AppTabBar_Profile_Link')) return '/viewer';
|
|
32
|
+
if (text.includes('operationName')) return null; // operation metadata resolver
|
|
33
|
+
if (text.includes('/UserByScreenName')) return '42';
|
|
34
|
+
if (text.includes('/UserTweets')) {
|
|
35
|
+
return {
|
|
36
|
+
data: {
|
|
37
|
+
user: {
|
|
38
|
+
result: {
|
|
39
|
+
timeline_v2: {
|
|
40
|
+
timeline: {
|
|
41
|
+
instructions: [
|
|
42
|
+
{
|
|
43
|
+
entries: [
|
|
44
|
+
{
|
|
45
|
+
entryId: 'tweet-1',
|
|
46
|
+
content: {
|
|
47
|
+
itemContent: {
|
|
48
|
+
tweet_results: {
|
|
49
|
+
result: {
|
|
50
|
+
rest_id: '1',
|
|
51
|
+
legacy: {
|
|
52
|
+
full_text: 'own post',
|
|
53
|
+
favorite_count: 0,
|
|
54
|
+
retweet_count: 0,
|
|
55
|
+
reply_count: 0,
|
|
56
|
+
created_at: 'now',
|
|
57
|
+
},
|
|
58
|
+
core: {
|
|
59
|
+
user_results: {
|
|
60
|
+
result: {
|
|
61
|
+
legacy: { screen_name: 'viewer', name: 'Viewer' },
|
|
62
|
+
},
|
|
63
|
+
},
|
|
64
|
+
},
|
|
65
|
+
},
|
|
66
|
+
},
|
|
67
|
+
},
|
|
68
|
+
},
|
|
69
|
+
},
|
|
70
|
+
],
|
|
71
|
+
},
|
|
72
|
+
],
|
|
73
|
+
},
|
|
74
|
+
},
|
|
75
|
+
},
|
|
76
|
+
},
|
|
77
|
+
},
|
|
78
|
+
};
|
|
79
|
+
}
|
|
80
|
+
return null;
|
|
81
|
+
}),
|
|
82
|
+
};
|
|
83
|
+
const rows = await cmd.func(page, { limit: 1 });
|
|
84
|
+
// Navigated home to read the logged-in user
|
|
85
|
+
expect(page.goto).toHaveBeenCalledWith('https://x.com/home');
|
|
86
|
+
// AppTabBar_Profile_Link probe happened before any GraphQL fetch
|
|
87
|
+
const probeIdx = evaluatedScripts.findIndex((t) => t.includes('AppTabBar_Profile_Link'));
|
|
88
|
+
const graphqlIdx = evaluatedScripts.findIndex((t) => t.includes('/UserByScreenName'));
|
|
89
|
+
expect(probeIdx).toBeGreaterThanOrEqual(0);
|
|
90
|
+
expect(graphqlIdx).toBeGreaterThan(probeIdx);
|
|
91
|
+
// The detected handle ('viewer') was used for the UserByScreenName lookup
|
|
92
|
+
const lookup = evaluatedScripts.find((t) => t.includes('/UserByScreenName')) || '';
|
|
93
|
+
expect(decodeURIComponent(lookup)).toContain('"screen_name":"viewer"');
|
|
94
|
+
expect(rows).toHaveLength(1);
|
|
95
|
+
expect(rows[0]).toMatchObject({ id: '1', author: 'viewer', url: 'https://x.com/viewer/status/1' });
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
it('throws AuthRequiredError when no username is given and the logged-in user cannot be detected', async () => {
|
|
99
|
+
const cmd = getRegistry().get('twitter/tweets');
|
|
100
|
+
const page = {
|
|
101
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
102
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
103
|
+
getCookies: vi.fn(async () => []),
|
|
104
|
+
evaluate: vi.fn(async (script) => {
|
|
105
|
+
const text = typeof script === 'function' ? script.toString() : String(script);
|
|
106
|
+
if (text.includes('AppTabBar_Profile_Link')) return null;
|
|
107
|
+
return null;
|
|
108
|
+
}),
|
|
109
|
+
};
|
|
110
|
+
await expect(cmd.func(page, {})).rejects.toBeInstanceOf(AuthRequiredError);
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
it('rejects invalid explicit username before navigation', async () => {
|
|
114
|
+
const cmd = getRegistry().get('twitter/tweets');
|
|
115
|
+
const page = {
|
|
116
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
117
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
118
|
+
getCookies: vi.fn(async () => [{ name: 'ct0', value: 'token' }]),
|
|
119
|
+
evaluate: vi.fn(),
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
await expect(cmd.func(page, { username: 'viewer/extra' })).rejects.toBeInstanceOf(ArgumentError);
|
|
123
|
+
expect(page.goto).not.toHaveBeenCalled();
|
|
124
|
+
expect(page.getCookies).not.toHaveBeenCalled();
|
|
125
|
+
expect(page.evaluate).not.toHaveBeenCalled();
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
it('rejects non-profile AppTabBar hrefs instead of querying route names as users', async () => {
|
|
129
|
+
const cmd = getRegistry().get('twitter/tweets');
|
|
130
|
+
const page = {
|
|
131
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
132
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
133
|
+
getCookies: vi.fn(async () => [{ name: 'ct0', value: 'token' }]),
|
|
134
|
+
evaluate: vi.fn(async (script) => {
|
|
135
|
+
const text = typeof script === 'function' ? script.toString() : String(script);
|
|
136
|
+
if (text.includes('AppTabBar_Profile_Link')) return '/home';
|
|
137
|
+
throw new Error(`Unexpected evaluate: ${text.slice(0, 80)}`);
|
|
138
|
+
}),
|
|
139
|
+
};
|
|
140
|
+
|
|
141
|
+
await expect(cmd.func(page, {})).rejects.toBeInstanceOf(AuthRequiredError);
|
|
142
|
+
expect(page.goto).toHaveBeenCalledWith('https://x.com/home');
|
|
143
|
+
expect(page.evaluate).toHaveBeenCalledTimes(1);
|
|
144
|
+
});
|
|
145
|
+
|
|
11
146
|
it('falls back when queryId contains unsafe characters', () => {
|
|
12
147
|
expect(__test__.sanitizeQueryId('safe_Query-123', 'fallback')).toBe('safe_Query-123');
|
|
13
148
|
expect(__test__.sanitizeQueryId('bad"id', 'fallback')).toBe('fallback');
|
|
@@ -60,6 +195,18 @@ describe('twitter tweets helpers', () => {
|
|
|
60
195
|
expect(b.is_retweet).toBe(true);
|
|
61
196
|
});
|
|
62
197
|
|
|
198
|
+
it('unwraps TweetWithVisibilityResults', () => {
|
|
199
|
+
const tweet = __test__.extractTweet({
|
|
200
|
+
__typename: 'TweetWithVisibilityResults',
|
|
201
|
+
tweet: {
|
|
202
|
+
rest_id: '42',
|
|
203
|
+
legacy: { full_text: 'visible post', favorite_count: 2, retweet_count: 0, reply_count: 0, created_at: 'now' },
|
|
204
|
+
core: { user_results: { result: { legacy: { screen_name: 'alice', name: 'Alice' } } } },
|
|
205
|
+
},
|
|
206
|
+
}, new Set());
|
|
207
|
+
expect(tweet).toMatchObject({ id: '42', author: 'alice', text: 'visible post' });
|
|
208
|
+
});
|
|
209
|
+
|
|
63
210
|
it('parses chronological tweets and skips pinned instruction', () => {
|
|
64
211
|
const chronEntry = {
|
|
65
212
|
entryId: 'tweet-1',
|
|
@@ -122,4 +269,94 @@ describe('twitter tweets helpers', () => {
|
|
|
122
269
|
url: 'https://x.com/alice/status/1',
|
|
123
270
|
});
|
|
124
271
|
});
|
|
272
|
+
|
|
273
|
+
it('recursively parses tweets nested in timeline modules', () => {
|
|
274
|
+
const payload = {
|
|
275
|
+
data: {
|
|
276
|
+
user: {
|
|
277
|
+
result: {
|
|
278
|
+
timeline_v2: {
|
|
279
|
+
timeline: {
|
|
280
|
+
instructions: [
|
|
281
|
+
{
|
|
282
|
+
type: 'TimelineAddEntries',
|
|
283
|
+
entries: [
|
|
284
|
+
{
|
|
285
|
+
entryId: 'profile-conversation-1',
|
|
286
|
+
content: {
|
|
287
|
+
entryType: 'TimelineTimelineModule',
|
|
288
|
+
items: [
|
|
289
|
+
{
|
|
290
|
+
item: {
|
|
291
|
+
itemContent: {
|
|
292
|
+
tweet_results: {
|
|
293
|
+
result: {
|
|
294
|
+
rest_id: '2',
|
|
295
|
+
legacy: { full_text: 'nested post', favorite_count: 1, retweet_count: 0, reply_count: 0, created_at: 'now' },
|
|
296
|
+
core: { user_results: { result: { legacy: { screen_name: 'alice', name: 'Alice' } } } },
|
|
297
|
+
},
|
|
298
|
+
},
|
|
299
|
+
},
|
|
300
|
+
},
|
|
301
|
+
},
|
|
302
|
+
],
|
|
303
|
+
},
|
|
304
|
+
},
|
|
305
|
+
{
|
|
306
|
+
entryId: 'cursor-bottom-2',
|
|
307
|
+
content: { entryType: 'TimelineTimelineCursor', cursorType: 'Bottom', value: 'next' },
|
|
308
|
+
},
|
|
309
|
+
],
|
|
310
|
+
},
|
|
311
|
+
],
|
|
312
|
+
},
|
|
313
|
+
},
|
|
314
|
+
},
|
|
315
|
+
},
|
|
316
|
+
},
|
|
317
|
+
};
|
|
318
|
+
const result = __test__.parseUserTweets(payload, new Set());
|
|
319
|
+
expect(result.nextCursor).toBe('next');
|
|
320
|
+
expect(result.tweets).toHaveLength(1);
|
|
321
|
+
expect(result.tweets[0]).toMatchObject({ id: '2', text: 'nested post' });
|
|
322
|
+
});
|
|
323
|
+
|
|
324
|
+
it('uses populated timeline instructions when timeline_v2 is present but empty', () => {
|
|
325
|
+
const payload = {
|
|
326
|
+
data: {
|
|
327
|
+
user: {
|
|
328
|
+
result: {
|
|
329
|
+
timeline_v2: { timeline: { instructions: [] } },
|
|
330
|
+
timeline: {
|
|
331
|
+
timeline: {
|
|
332
|
+
instructions: [
|
|
333
|
+
{
|
|
334
|
+
type: 'TimelineAddEntries',
|
|
335
|
+
entries: [
|
|
336
|
+
{
|
|
337
|
+
content: {
|
|
338
|
+
itemContent: {
|
|
339
|
+
tweet_results: {
|
|
340
|
+
result: {
|
|
341
|
+
rest_id: '3',
|
|
342
|
+
legacy: { full_text: 'fallback timeline post', favorite_count: 0, retweet_count: 0, reply_count: 0, created_at: 'now' },
|
|
343
|
+
core: { user_results: { result: { legacy: { screen_name: 'alice', name: 'Alice' } } } },
|
|
344
|
+
},
|
|
345
|
+
},
|
|
346
|
+
},
|
|
347
|
+
},
|
|
348
|
+
},
|
|
349
|
+
],
|
|
350
|
+
},
|
|
351
|
+
],
|
|
352
|
+
},
|
|
353
|
+
},
|
|
354
|
+
},
|
|
355
|
+
},
|
|
356
|
+
},
|
|
357
|
+
};
|
|
358
|
+
const result = __test__.parseUserTweets(payload, new Set());
|
|
359
|
+
expect(result.tweets).toHaveLength(1);
|
|
360
|
+
expect(result.tweets[0]).toMatchObject({ id: '3', text: 'fallback timeline post' });
|
|
361
|
+
});
|
|
125
362
|
});
|
|
@@ -8,34 +8,40 @@
|
|
|
8
8
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
9
9
|
import { AuthRequiredError, CliError, EmptyResultError } from '@jackwener/opencli/errors';
|
|
10
10
|
import { parseNoteId, buildNoteUrl } from './note-helpers.js';
|
|
11
|
-
function parseCommentLimit(raw, fallback = 20) {
|
|
11
|
+
/**
 * Normalize a user-supplied comment limit to an integer in the range [1, 50].
 *
 * @param {unknown} raw - Requested limit (number, numeric string, or missing).
 * @param {number} [fallback=20] - Returned unchanged when `raw` is missing,
 *   blank, or does not convert to a finite number.
 * @returns {number} `Math.floor(raw)` clamped to [1, 50], or `fallback`.
 */
export function parseCommentLimit(raw, fallback = 20) {
  // Number(null), Number('') and Number('   ') all evaluate to 0, which the
  // clamp below would silently turn into 1. Treat those inputs as "no value".
  if (raw == null || (typeof raw === 'string' && raw.trim() === '')) {
    return fallback;
  }
  const n = Number(raw);
  if (!Number.isFinite(n)) {
    return fallback;
  }
  return Math.max(1, Math.min(Math.floor(n), 50));
}
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
],
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
17
|
+
|
|
18
|
+
/**
 * Parse the text of a like-count label into an integer.
 *
 * Accepted shapes (after all whitespace is removed):
 *   - plain or thousands-grouped integers, optionally followed by `+`
 *     (`42`, `1,234`, `1,234+`);
 *   - a number with an optional decimal part followed by a unit suffix and an
 *     optional `+`: `w`/`W`/`万` multiply by 10000, `k`/`K`/`千` by 1000
 *     (`2.1w` -> 21000, `3千` -> 3000).
 * Anything else, including empty/nullish input and a bare decimal such as
 * `1.5`, yields 0 rather than a best-effort guess.
 *
 * This function must stay self-contained (no references to module-level
 * helpers or constants): buildCommentsExtractJs embeds its `toString()`
 * source into the script that is evaluated inside the page.
 *
 * @param {unknown} value - Raw label text; `null`/`undefined` are treated as empty.
 * @returns {number} The parsed count, or 0 when the text is not a recognized shape.
 */
export function parseXhsLikeCountText(value) {
  // NOTE(review): the separator class previously listed the ASCII comma twice,
  // which looks like a fullwidth comma (U+FF0C) lost in transcoding. It is
  // spelled as an escape here so it survives re-encoding - confirm upstream.
  const integerRe = /^(?:\d+|\d{1,3}(?:[,\uFF0C]\d{3})+)\+?$/u;
  const shortformRe = /^((?:\d+|\d{1,3}(?:[,\uFF0C]\d{3})+)(?:\.\d+)?)([wWkK万千])\+?$/u;
  const raw = String(value ?? '').replace(/\s+/g, '');
  if (!raw) {
    return 0;
  }
  if (integerRe.test(raw)) {
    // Drop grouping separators and the trailing "+" before converting.
    return Number(raw.replace(/[,\uFF0C+]/g, ''));
  }
  const short = raw.match(shortformRe);
  if (!short) {
    return 0;
  }
  const numeric = Number(short[1].replace(/[,\uFF0C]/g, ''));
  if (!Number.isFinite(numeric)) {
    return 0;
  }
  const unit = short[2].toLowerCase();
  const multiplier = unit === 'w' || unit === '万' ? 10000 : 1000;
  // Math.round absorbs floating-point noise such as 2.1 * 10000.
  return Math.round(numeric * multiplier);
}
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Host-agnostic IIFE that scrolls a note's comment list and extracts
|
|
39
|
+
* top-level comments (and optionally nested 楼中楼 replies). Exported so
|
|
40
|
+
* the rednote adapter can reuse the exact same selector chain.
|
|
41
|
+
*/
|
|
42
|
+
export function buildCommentsExtractJs(withReplies) {
|
|
43
|
+
const parseLikeCountText = parseXhsLikeCountText.toString();
|
|
44
|
+
return `
|
|
39
45
|
(async () => {
|
|
40
46
|
const wait = (ms) => new Promise(r => setTimeout(r, ms))
|
|
41
47
|
const withReplies = ${withReplies}
|
|
@@ -59,9 +65,9 @@ cli({
|
|
|
59
65
|
}
|
|
60
66
|
|
|
61
67
|
const clean = (el) => (el?.textContent || '').replace(/\\s+/g, ' ').trim()
|
|
68
|
+
const parseLikeCountText = ${parseLikeCountText}
|
|
62
69
|
const parseLikes = (el) => {
|
|
63
|
-
|
|
64
|
-
return /^\\d+$/.test(raw) ? Number(raw) : 0
|
|
70
|
+
return parseLikeCountText(clean(el))
|
|
65
71
|
}
|
|
66
72
|
const expandReplyThreads = async (root) => {
|
|
67
73
|
if (!withReplies || !root) return
|
|
@@ -115,7 +121,30 @@ cli({
|
|
|
115
121
|
|
|
116
122
|
return { pageUrl: location.href, securityBlock, loginWall, results }
|
|
117
123
|
})()
|
|
118
|
-
|
|
124
|
+
`;
|
|
125
|
+
}
|
|
126
|
+
export const command = cli({
|
|
127
|
+
site: 'xiaohongshu',
|
|
128
|
+
name: 'comments',
|
|
129
|
+
access: 'read',
|
|
130
|
+
description: '获取小红书笔记评论(支持楼中楼子回复)',
|
|
131
|
+
domain: 'www.xiaohongshu.com',
|
|
132
|
+
strategy: Strategy.COOKIE,
|
|
133
|
+
navigateBefore: false,
|
|
134
|
+
args: [
|
|
135
|
+
{ name: 'note-id', required: true, positional: true, help: 'Full Xiaohongshu note URL with xsec_token' },
|
|
136
|
+
{ name: 'limit', type: 'int', default: 20, help: 'Number of top-level comments (max 50)' },
|
|
137
|
+
{ name: 'with-replies', type: 'boolean', default: false, help: 'Include nested replies (楼中楼)' },
|
|
138
|
+
],
|
|
139
|
+
columns: ['rank', 'author', 'text', 'likes', 'time', 'is_reply', 'reply_to'],
|
|
140
|
+
func: async (page, kwargs) => {
|
|
141
|
+
const limit = parseCommentLimit(kwargs.limit);
|
|
142
|
+
const withReplies = Boolean(kwargs['with-replies']);
|
|
143
|
+
const raw = String(kwargs['note-id']);
|
|
144
|
+
const noteId = parseNoteId(raw);
|
|
145
|
+
await page.goto(buildNoteUrl(raw, { commandName: 'xiaohongshu comments' }));
|
|
146
|
+
await page.wait({ time: 2 + Math.random() * 3 });
|
|
147
|
+
const data = await page.evaluate(buildCommentsExtractJs(withReplies));
|
|
119
148
|
if (!data || typeof data !== 'object') {
|
|
120
149
|
throw new EmptyResultError('xiaohongshu/comments', 'Unexpected evaluate response');
|
|
121
150
|
}
|
|
@@ -127,6 +156,8 @@ cli({
|
|
|
127
156
|
if (data.loginWall) {
|
|
128
157
|
throw new AuthRequiredError('www.xiaohongshu.com', 'Note comments require login');
|
|
129
158
|
}
|
|
159
|
+
// noteId currently unused after parsing — kept for symmetry with the note command
|
|
160
|
+
void noteId;
|
|
130
161
|
const all = data.results ?? [];
|
|
131
162
|
// When limiting, count only top-level comments; their replies are included for free
|
|
132
163
|
if (withReplies) {
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { describe, expect, it, vi } from 'vitest';
|
|
2
|
+
import { JSDOM } from 'jsdom';
|
|
2
3
|
import { getRegistry } from '@jackwener/opencli/registry';
|
|
3
|
-
import './comments.js';
|
|
4
|
+
import { buildCommentsExtractJs, parseXhsLikeCountText } from './comments.js';
|
|
4
5
|
function createPageMock(evaluateResult) {
|
|
5
6
|
return {
|
|
6
7
|
goto: vi.fn().mockResolvedValue(undefined),
|
|
@@ -25,6 +26,41 @@ function createPageMock(evaluateResult) {
|
|
|
25
26
|
waitForCapture: vi.fn().mockResolvedValue(undefined),
|
|
26
27
|
};
|
|
27
28
|
}
|
|
29
|
+
|
|
30
|
+
/**
 * Run the comments-extraction script (built with `withReplies = false`)
 * against a JSDOM document parsed from `html`, and resolve with whatever the
 * script returns.
 *
 * The script reads the global `document` and `location`, so both are
 * temporarily pointed at the JSDOM window and restored afterwards.
 *
 * @param {string} html - Markup to load into the JSDOM document.
 * @returns {Promise<unknown>} The value produced by the evaluated script.
 */
async function runCommentsExtract(html) {
  const dom = new JSDOM(html, { url: 'https://www.xiaohongshu.com/search_result/abc123?xsec_token=tok' });
  // Remember whether each global existed at all, so that restoring does not
  // leave behind a `document`/`location` property whose value is `undefined`.
  const saved = ['document', 'location'].map((name) => ({
    name,
    existed: name in globalThis,
    value: globalThis[name],
  }));
  globalThis.document = dom.window.document;
  globalThis.location = dom.window.location;
  try {
    // The evaluated string is generated by this package's own builder, not
    // taken from external input.
    return await eval(buildCommentsExtractJs(false));
  } finally {
    for (const { name, existed, value } of saved) {
      if (existed) {
        globalThis[name] = value;
      } else {
        delete globalThis[name];
      }
    }
  }
}
|
|
43
|
+
|
|
44
|
+
// Unit tests for the like-count text parser: recognized shapes map to exact
// integers, everything else collapses to 0.
describe('parseXhsLikeCountText', () => {
  it('parses exact integer and shortform like counts', () => {
    // [label text, expected count]
    const accepted = [
      ['0', 0],
      ['42', 42],
      ['1,234', 1234],
      ['1,234+', 1234],
      ['2.1w', 21000],
      ['1.5万', 15000],
      ['1.2k', 1200],
      ['3千', 3000],
      [' 2.1 w + ', 21000],
    ];
    accepted.forEach(([label, count]) => {
      expect(parseXhsLikeCountText(label)).toBe(count);
    });
  });

  it('returns 0 for unknown shapes without overparsing arbitrary text', () => {
    const rejected = ['', null, undefined, '赞', 'likes 2.1w', '2w人', '1,23', '1.2.3k', '.', '1.5'];
    rejected.forEach((label) => {
      expect(parseXhsLikeCountText(label)).toBe(0);
    });
  });
});
|
|
63
|
+
|
|
28
64
|
describe('xiaohongshu comments', () => {
|
|
29
65
|
const command = getRegistry().get('xiaohongshu/comments');
|
|
30
66
|
it('returns ranked comment rows for signed full URLs', async () => {
|
|
@@ -120,6 +156,32 @@ describe('xiaohongshu comments', () => {
|
|
|
120
156
|
expect(script).toContain("const afterCount = scroller.querySelectorAll('.parent-comment').length");
|
|
121
157
|
expect(script).toContain('if (afterCount <= beforeCount) break');
|
|
122
158
|
});
|
|
159
|
+
it('extracts shortform like counts from the shared xiaohongshu/rednote DOM script', async () => {
|
|
160
|
+
const data = await runCommentsExtract(`
|
|
161
|
+
<main>
|
|
162
|
+
<section class="parent-comment">
|
|
163
|
+
<div class="comment-item">
|
|
164
|
+
<div class="author-wrapper"><span class="name">Alice</span></div>
|
|
165
|
+
<div class="content">Great note</div>
|
|
166
|
+
<span class="count">2.1w</span>
|
|
167
|
+
<span class="date">today</span>
|
|
168
|
+
</div>
|
|
169
|
+
</section>
|
|
170
|
+
<section class="parent-comment">
|
|
171
|
+
<div class="comment-item">
|
|
172
|
+
<span class="user-name">Bob</span>
|
|
173
|
+
<div class="note-text">Malformed count</div>
|
|
174
|
+
<span class="count">likes 2.1w</span>
|
|
175
|
+
</div>
|
|
176
|
+
</section>
|
|
177
|
+
</main>
|
|
178
|
+
`);
|
|
179
|
+
|
|
180
|
+
expect(data.results).toEqual([
|
|
181
|
+
{ author: 'Alice', text: 'Great note', likes: 21000, time: 'today', is_reply: false, reply_to: '' },
|
|
182
|
+
{ author: 'Bob', text: 'Malformed count', likes: 0, time: '', is_reply: false, reply_to: '' },
|
|
183
|
+
]);
|
|
184
|
+
});
|
|
123
185
|
it('respects the limit for top-level comments', async () => {
|
|
124
186
|
const manyComments = Array.from({ length: 10 }, (_, i) => ({
|
|
125
187
|
author: `User${i}`,
|
|
@@ -11,27 +11,15 @@ import { formatCookieHeader } from '@jackwener/opencli/download';
|
|
|
11
11
|
import { downloadMedia } from '@jackwener/opencli/download/media-download';
|
|
12
12
|
import { CliError } from '@jackwener/opencli/errors';
|
|
13
13
|
import { buildNoteUrl, parseNoteId } from './note-helpers.js';
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
{ name: 'note-id', positional: true, required: true, help: 'Full Xiaohongshu note URL with xsec_token, or xhslink short link' },
|
|
24
|
-
{ name: 'output', default: './xiaohongshu-downloads', help: 'Output directory' },
|
|
25
|
-
],
|
|
26
|
-
columns: ['index', 'type', 'status', 'size'],
|
|
27
|
-
func: async (page, kwargs) => {
|
|
28
|
-
const rawInput = String(kwargs['note-id']);
|
|
29
|
-
const output = kwargs.output;
|
|
30
|
-
const noteId = parseNoteId(rawInput);
|
|
31
|
-
await page.goto(buildNoteUrl(rawInput, { allowShortLink: true, commandName: 'xiaohongshu download' }));
|
|
32
|
-
await page.wait({ time: 1 + Math.random() * 2 });
|
|
33
|
-
// Extract note info and media URLs
|
|
34
|
-
const data = await page.evaluate(`
|
|
14
|
+
/**
|
|
15
|
+
* Build the media-extraction IIFE. The note id is interpolated as a default
|
|
16
|
+
* since the IIFE may also resolve it from `location.pathname`. The CDN
|
|
17
|
+
* substring allowlist includes `rednote` so the rednote adapter can reuse
|
|
18
|
+
* this script unchanged — image / video URLs on both sites are served from
|
|
19
|
+
* the same xhscdn family per #1136.
|
|
20
|
+
*/
|
|
21
|
+
export function buildDownloadExtractJs(noteId) {
|
|
22
|
+
return `
|
|
35
23
|
(() => {
|
|
36
24
|
const bodyText = document.body?.innerText || '';
|
|
37
25
|
const result = {
|
|
@@ -79,7 +67,7 @@ cli({
|
|
|
79
67
|
for (const selector of imageSelectors) {
|
|
80
68
|
document.querySelectorAll(selector).forEach(img => {
|
|
81
69
|
let src = img.src || img.getAttribute('data-src') || '';
|
|
82
|
-
if (src && (src.includes('xhscdn') || src.includes('xiaohongshu'))) {
|
|
70
|
+
if (src && (src.includes('xhscdn') || src.includes('xiaohongshu') || src.includes('rednote'))) {
|
|
83
71
|
src = src.split('?')[0];
|
|
84
72
|
src = src.replace(/\\/imageView\\d+\\/\\d+\\/w\\/\\d+/, '');
|
|
85
73
|
imageUrls.add(src);
|
|
@@ -154,7 +142,28 @@ cli({
|
|
|
154
142
|
|
|
155
143
|
return result;
|
|
156
144
|
})()
|
|
157
|
-
|
|
145
|
+
`;
|
|
146
|
+
}
|
|
147
|
+
export const command = cli({
|
|
148
|
+
site: 'xiaohongshu',
|
|
149
|
+
name: 'download',
|
|
150
|
+
access: 'read',
|
|
151
|
+
description: '下载小红书笔记中的图片和视频',
|
|
152
|
+
domain: 'www.xiaohongshu.com',
|
|
153
|
+
strategy: Strategy.COOKIE,
|
|
154
|
+
navigateBefore: false,
|
|
155
|
+
args: [
|
|
156
|
+
{ name: 'note-id', positional: true, required: true, help: 'Full Xiaohongshu note URL with xsec_token, or xhslink short link' },
|
|
157
|
+
{ name: 'output', default: './xiaohongshu-downloads', help: 'Output directory' },
|
|
158
|
+
],
|
|
159
|
+
columns: ['index', 'type', 'status', 'size'],
|
|
160
|
+
func: async (page, kwargs) => {
|
|
161
|
+
const rawInput = String(kwargs['note-id']);
|
|
162
|
+
const output = kwargs.output;
|
|
163
|
+
const noteId = parseNoteId(rawInput);
|
|
164
|
+
await page.goto(buildNoteUrl(rawInput, { allowShortLink: true, commandName: 'xiaohongshu download' }));
|
|
165
|
+
await page.wait({ time: 1 + Math.random() * 2 });
|
|
166
|
+
const data = await page.evaluate(buildDownloadExtractJs(noteId));
|
|
158
167
|
if (data?.securityBlock) {
|
|
159
168
|
throw new CliError('SECURITY_BLOCK', 'Xiaohongshu security block: the note detail page was blocked by risk control.', /^https?:\/\//.test(rawInput)
|
|
160
169
|
? 'The page may be temporarily restricted. Try again later or from a different session.'
|
package/clis/xiaohongshu/feed.js
CHANGED
|
@@ -1,18 +1,12 @@
|
|
|
1
1
|
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
args: [
|
|
11
|
-
{ name: 'limit', type: 'int', default: 20, help: 'Number of items to return' },
|
|
12
|
-
],
|
|
13
|
-
columns: ['id', 'title', 'author', 'likes', 'type', 'url'],
|
|
14
|
-
pipeline: [
|
|
15
|
-
{ navigate: 'https://www.xiaohongshu.com/explore' },
|
|
2
|
+
/**
|
|
3
|
+
* Build the home-feed pipeline for the given web host. Exported so the
|
|
4
|
+
* rednote adapter can register the same pipeline against www.rednote.com
|
|
5
|
+
* without duplicating the tap/map/limit steps.
|
|
6
|
+
*/
|
|
7
|
+
export function buildFeedPipeline(webHost) {
|
|
8
|
+
return [
|
|
9
|
+
{ navigate: `https://${webHost}/explore` },
|
|
16
10
|
{ tap: {
|
|
17
11
|
store: 'feed',
|
|
18
12
|
action: 'fetchFeeds',
|
|
@@ -26,8 +20,22 @@ cli({
|
|
|
26
20
|
type: '${{ item.note_card.type }}',
|
|
27
21
|
author: '${{ item.note_card.user.nickname }}',
|
|
28
22
|
likes: '${{ item.note_card.interact_info.liked_count }}',
|
|
29
|
-
url:
|
|
23
|
+
url: `https://${webHost}/explore/\${{ item.id }}`,
|
|
30
24
|
} },
|
|
31
25
|
{ limit: '${{ args.limit | default(20) }}' },
|
|
26
|
+
];
|
|
27
|
+
}
|
|
28
|
+
export const command = cli({
|
|
29
|
+
site: 'xiaohongshu',
|
|
30
|
+
name: 'feed',
|
|
31
|
+
access: 'read',
|
|
32
|
+
description: '小红书首页推荐 Feed (via Pinia Store Action)',
|
|
33
|
+
domain: 'www.xiaohongshu.com',
|
|
34
|
+
strategy: Strategy.INTERCEPT,
|
|
35
|
+
browser: true,
|
|
36
|
+
args: [
|
|
37
|
+
{ name: 'limit', type: 'int', default: 20, help: 'Number of items to return' },
|
|
32
38
|
],
|
|
39
|
+
columns: ['id', 'title', 'author', 'likes', 'type', 'url'],
|
|
40
|
+
pipeline: buildFeedPipeline('www.xiaohongshu.com'),
|
|
33
41
|
});
|
|
@@ -14,9 +14,9 @@ function isShortLink(input) {
|
|
|
14
14
|
return /^https?:\/\/xhslink\.com\//i.test(input);
|
|
15
15
|
}
|
|
16
16
|
|
|
17
|
-
function
|
|
17
|
+
/**
 * Report whether `hostname` is `cookieRoot` itself or one of its subdomains.
 *
 * The comparison is case-insensitive on both sides and anchored on a label
 * boundary, so `evilxiaohongshu.com` does not match `xiaohongshu.com`.
 *
 * @param {string} hostname - Host taken from the URL being validated.
 * @param {string} cookieRoot - Registrable domain to accept, e.g. `xiaohongshu.com`.
 * @returns {boolean} True when the host equals the root or ends with `.` + root.
 */
function isHostMatch(hostname, cookieRoot) {
  const host = hostname.toLowerCase();
  // Normalize the root as well: only the hostname used to be lower-cased, so a
  // mixed-case root could never match.
  const root = String(cookieRoot ?? '').toLowerCase();
  if (!root) {
    // An empty root would reduce the suffix test to "hostname ends with a dot".
    return false;
  }
  return host === root || host.endsWith(`.${root}`);
}
|
|
21
21
|
|
|
22
22
|
function isSupportedNotePath(pathname) {
|
|
@@ -30,14 +30,24 @@ function isSupportedNotePath(pathname) {
|
|
|
30
30
|
* XHS note detail pages now require a valid signed URL for reliable access.
|
|
31
31
|
* Bare note IDs no longer resolve deterministically, so callers must provide
|
|
32
32
|
* a full note URL with xsec_token or, for downloads only, an xhslink short link.
|
|
33
|
+
*
|
|
34
|
+
* `options.cookieRoot` overrides the default `xiaohongshu.com` cookie root —
|
|
35
|
+
* the rednote adapter passes `'rednote.com'` so the same validator accepts
|
|
36
|
+
* `www.rednote.com` URLs without duplicating this function.
|
|
37
|
+
* `options.signedUrlHint` overrides the default hint surfaced on rejection.
|
|
33
38
|
*/
|
|
34
39
|
export function buildNoteUrl(input, options = {}) {
|
|
35
|
-
const {
|
|
40
|
+
const {
|
|
41
|
+
allowShortLink = false,
|
|
42
|
+
commandName = 'xiaohongshu note',
|
|
43
|
+
cookieRoot = 'xiaohongshu.com',
|
|
44
|
+
signedUrlHint = XHS_SIGNED_URL_HINT,
|
|
45
|
+
} = options;
|
|
36
46
|
const trimmed = input.trim();
|
|
37
47
|
const message = `${commandName} now requires a full signed URL`;
|
|
38
48
|
const hint = allowShortLink
|
|
39
|
-
? `${
|
|
40
|
-
:
|
|
49
|
+
? `${signedUrlHint} For downloads, xhslink short links are also supported.`
|
|
50
|
+
: signedUrlHint;
|
|
41
51
|
|
|
42
52
|
if (/^https?:\/\//.test(trimmed)) {
|
|
43
53
|
if (isShortLink(trimmed)) {
|
|
@@ -48,7 +58,7 @@ export function buildNoteUrl(input, options = {}) {
|
|
|
48
58
|
try {
|
|
49
59
|
const url = new URL(trimmed);
|
|
50
60
|
const xsecToken = url.searchParams.get('xsec_token')?.trim();
|
|
51
|
-
if (
|
|
61
|
+
if (isHostMatch(url.hostname, cookieRoot) && isSupportedNotePath(url.pathname) && xsecToken) {
|
|
52
62
|
return trimmed;
|
|
53
63
|
}
|
|
54
64
|
}
|