@jackwener/opencli 1.7.22 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -148
- package/README.zh-CN.md +37 -211
- package/cli-manifest.json +6423 -4260
- package/clis/12306/me.js +73 -0
- package/clis/12306/orders.js +96 -0
- package/clis/12306/passengers.js +90 -0
- package/clis/12306/price.js +166 -0
- package/clis/12306/stations.js +66 -0
- package/clis/12306/train.js +91 -0
- package/clis/12306/trains.js +119 -0
- package/clis/12306/utils.js +272 -0
- package/clis/12306/utils.test.js +331 -0
- package/clis/36kr/article.js +6 -3
- package/clis/36kr/article.test.js +46 -0
- package/clis/apple-podcasts/commands.test.js +20 -0
- package/clis/apple-podcasts/search.js +2 -2
- package/clis/barchart/greeks.js +144 -56
- package/clis/barchart/greeks.test.js +138 -0
- package/clis/bilibili/summary.js +167 -0
- package/clis/bilibili/summary.test.js +210 -0
- package/clis/booking/booking.test.js +356 -0
- package/clis/booking/search.js +351 -0
- package/clis/chatgpt/envelope.test.js +108 -0
- package/clis/chatgpt/image.js +2 -2
- package/clis/chatgpt/image.test.js +6 -0
- package/clis/chatgpt/utils.js +148 -41
- package/clis/chatgpt/utils.test.js +92 -2
- package/clis/douyin/_shared/browser-fetch.js +44 -20
- package/clis/douyin/_shared/browser-fetch.test.js +22 -1
- package/clis/douyin/_shared/evaluate-result.js +16 -0
- package/clis/douyin/_shared/tos-upload.js +105 -69
- package/clis/douyin/_shared/vod-upload.js +212 -0
- package/clis/douyin/_shared/vod-upload.test.js +38 -0
- package/clis/douyin/delete.js +137 -4
- package/clis/douyin/delete.test.js +90 -1
- package/clis/douyin/publish-upload-id.test.js +170 -0
- package/clis/douyin/publish.js +88 -42
- package/clis/douyin/user-videos.js +9 -2
- package/clis/douyin/user-videos.test.js +43 -0
- package/clis/flomo/memos.js +228 -0
- package/clis/flomo/memos.test.js +144 -0
- package/clis/gitee/search.js +2 -2
- package/clis/gitee/search.test.js +65 -0
- package/clis/jike/post.js +27 -17
- package/clis/jike/read.test.js +86 -0
- package/clis/jike/topic.js +32 -19
- package/clis/jike/user.js +33 -20
- package/clis/lesswrong/comments.js +1 -1
- package/clis/lesswrong/curated.js +1 -1
- package/clis/lesswrong/frontpage.js +1 -1
- package/clis/lesswrong/frontpage.test.js +37 -0
- package/clis/lesswrong/new.js +1 -1
- package/clis/lesswrong/read.js +1 -1
- package/clis/lesswrong/sequences.js +1 -1
- package/clis/lesswrong/shortform.js +1 -1
- package/clis/lesswrong/tag.js +1 -1
- package/clis/lesswrong/top-month.js +1 -1
- package/clis/lesswrong/top-week.js +1 -1
- package/clis/lesswrong/top-year.js +1 -1
- package/clis/lesswrong/top.js +1 -1
- package/clis/linkedin/connect.js +401 -0
- package/clis/linkedin/connect.test.js +213 -0
- package/clis/linkedin/inbox.js +234 -0
- package/clis/linkedin/inbox.test.js +152 -0
- package/clis/linkedin/people-search.js +262 -0
- package/clis/linkedin/people-search.test.js +216 -0
- package/clis/linkedin/safe-send.js +357 -0
- package/clis/linkedin/safe-send.test.js +204 -0
- package/clis/linkedin/salesnav-inbox.js +210 -0
- package/clis/linkedin/salesnav-inbox.test.js +113 -0
- package/clis/linkedin/salesnav-message.js +360 -0
- package/clis/linkedin/salesnav-message.test.js +172 -0
- package/clis/linkedin/salesnav-search.js +186 -0
- package/clis/linkedin/salesnav-search.test.js +76 -0
- package/clis/linkedin/salesnav-thread.js +212 -0
- package/clis/linkedin/salesnav-thread.test.js +79 -0
- package/clis/linkedin/sent-invitations.js +92 -0
- package/clis/linkedin/sent-invitations.test.js +62 -0
- package/clis/linkedin/thread-snapshot.js +214 -0
- package/clis/linkedin/thread-snapshot.test.js +89 -0
- package/clis/linkedin-learning/course.js +138 -0
- package/clis/linkedin-learning/course.test.js +114 -0
- package/clis/linkedin-learning/search.js +155 -0
- package/clis/linkedin-learning/search.test.js +144 -0
- package/clis/linkedin-learning/trending.js +133 -0
- package/clis/linkedin-learning/trending.test.js +123 -0
- package/clis/powerchina/search.js +3 -3
- package/clis/powerchina/search.test.js +27 -1
- package/clis/reddit/extract-media.test.js +149 -0
- package/clis/reddit/frontpage.js +47 -9
- package/clis/reddit/frontpage.test.js +34 -0
- package/clis/reddit/home.js +31 -1
- package/clis/reddit/home.test.js +46 -3
- package/clis/reddit/hot.js +32 -1
- package/clis/reddit/hot.test.js +15 -1
- package/clis/reddit/popular.js +39 -1
- package/clis/reddit/popular.test.js +26 -0
- package/clis/reddit/saved.js +1 -1
- package/clis/reddit/search.js +38 -1
- package/clis/reddit/search.test.js +26 -0
- package/clis/reddit/subreddit.js +52 -7
- package/clis/reddit/subreddit.test.js +31 -0
- package/clis/reddit/subscribed.js +165 -0
- package/clis/reddit/subscribed.test.js +168 -0
- package/clis/reddit/upvoted.js +1 -1
- package/clis/suno/commands.test.js +188 -0
- package/clis/suno/download.js +140 -0
- package/clis/suno/download.test.js +151 -0
- package/clis/suno/generate.js +226 -0
- package/clis/suno/generate.test.js +243 -0
- package/clis/suno/list.js +79 -0
- package/clis/suno/status.js +62 -0
- package/clis/suno/utils.js +540 -0
- package/clis/suno/utils.test.js +223 -0
- package/clis/twitter/device-follow.js +193 -0
- package/clis/twitter/device-follow.test.js +287 -0
- package/clis/twitter/download.js +443 -73
- package/clis/twitter/download.test.js +457 -0
- package/clis/twitter/list-create.js +155 -0
- package/clis/twitter/list-create.test.js +169 -0
- package/clis/twitter/list-remove.js +12 -5
- package/clis/twitter/list-remove.test.js +74 -0
- package/clis/twitter/list-tweets.js +6 -2
- package/clis/twitter/list-tweets.test.js +41 -1
- package/clis/twitter/lists.js +31 -4
- package/clis/twitter/lists.test.js +152 -16
- package/clis/twitter/search.js +6 -2
- package/clis/twitter/search.test.js +6 -0
- package/clis/twitter/shared.js +144 -0
- package/clis/twitter/shared.test.js +429 -1
- package/clis/twitter/thread.js +10 -2
- package/clis/twitter/thread.test.js +58 -0
- package/clis/twitter/timeline.js +6 -2
- package/clis/twitter/timeline.test.js +2 -0
- package/clis/twitter/tweets.js +3 -2
- package/clis/twitter/tweets.test.js +1 -1
- package/clis/weibo/delete.js +172 -0
- package/clis/weibo/delete.test.js +94 -0
- package/clis/weibo/publish.js +37 -14
- package/clis/weibo/publish.test.js +14 -5
- package/clis/weibo/user-posts.js +234 -0
- package/clis/weibo/user-posts.test.js +92 -0
- package/clis/weread/search-regression.test.js +18 -11
- package/clis/weread/search.js +15 -7
- package/clis/weread-official/book.js +135 -0
- package/clis/weread-official/commands.test.js +385 -0
- package/clis/weread-official/discover.js +107 -0
- package/clis/weread-official/list-apis.js +95 -0
- package/clis/weread-official/notes.js +171 -0
- package/clis/weread-official/readdata.js +158 -0
- package/clis/weread-official/review.js +93 -0
- package/clis/weread-official/search.js +106 -0
- package/clis/weread-official/shelf.js +97 -0
- package/clis/weread-official/utils.js +293 -0
- package/clis/weread-official/utils.test.js +242 -0
- package/clis/wikipedia/trending.js +7 -3
- package/clis/wikipedia/trending.test.js +57 -0
- package/clis/xianyu/chat.js +24 -109
- package/clis/xianyu/chat.test.js +5 -0
- package/clis/xianyu/im.js +322 -0
- package/clis/xianyu/im.test.js +253 -0
- package/clis/xianyu/inbox.js +96 -0
- package/clis/xianyu/messages.js +91 -0
- package/clis/xianyu/reply.js +82 -0
- package/clis/xiaohongshu/creator-note-detail.js +2 -1
- package/clis/xiaohongshu/creator-note-detail.test.js +11 -0
- package/clis/xiaohongshu/creator-notes-summary.js +2 -1
- package/clis/xiaohongshu/creator-notes-summary.test.js +7 -0
- package/clis/xiaohongshu/creator-notes.js +2 -1
- package/clis/xiaohongshu/creator-notes.test.js +12 -0
- package/clis/xiaohongshu/creator-stats.js +2 -1
- package/clis/xiaohongshu/creator-stats.test.js +24 -0
- package/clis/xiaohongshu/delete-note.js +260 -0
- package/clis/xiaohongshu/delete-note.test.js +172 -0
- package/clis/xiaohongshu/publish.js +48 -8
- package/clis/xiaohongshu/publish.test.js +65 -10
- package/clis/xiaohongshu/user-helpers.test.js +41 -0
- package/clis/xiaohongshu/user.js +27 -4
- package/clis/xiaoyuzhou/download.js +1 -1
- package/clis/xiaoyuzhou/transcript.js +1 -1
- package/clis/youdao/note.js +258 -0
- package/clis/youdao/note.test.js +99 -0
- package/clis/youtube/transcript.js +397 -24
- package/clis/youtube/transcript.test.js +196 -6
- package/clis/zhihu/answer-comments.js +299 -0
- package/clis/zhihu/answer-comments.test.js +287 -0
- package/clis/zhihu/answer-detail.js +12 -0
- package/clis/zhihu/answer-detail.test.js +8 -0
- package/clis/zhihu/collection.js +15 -2
- package/clis/zhihu/collection.test.js +46 -0
- package/clis/zhihu/download.js +1 -1
- package/clis/zhihu/question.js +42 -9
- package/clis/zhihu/question.test.js +111 -9
- package/clis/zhihu/search.js +206 -43
- package/clis/zhihu/search.test.js +198 -0
- package/dist/src/browser/errors.js +4 -2
- package/dist/src/browser/errors.test.js +6 -0
- package/dist/src/browser/page.js +30 -4
- package/dist/src/browser/page.test.js +42 -0
- package/dist/src/browser/utils.d.ts +1 -1
- package/dist/src/cli-argv-preprocess.d.ts +26 -0
- package/dist/src/cli-argv-preprocess.js +138 -0
- package/dist/src/cli-argv-preprocess.test.js +79 -0
- package/dist/src/convention-audit.js +15 -8
- package/dist/src/convention-audit.test.js +21 -0
- package/dist/src/download/media-download.js +15 -2
- package/dist/src/download/media-download.test.d.ts +1 -0
- package/dist/src/download/media-download.test.js +110 -0
- package/dist/src/electron-apps.js +1 -1
- package/dist/src/electron-apps.test.js +7 -2
- package/dist/src/errors.d.ts +17 -0
- package/dist/src/errors.js +22 -0
- package/dist/src/external-clis.yaml +8 -0
- package/dist/src/main.js +14 -2
- package/dist/src/utils.d.ts +43 -0
- package/dist/src/utils.js +97 -0
- package/dist/src/utils.test.d.ts +1 -0
- package/dist/src/utils.test.js +155 -0
- package/package.json +8 -2
- package/scripts/silent-column-drop-baseline.json +0 -52
- package/scripts/typed-error-lint-baseline.json +28 -380
- package/clis/slock/_utils.js +0 -12
|
@@ -3,6 +3,7 @@ import { readFileSync } from 'node:fs';
|
|
|
3
3
|
import { dirname, resolve } from 'node:path';
|
|
4
4
|
import { fileURLToPath } from 'node:url';
|
|
5
5
|
import { getRegistry } from '@jackwener/opencli/registry';
|
|
6
|
+
import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
|
|
6
7
|
import './transcript.js';
|
|
7
8
|
|
|
8
9
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
@@ -15,6 +16,7 @@ function createPageMock(captionUrl) {
|
|
|
15
16
|
evaluate: vi.fn(),
|
|
16
17
|
};
|
|
17
18
|
page.evaluate
|
|
19
|
+
.mockResolvedValueOnce(null)
|
|
18
20
|
.mockResolvedValueOnce({
|
|
19
21
|
captionUrl,
|
|
20
22
|
language: 'en',
|
|
@@ -33,7 +35,10 @@ afterEach(() => {
|
|
|
33
35
|
});
|
|
34
36
|
|
|
35
37
|
describe('youtube transcript source contract', () => {
|
|
36
|
-
it('
|
|
38
|
+
it('uses the watch player captions module before falling back to watch HTML, not Android InnerTube', () => {
|
|
39
|
+
expect(transcriptSource).toContain("player.loadModule?.('captions')");
|
|
40
|
+
expect(transcriptSource).toContain("player.setOption('captions', 'track', track)");
|
|
41
|
+
expect(transcriptSource).toContain("url.includes('pot=')");
|
|
37
42
|
expect(transcriptSource).toContain("fetch('/watch?v='");
|
|
38
43
|
expect(transcriptSource).toContain("extractJsonAssignmentFromHtml(html, 'ytInitialPlayerResponse')");
|
|
39
44
|
expect(transcriptSource).toContain('playerCaptionsTracklistRenderer');
|
|
@@ -48,6 +53,23 @@ describe('youtube transcript source contract', () => {
|
|
|
48
53
|
it('checks HTTP status before reading caption response body', () => {
|
|
49
54
|
expect(transcriptSource).toContain('resp.ok');
|
|
50
55
|
});
|
|
56
|
+
|
|
57
|
+
it('restores page fetch and XHR hooks even when caption probing exits early', () => {
|
|
58
|
+
expect(transcriptSource).toContain('} finally {');
|
|
59
|
+
expect(transcriptSource).toContain('globalThis.fetch = originalFetch');
|
|
60
|
+
expect(transcriptSource).toContain('globalThis.XMLHttpRequest = OriginalXHR');
|
|
61
|
+
});
|
|
62
|
+
|
|
63
|
+
it('scopes timedtext URL matching to the current videoId in both in-page paths', () => {
|
|
64
|
+
// YouTube is an SPA — daemon-shared tabs preserve prior videos'
|
|
65
|
+
// performance.getEntriesByType('resource') across watch→watch navigations.
|
|
66
|
+
// Both findTimedtextUrl (resource buffer) and isJson3TimedtextUrl (fetch/XHR
|
|
67
|
+
// hook) must require the URL contain v=<currentVideoId>, otherwise a
|
|
68
|
+
// previously-viewed same-language video's captions can be returned.
|
|
69
|
+
expect(transcriptSource).toContain('const targetVideoId = ');
|
|
70
|
+
expect(transcriptSource).toContain("parsed.searchParams.get('v') === targetVideoId");
|
|
71
|
+
expect(transcriptSource).toContain('timedtextUrlMatchesVideo(url)');
|
|
72
|
+
});
|
|
51
73
|
});
|
|
52
74
|
|
|
53
75
|
describe('youtube transcript caption fetch', () => {
|
|
@@ -58,18 +80,110 @@ describe('youtube transcript caption fetch', () => {
|
|
|
58
80
|
|
|
59
81
|
const rows = await command.func(page, { url: 'abc', mode: 'raw' });
|
|
60
82
|
|
|
61
|
-
expect(page.evaluate.mock.calls[
|
|
62
|
-
expect(page.evaluate.mock.calls[
|
|
83
|
+
expect(page.evaluate.mock.calls[2][0]).toContain('const primaryUrl = "https://www.youtube.com/api/timedtext?v=abc&lang=en&fmt=srv3"');
|
|
84
|
+
expect(page.evaluate.mock.calls[2][0]).toContain('const originalUrl = "https://www.youtube.com/api/timedtext?v=abc&lang=en"');
|
|
63
85
|
expect(rows).toEqual([{ index: 1, start: '1.00s', end: '3.00s', text: 'hello & world' }]);
|
|
64
86
|
});
|
|
65
87
|
|
|
88
|
+
it('uses Browser Bridge envelope-wrapped player caption segments without fallback', async () => {
|
|
89
|
+
const page = {
|
|
90
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
91
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
92
|
+
evaluate: vi.fn().mockResolvedValueOnce({
|
|
93
|
+
session: 'browser:default',
|
|
94
|
+
data: [{ start: 2, end: 4.5, text: 'from player captions' }],
|
|
95
|
+
}),
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
const rows = await command.func(page, { url: 'abc', mode: 'raw' });
|
|
99
|
+
|
|
100
|
+
expect(page.evaluate).toHaveBeenCalledTimes(1);
|
|
101
|
+
expect(rows).toEqual([{ index: 1, start: '2.00s', end: '4.50s', text: 'from player captions' }]);
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
it('uses captured timedtext json3 when player selection returns no segments', async () => {
|
|
105
|
+
const page = {
|
|
106
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
107
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
108
|
+
startNetworkCapture: vi.fn().mockResolvedValue(undefined),
|
|
109
|
+
readNetworkCapture: vi.fn().mockResolvedValue({
|
|
110
|
+
session: 'browser:default',
|
|
111
|
+
data: [{
|
|
112
|
+
url: 'https://www.youtube.com/api/timedtext?v=abc&lang=en&fmt=json3&pot=token',
|
|
113
|
+
responsePreview: JSON.stringify({
|
|
114
|
+
events: [
|
|
115
|
+
{ tStartMs: 1000, dDurationMs: 1500, segs: [{ utf8: 'hello ' }, { utf8: 'capture' }] },
|
|
116
|
+
],
|
|
117
|
+
}),
|
|
118
|
+
}],
|
|
119
|
+
}),
|
|
120
|
+
evaluate: vi.fn().mockResolvedValueOnce(null),
|
|
121
|
+
};
|
|
122
|
+
|
|
123
|
+
const rows = await command.func(page, { url: 'abc', mode: 'raw', lang: 'en' });
|
|
124
|
+
|
|
125
|
+
expect(page.startNetworkCapture).toHaveBeenCalledWith('/api/timedtext');
|
|
126
|
+
expect(page.evaluate).toHaveBeenCalledTimes(1);
|
|
127
|
+
expect(rows).toEqual([{ index: 1, start: '1.00s', end: '2.50s', text: 'hello capture' }]);
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
it('ignores captured timedtext entries from a prior video and uses only the current videoId', async () => {
|
|
131
|
+
// Regression: opencli daemon reuses one Chrome tab across sequential
|
|
132
|
+
// youtube transcript calls. YouTube's SPA navigation between watch URLs
|
|
133
|
+
// leaves prior videos' timedtext entries in performance.getEntriesByType
|
|
134
|
+
// and (rarely) in the CDP capture buffer. Without filtering by videoId,
|
|
135
|
+
// the same-language predecessor's captions can leak into the current row.
|
|
136
|
+
const page = {
|
|
137
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
138
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
139
|
+
startNetworkCapture: vi.fn().mockResolvedValue(undefined),
|
|
140
|
+
readNetworkCapture: vi.fn().mockResolvedValue({
|
|
141
|
+
session: 'browser:default',
|
|
142
|
+
data: [
|
|
143
|
+
{
|
|
144
|
+
// Stale entry from a prior watch on the shared tab — must be ignored.
|
|
145
|
+
url: 'https://www.youtube.com/api/timedtext?v=prev&lang=en&fmt=json3&pot=token',
|
|
146
|
+
responsePreview: JSON.stringify({
|
|
147
|
+
events: [
|
|
148
|
+
{ tStartMs: 0, dDurationMs: 1000, segs: [{ utf8: 'WRONG video captions' }] },
|
|
149
|
+
],
|
|
150
|
+
}),
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
// Prefix collision: substring matching for "v=abc" would accept this.
|
|
154
|
+
url: 'https://www.youtube.com/api/timedtext?v=abcd&lang=en&fmt=json3&pot=token',
|
|
155
|
+
responsePreview: JSON.stringify({
|
|
156
|
+
events: [
|
|
157
|
+
{ tStartMs: 1000, dDurationMs: 1000, segs: [{ utf8: 'WRONG prefix captions' }] },
|
|
158
|
+
],
|
|
159
|
+
}),
|
|
160
|
+
},
|
|
161
|
+
{
|
|
162
|
+
// Current video's captions.
|
|
163
|
+
url: 'https://www.youtube.com/api/timedtext?v=abc&lang=en&fmt=json3&pot=token',
|
|
164
|
+
responsePreview: JSON.stringify({
|
|
165
|
+
events: [
|
|
166
|
+
{ tStartMs: 2000, dDurationMs: 1000, segs: [{ utf8: 'right captions' }] },
|
|
167
|
+
],
|
|
168
|
+
}),
|
|
169
|
+
},
|
|
170
|
+
],
|
|
171
|
+
}),
|
|
172
|
+
evaluate: vi.fn().mockResolvedValueOnce(null),
|
|
173
|
+
};
|
|
174
|
+
|
|
175
|
+
const rows = await command.func(page, { url: 'abc', mode: 'raw', lang: 'en' });
|
|
176
|
+
|
|
177
|
+
expect(rows).toEqual([{ index: 1, start: '2.00s', end: '3.00s', text: 'right captions' }]);
|
|
178
|
+
});
|
|
179
|
+
|
|
66
180
|
it('does not override an existing caption format', async () => {
|
|
67
181
|
const page = createPageMock('https://www.youtube.com/api/timedtext?v=abc&lang=en&fmt=vtt');
|
|
68
182
|
|
|
69
183
|
await command.func(page, { url: 'abc', mode: 'raw' });
|
|
70
184
|
|
|
71
|
-
expect(page.evaluate.mock.calls[
|
|
72
|
-
expect(page.evaluate.mock.calls[
|
|
185
|
+
expect(page.evaluate.mock.calls[2][0]).toContain('const primaryUrl = "https://www.youtube.com/api/timedtext?v=abc&lang=en&fmt=vtt"');
|
|
186
|
+
expect(page.evaluate.mock.calls[2][0]).toContain('const originalUrl = "https://www.youtube.com/api/timedtext?v=abc&lang=en&fmt=vtt"');
|
|
73
187
|
});
|
|
74
188
|
|
|
75
189
|
it('falls back to the original URL only after an empty successful srv3 response', async () => {
|
|
@@ -77,7 +191,7 @@ describe('youtube transcript caption fetch', () => {
|
|
|
77
191
|
|
|
78
192
|
await command.func(page, { url: 'abc', mode: 'raw' });
|
|
79
193
|
|
|
80
|
-
const script = page.evaluate.mock.calls[
|
|
194
|
+
const script = page.evaluate.mock.calls[2][0];
|
|
81
195
|
expect(script).toContain('if (!result.xml.length && originalUrl !== primaryUrl)');
|
|
82
196
|
expect(script).toContain('result = await fetchCaptionXml(originalUrl)');
|
|
83
197
|
expect(script).toContain('if (result.error) {');
|
|
@@ -87,6 +201,7 @@ describe('youtube transcript caption fetch', () => {
|
|
|
87
201
|
const page = createPageMock('https://www.youtube.com/api/timedtext?v=abc&lang=en');
|
|
88
202
|
page.evaluate.mockReset();
|
|
89
203
|
page.evaluate
|
|
204
|
+
.mockResolvedValueOnce(null)
|
|
90
205
|
.mockResolvedValueOnce({
|
|
91
206
|
captionUrl: 'https://www.youtube.com/api/timedtext?v=abc&lang=en',
|
|
92
207
|
language: 'en',
|
|
@@ -103,4 +218,79 @@ describe('youtube transcript caption fetch', () => {
|
|
|
103
218
|
message: expect.stringContaining('HTTP 503'),
|
|
104
219
|
});
|
|
105
220
|
});
|
|
221
|
+
|
|
222
|
+
it('fails typed on malformed browser extraction payloads', async () => {
|
|
223
|
+
const page = createPageMock('https://www.youtube.com/api/timedtext?v=abc&lang=en');
|
|
224
|
+
page.evaluate.mockReset();
|
|
225
|
+
page.evaluate
|
|
226
|
+
.mockResolvedValueOnce(null)
|
|
227
|
+
.mockResolvedValueOnce({
|
|
228
|
+
captionUrl: 'https://www.youtube.com/api/timedtext?v=abc&lang=en',
|
|
229
|
+
language: 'en',
|
|
230
|
+
kind: 'manual',
|
|
231
|
+
available: ['en'],
|
|
232
|
+
requestedLang: null,
|
|
233
|
+
langMatched: false,
|
|
234
|
+
langPrefixMatched: false,
|
|
235
|
+
})
|
|
236
|
+
.mockResolvedValueOnce({ session: 'browser:default', data: { rows: [] } });
|
|
237
|
+
|
|
238
|
+
await expect(command.func(page, { url: 'abc', mode: 'raw' })).rejects.toMatchObject({
|
|
239
|
+
code: 'COMMAND_EXEC',
|
|
240
|
+
message: expect.stringContaining('Malformed caption XML extraction payload'),
|
|
241
|
+
});
|
|
242
|
+
});
|
|
243
|
+
|
|
244
|
+
it('fails typed on malformed caption info payloads before URL construction', async () => {
|
|
245
|
+
const page = createPageMock('https://www.youtube.com/api/timedtext?v=abc&lang=en');
|
|
246
|
+
page.evaluate.mockReset();
|
|
247
|
+
page.evaluate
|
|
248
|
+
.mockResolvedValueOnce(null)
|
|
249
|
+
.mockResolvedValueOnce({ session: 'browser:default', data: { rows: [] } });
|
|
250
|
+
|
|
251
|
+
await expect(command.func(page, { url: 'abc', mode: 'raw' })).rejects.toMatchObject({
|
|
252
|
+
code: 'COMMAND_EXEC',
|
|
253
|
+
message: expect.stringContaining('Malformed caption info payload'),
|
|
254
|
+
});
|
|
255
|
+
});
|
|
256
|
+
|
|
257
|
+
it('maps explicit no-captions watch metadata to EmptyResultError', async () => {
|
|
258
|
+
const page = createPageMock('https://www.youtube.com/api/timedtext?v=abc&lang=en');
|
|
259
|
+
page.evaluate.mockReset();
|
|
260
|
+
page.evaluate
|
|
261
|
+
.mockResolvedValueOnce(null)
|
|
262
|
+
.mockResolvedValueOnce({ error: 'No captions available for this video' });
|
|
263
|
+
|
|
264
|
+
await expect(command.func(page, { url: 'abc', mode: 'raw' })).rejects.toBeInstanceOf(EmptyResultError);
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
it('keeps malformed watch metadata as CommandExecutionError', async () => {
|
|
268
|
+
const page = createPageMock('https://www.youtube.com/api/timedtext?v=abc&lang=en');
|
|
269
|
+
page.evaluate.mockReset();
|
|
270
|
+
page.evaluate
|
|
271
|
+
.mockResolvedValueOnce(null)
|
|
272
|
+
.mockResolvedValueOnce({ error: 'ytInitialPlayerResponse not found in watch HTML' });
|
|
273
|
+
|
|
274
|
+
await expect(command.func(page, { url: 'abc', mode: 'raw' })).rejects.toBeInstanceOf(CommandExecutionError);
|
|
275
|
+
});
|
|
276
|
+
|
|
277
|
+
it('fails typed on malformed captured timedtext json3', async () => {
|
|
278
|
+
const page = {
|
|
279
|
+
goto: vi.fn().mockResolvedValue(undefined),
|
|
280
|
+
wait: vi.fn().mockResolvedValue(undefined),
|
|
281
|
+
startNetworkCapture: vi.fn().mockResolvedValue(undefined),
|
|
282
|
+
readNetworkCapture: vi.fn().mockResolvedValue([
|
|
283
|
+
{
|
|
284
|
+
url: 'https://www.youtube.com/api/timedtext?v=abc&lang=en&fmt=json3&pot=token',
|
|
285
|
+
responsePreview: '{"events":',
|
|
286
|
+
},
|
|
287
|
+
]),
|
|
288
|
+
evaluate: vi.fn().mockResolvedValueOnce(null),
|
|
289
|
+
};
|
|
290
|
+
|
|
291
|
+
await expect(command.func(page, { url: 'abc', mode: 'raw', lang: 'en' })).rejects.toMatchObject({
|
|
292
|
+
code: 'COMMAND_EXEC',
|
|
293
|
+
message: expect.stringContaining('Malformed json3 timedtext response'),
|
|
294
|
+
});
|
|
295
|
+
});
|
|
106
296
|
});
|
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
2
|
+
import { ArgumentError, AuthRequiredError, CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
|
|
3
|
+
|
|
4
|
+
/**
 * Convert a numeric character reference value into its character.
 *
 * @param {number} codePoint - Parsed numeric value of the reference.
 * @returns {string|null} The character, or null when the value is not an
 *   integer inside the Unicode range 0..0x10FFFF.
 */
function decodeEntity(codePoint) {
    return Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10FFFF
        ? String.fromCodePoint(codePoint)
        : null;
}

/**
 * Reduce an HTML fragment to plain text.
 *
 * `<br>` becomes a newline, closing block tags (p, div, h1-h6, li,
 * blockquote) become a blank line, every other tag is dropped, and character
 * references are decoded. Runs of three or more newlines collapse to two and
 * the result is trimmed.
 *
 * @param {string} html - HTML fragment; any falsy value yields ''.
 * @returns {string} Plain-text rendering of the fragment.
 */
function stripHtml(html) {
    if (!html) return '';
    const namedEntities = { nbsp: ' ', lt: '<', gt: '>', amp: '&', quot: '"', apos: "'" };
    return String(html)
        .replace(/<br\s*\/?\s*>/gi, '\n')
        .replace(/<\/(?:p|div|h[1-6]|li|blockquote)>/gi, '\n\n')
        .replace(/<[^>]+>/g, '')
        // Decode every reference in ONE pass: chained per-entity replaces would
        // decode twice (e.g. "&amp;#39;" -> "&#39;" -> "'"). Tags are stripped
        // first so an escaped "&lt;b&gt;" survives as the literal text "<b>".
        .replace(/&(#\d+|#[xX][0-9a-fA-F]+|nbsp|lt|gt|amp|quot|apos);/g, (entity, body) => {
            if (body[0] !== '#') return namedEntities[body];
            const isHex = body[1] === 'x' || body[1] === 'X';
            const codePoint = isHex ? Number.parseInt(body.slice(2), 16) : Number(body.slice(1));
            // Out-of-range references are left untouched rather than dropped.
            return decodeEntity(codePoint) ?? entity;
        })
        .replace(/\n{3,}/g, '\n\n')
        .trim();
}
|
|
27
|
+
|
|
28
|
+
// Accepted spellings of an answer target.
const ANSWER_ID_RE = /^\d+$/; // bare numeric answer id
const ANSWER_TYPED_RE = /^answer:(\d+):(\d+)$/; // answer:<qid>:<aid>
const ANSWER_PATH_RE = /^\/question\/(\d+)\/answer\/(\d+)\/?$/; // /question/<qid>/answer/<aid>
const BARE_ANSWER_PATH_RE = /^\/answer\/(\d+)\/?$/; // /answer/<aid>

/**
 * Resolve user input into an answer id and, when the input carries one, a
 * question id.
 *
 * Accepts a numeric id, the typed form `answer:<qid>:<aid>`, or an https URL
 * on www.zhihu.com / zhihu.com (no credentials, no explicit port) whose path
 * is an answer path.
 *
 * @param {unknown} input - Raw CLI argument.
 * @returns {{answerId: string, questionId: string}|null} Parsed target, with
 *   questionId '' when unknown; null when the input matches no accepted form.
 */
function parseAnswerTarget(input) {
    const raw = String(input ?? '').trim();
    if (raw === '') return null;

    if (ANSWER_ID_RE.test(raw)) return { answerId: raw, questionId: '' };

    const typedMatch = ANSWER_TYPED_RE.exec(raw);
    if (typedMatch) return { questionId: typedMatch[1], answerId: typedMatch[2] };

    let parsed;
    try {
        parsed = new URL(raw);
    } catch {
        return null;
    }

    const onZhihu = parsed.hostname === 'www.zhihu.com' || parsed.hostname === 'zhihu.com';
    const hasExtras = Boolean(parsed.username || parsed.password || parsed.port);
    if (parsed.protocol !== 'https:' || hasExtras || !onZhihu) return null;

    const fullPath = ANSWER_PATH_RE.exec(parsed.pathname);
    if (fullPath) return { questionId: fullPath[1], answerId: fullPath[2] };

    const barePath = BARE_ANSWER_PATH_RE.exec(parsed.pathname);
    return barePath ? { answerId: barePath[1], questionId: '' } : null;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Pull the question id out of a `/question/<qid>/answer/<aid>` URL.
 *
 * @param {unknown} input - URL text, e.g. the page's current URL.
 * @returns {string} The question id, or '' when the input is empty, is not a
 *   parseable https URL on www.zhihu.com / zhihu.com, or has a different path.
 */
function extractQuestionIdFromAnswerUrl(input) {
    const raw = String(input ?? '').trim();
    if (!raw) return '';

    let parsed;
    try {
        parsed = new URL(raw);
    } catch {
        return '';
    }

    const onZhihu = parsed.hostname === 'www.zhihu.com' || parsed.hostname === 'zhihu.com';
    if (parsed.protocol !== 'https:' || !onZhihu) return '';

    const pathMatch = parsed.pathname.match(ANSWER_PATH_RE);
    return pathMatch?.[1] || '';
}
|
|
71
|
+
|
|
72
|
+
/**
 * Coerce a counter field to a non-negative integer.
 *
 * @param {unknown} value - Raw count from the API payload.
 * @returns {number} `value` when it is an integer >= 0, otherwise 0.
 */
function normalizeCount(value) {
    if (!Number.isInteger(value)) return 0;
    if (value < 0) return 0;
    return value;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Format a Unix timestamp given in seconds as an ISO-8601 string.
 *
 * @param {unknown} value - Seconds since the epoch.
 * @returns {string} ISO timestamp, or '' when `value` is not a positive finite
 *   number or lies outside the range a Date can represent.
 */
function normalizeUnixSeconds(value) {
    if (typeof value !== 'number' || !Number.isFinite(value) || value <= 0) return '';
    const date = new Date(value * 1000);
    // A finite but huge value yields an Invalid Date, on which toISOString()
    // throws RangeError; treat it like any other unusable timestamp.
    return Number.isNaN(date.getTime()) ? '' : date.toISOString();
}
|
|
81
|
+
|
|
82
|
+
/**
 * Read a display name from an author object.
 *
 * @param {object|null|undefined} author - Author record; the name is looked up
 *   at `author.member.name` first, then at `author.name`.
 * @returns {*} The first truthy name found, otherwise ''.
 */
function memberName(author) {
    const nestedName = author?.member?.name;
    if (nestedName) return nestedName;
    const directName = author?.name;
    if (directName) return directName;
    return '';
}
|
|
85
|
+
|
|
86
|
+
/**
 * Normalize a comment id to a string.
 *
 * @param {unknown} value - Raw id from the API payload.
 * @returns {string} The trimmed string, the decimal form of a non-negative
 *   safe integer, or '' for anything else.
 */
function normalizeCommentId(value) {
    switch (typeof value) {
        case 'string':
            return value.trim();
        case 'number':
            return Number.isSafeInteger(value) && value >= 0 ? String(value) : '';
        default:
            return '';
    }
}
|
|
91
|
+
|
|
92
|
+
/**
 * Choose the URL reported for a comment.
 *
 * @param {unknown} url - URL supplied by the payload, used as the fallback.
 * @param {string} questionId - Question id, may be ''.
 * @param {string} answerId - Answer id.
 * @param {string} commentId - Comment id.
 * @returns {string} A www.zhihu.com answer link with a `#comment-<id>` anchor
 *   when all three ids are truthy; otherwise `url` if it is a string, else ''.
 */
function normalizeCommentUrl(url, questionId, answerId, commentId) {
    const canBuildAnchor = Boolean(questionId && answerId && commentId);
    if (!canBuildAnchor) {
        return typeof url === 'string' ? url : '';
    }
    return `https://www.zhihu.com/question/${questionId}/answer/${answerId}#comment-${commentId}`;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Validate a comments-API URL and rewrite it onto www.zhihu.com.
 *
 * Only https URLs without credentials or an explicit port are accepted, and
 * the path must be the comments endpoint of `answerId` on either
 * www.zhihu.com (`/api/v4/answers/<id>/comments`) or api.zhihu.com
 * (`/answers/<id>/comments`).
 *
 * @param {unknown} url - Candidate URL.
 * @param {string} answerId - Answer whose comments endpoint is expected.
 * @returns {string} The URL on www.zhihu.com (query string preserved for the
 *   api.zhihu.com form), or '' when the candidate is rejected or unparseable.
 */
function normalizeCommentsApiUrl(url, answerId) {
    if (typeof url !== 'string' || !url) return '';
    try {
        const candidate = new URL(url);
        const wwwPath = `/api/v4/answers/${answerId}/comments`;
        const apiPath = `/answers/${answerId}/comments`;
        const hasExtras = candidate.username || candidate.password || candidate.port;
        if (candidate.protocol !== 'https:' || hasExtras) return '';

        const { hostname, pathname } = candidate;
        switch (hostname) {
            case 'www.zhihu.com':
                return pathname === wwwPath ? candidate.toString() : '';
            case 'api.zhihu.com':
                return pathname === apiPath ? `https://www.zhihu.com${wwwPath}${candidate.search}` : '';
            default:
                return '';
        }
    } catch {
        return '';
    }
}
|
|
115
|
+
|
|
116
|
+
/**
 * Flatten a sequence of comment records into output rows.
 *
 * A record counts as a reply when it has a non-empty `reply_to_author` name;
 * otherwise it is a top-level comment. Replies attach to the most recent
 * accepted top-level comment and are dropped once `repliesLimit` of them have
 * been taken for it, or when no top-level comment has been seen yet. Iteration
 * stops at the first top-level comment that would exceed `topLevelLimit`.
 * Entries that are not plain objects, or lack a usable id, are counted as
 * malformed and skipped.
 *
 * @param {Iterable<unknown>} comments - Comment records in listing order.
 * @param {{answerId: string, questionId: string, topLevelLimit: number, repliesLimit: number}} options
 * @returns {{rows: object[], topLevelCount: number, reachedTopLevelLimit: boolean, malformedComments: number}}
 */
function buildRows(comments, { answerId, questionId, topLevelLimit, repliesLimit }) {
    const rows = [];
    let malformedComments = 0;
    let topLevelCount = 0;
    let reachedTopLevelLimit = false;
    // Rank of the top-level comment that following replies belong to (0 = none yet).
    let activeCommentRank = 0;
    let repliesTaken = 0;

    for (const entry of comments) {
        const isRecord = Boolean(entry) && typeof entry === 'object' && !Array.isArray(entry);
        const id = isRecord ? normalizeCommentId(entry.id) : '';
        if (!id) {
            malformedComments += 1;
            continue;
        }

        const author = memberName(entry.author);
        const replyToAuthor = memberName(entry.reply_to_author);
        const isReply = Boolean(replyToAuthor);

        if (isReply) {
            if (!activeCommentRank || repliesTaken >= repliesLimit) continue;
            repliesTaken += 1;
        } else {
            if (topLevelCount >= topLevelLimit) {
                reachedTopLevelLimit = true;
                break;
            }
            topLevelCount += 1;
            activeCommentRank = topLevelCount;
            repliesTaken = 0;
        }

        rows.push({
            rank: rows.length + 1,
            comment_rank: activeCommentRank,
            reply_rank: isReply ? repliesTaken : 0,
            depth: 0,
            id,
            parent_id: '',
            author: author || 'anonymous',
            reply_to: replyToAuthor,
            likes: normalizeCount(entry.vote_count),
            created_at: normalizeUnixSeconds(entry.created_time),
            url: normalizeCommentUrl(entry.url, questionId, answerId, id),
            content: stripHtml(entry.content || ''),
        });
    }

    return { rows, topLevelCount, reachedTopLevelLimit, malformedComments };
}
|
|
169
|
+
|
|
170
|
+
/** Largest value accepted for --limit (top-level comments). */
const MAX_LIMIT = 1000;

/** Largest value accepted for --replies-limit (replies per top-level comment). */
const MAX_REPLIES_LIMIT = 100;

/** Value sent as the `limit` query parameter of each comments request. */
const ZHIHU_PAGE_SIZE = 20;
|
|
173
|
+
|
|
174
|
+
cli({
    site: 'zhihu',
    name: 'answer-comments',
    access: 'read',
    description: '知乎回答评论列表',
    domain: 'www.zhihu.com',
    strategy: Strategy.COOKIE,
    args: [
        { name: 'id', required: true, positional: true, help: 'Answer ID, full Zhihu answer URL, or typed target (answer:<qid>:<aid>)' },
        { name: 'limit', type: 'int', default: 20, help: 'Number of top-level comments (max 1000)' },
        { name: 'replies-limit', type: 'int', default: 3, help: 'Number of replies to include per top-level comment (max 100)' },
    ],
    columns: ['rank', 'comment_rank', 'reply_rank', 'depth', 'id', 'parent_id', 'author', 'reply_to', 'likes', 'created_at', 'url', 'content'],
    /**
     * Lists the comments of one Zhihu answer.
     *
     * Validates the arguments, opens the answer page, then walks the comments
     * API page by page (fetching from inside the page with credentials
     * included) until either the requested number of top-level comments has
     * been collected or the API reports the end of the list.
     *
     * @param page   Browser page handle; `goto`, `evaluate` and (optionally) `getCurrentUrl` are used.
     * @param kwargs Parsed CLI arguments: `id`, `limit`, `replies-limit`.
     * @returns The rows produced by `buildRows` for everything fetched so far.
     * @throws ArgumentError on an unparseable id or an out-of-range limit.
     * @throws AuthRequiredError on HTTP 401/403 from the comments API.
     * @throws EmptyResultError on HTTP 404 or when no rows were produced.
     * @throws CommandExecutionError on navigation, request, payload or pagination failures.
     */
    func: async (page, kwargs) => {
        const retryHint = 'Try again later or rerun with -v for more detail';
        const describeError = (err) => (err instanceof Error ? err.message : String(err));

        // --- argument validation (order matters: id, then --limit, then --replies-limit) ---
        const target = parseAnswerTarget(kwargs.id);
        if (!target) {
            throw new ArgumentError(
                'Answer ID must be a numeric id, a Zhihu answer URL, or answer:<qid>:<aid>',
                'Example: opencli zhihu answer-comments 1937205528846655537',
            );
        }

        const topLevelLimit = Number(kwargs.limit ?? 20);
        const limitIsValid = Number.isInteger(topLevelLimit) && topLevelLimit > 0 && topLevelLimit <= MAX_LIMIT;
        if (!limitIsValid) {
            throw new ArgumentError(`--limit must be a positive integer no greater than ${MAX_LIMIT}`);
        }

        const repliesLimit = Number(kwargs['replies-limit'] ?? 3);
        const repliesLimitIsValid = Number.isInteger(repliesLimit) && repliesLimit >= 0 && repliesLimit <= MAX_REPLIES_LIMIT;
        if (!repliesLimitIsValid) {
            throw new ArgumentError(`--replies-limit must be an integer between 0 and ${MAX_REPLIES_LIMIT}`);
        }

        // --- open the answer page so the API calls below run in the site's context ---
        const { answerId } = target;
        try {
            await page.goto(`https://www.zhihu.com/answer/${answerId}`);
        } catch (err) {
            throw new CommandExecutionError(
                `Failed to open Zhihu answer ${answerId}: ${describeError(err)}`,
                'Open the answer URL in Chrome and retry after the page is reachable.',
            );
        }

        // The question id from the target wins; otherwise fall back to whatever
        // the current page URL yields (a failed URL lookup counts as '').
        let currentQuestionId = '';
        if (page.getCurrentUrl) {
            const landedUrl = await page.getCurrentUrl().catch(() => '');
            currentQuestionId = extractQuestionIdFromAnswerUrl(landedUrl);
        }
        const questionId = target.questionId || currentQuestionId;

        // --- local helpers ---

        // Script evaluated in the page: fetch one API page and return either the
        // parsed JSON or a sentinel object describing what went wrong.
        const fetchScriptFor = (apiUrl) => `
            (async () => {
                const r = await fetch(${JSON.stringify(apiUrl)}, { credentials: 'include' });
                if (!r.ok) return { __httpError: r.status };
                try {
                    return await r.json();
                } catch (error) {
                    return { __malformedJson: error instanceof Error ? error.message : String(error) };
                }
            })()
        `;

        // Maps a failed (or missing) HTTP status to the error to throw.
        const httpFailure = (status) => {
            switch (status) {
                case 401:
                case 403:
                    return new AuthRequiredError('www.zhihu.com', 'Failed to fetch Zhihu answer comments');
                case 404:
                    return new EmptyResultError('zhihu answer-comments', `No Zhihu answer comments resource was found for ${answerId}.`);
                default:
                    return new CommandExecutionError(
                        status
                            ? `Zhihu answer comments request failed (HTTP ${status})`
                            : 'Zhihu answer comments request failed',
                        retryHint,
                    );
            }
        };

        // Rebuilds the rows from everything fetched so far, rejecting id-less comments.
        const buildChecked = (comments) => {
            const result = buildRows(comments, { answerId, questionId, topLevelLimit, repliesLimit });
            if (result.malformedComments > 0) {
                throw new CommandExecutionError('Zhihu answer comments contained rows without comment ids');
            }
            return result;
        };

        // Final gate before returning: an empty result is reported as an error.
        const nonEmpty = (rows) => {
            if (rows.length === 0) {
                throw new EmptyResultError('zhihu answer-comments', `No comments found for answer ${answerId}.`);
            }
            return rows;
        };

        // --- pagination loop ---
        const fetched = [];
        const visited = new Set();
        let url = `https://www.zhihu.com/api/v4/answers/${answerId}/comments?order=normal&limit=${ZHIHU_PAGE_SIZE}&offset=0&status=open`;

        while (url && !visited.has(url)) {
            visited.add(url);

            const data = await page.evaluate(fetchScriptFor(url)).catch((err) => {
                throw new CommandExecutionError(
                    `Zhihu answer comments request failed: ${describeError(err)}`,
                    'Try again later or rerun with -v for more detail.',
                );
            });

            if (!data || data.__httpError) {
                throw httpFailure(data?.__httpError);
            }
            if (data.__malformedJson) {
                throw new CommandExecutionError(
                    `Zhihu answer comments returned malformed JSON: ${data.__malformedJson}`,
                    retryHint,
                );
            }
            const payloadIsWellFormed = Array.isArray(data.data) && data.paging && typeof data.paging === 'object';
            if (!payloadIsWellFormed) {
                throw new CommandExecutionError(
                    'Zhihu answer comments returned a malformed payload',
                    retryHint,
                );
            }

            fetched.push(...data.data);
            const pageResult = buildChecked(fetched);
            if (pageResult.reachedTopLevelLimit || data.paging?.is_end) {
                return nonEmpty(pageResult.rows);
            }

            // Follow the server-provided cursor, refusing unusable or looping links.
            const next = normalizeCommentsApiUrl(data.paging?.next, answerId);
            if (!next) {
                throw new CommandExecutionError('Zhihu answer comments pagination returned malformed next URL');
            }
            if (visited.has(next)) {
                throw new CommandExecutionError('Zhihu answer comments pagination returned a repeated next URL');
            }
            url = next;
        }

        // Fallback for a loop that ends without returning from inside.
        return nonEmpty(buildChecked(fetched).rows);
    },
});
|
|
298
|
+
|
|
299
|
+
// Groups four module-internal helpers under one named export.
// NOTE(review): presumably consumed only by this command's unit tests — confirm before relying on it elsewhere.
export const __test__ = { stripHtml, parseAnswerTarget, normalizeCommentsApiUrl, buildRows };
|