@jackwener/opencli 1.7.21 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +31 -148
- package/README.zh-CN.md +38 -211
- package/cli-manifest.json +6423 -4260
- package/clis/12306/me.js +73 -0
- package/clis/12306/orders.js +96 -0
- package/clis/12306/passengers.js +90 -0
- package/clis/12306/price.js +166 -0
- package/clis/12306/stations.js +66 -0
- package/clis/12306/train.js +91 -0
- package/clis/12306/trains.js +119 -0
- package/clis/12306/utils.js +272 -0
- package/clis/12306/utils.test.js +331 -0
- package/clis/36kr/article.js +6 -3
- package/clis/36kr/article.test.js +46 -0
- package/clis/apple-podcasts/commands.test.js +20 -0
- package/clis/apple-podcasts/search.js +2 -2
- package/clis/barchart/greeks.js +144 -56
- package/clis/barchart/greeks.test.js +138 -0
- package/clis/bilibili/summary.js +167 -0
- package/clis/bilibili/summary.test.js +210 -0
- package/clis/booking/booking.test.js +356 -0
- package/clis/booking/search.js +351 -0
- package/clis/boss/utils.js +17 -1
- package/clis/boss/utils.test.js +34 -0
- package/clis/chatgpt/envelope.test.js +108 -0
- package/clis/chatgpt/image.js +2 -2
- package/clis/chatgpt/image.test.js +6 -0
- package/clis/chatgpt/utils.js +148 -41
- package/clis/chatgpt/utils.test.js +92 -2
- package/clis/douyin/_shared/browser-fetch.js +44 -20
- package/clis/douyin/_shared/browser-fetch.test.js +22 -1
- package/clis/douyin/_shared/evaluate-result.js +16 -0
- package/clis/douyin/_shared/tos-upload.js +105 -69
- package/clis/douyin/_shared/vod-upload.js +212 -0
- package/clis/douyin/_shared/vod-upload.test.js +38 -0
- package/clis/douyin/delete.js +137 -4
- package/clis/douyin/delete.test.js +90 -1
- package/clis/douyin/publish-upload-id.test.js +170 -0
- package/clis/douyin/publish.js +88 -42
- package/clis/douyin/user-videos.js +9 -2
- package/clis/douyin/user-videos.test.js +43 -0
- package/clis/flomo/memos.js +228 -0
- package/clis/flomo/memos.test.js +144 -0
- package/clis/gitee/search.js +2 -2
- package/clis/gitee/search.test.js +65 -0
- package/clis/jike/post.js +27 -17
- package/clis/jike/read.test.js +86 -0
- package/clis/jike/topic.js +32 -19
- package/clis/jike/user.js +33 -20
- package/clis/lesswrong/comments.js +1 -1
- package/clis/lesswrong/curated.js +1 -1
- package/clis/lesswrong/frontpage.js +1 -1
- package/clis/lesswrong/frontpage.test.js +37 -0
- package/clis/lesswrong/new.js +1 -1
- package/clis/lesswrong/read.js +1 -1
- package/clis/lesswrong/sequences.js +1 -1
- package/clis/lesswrong/shortform.js +1 -1
- package/clis/lesswrong/tag.js +1 -1
- package/clis/lesswrong/top-month.js +1 -1
- package/clis/lesswrong/top-week.js +1 -1
- package/clis/lesswrong/top-year.js +1 -1
- package/clis/lesswrong/top.js +1 -1
- package/clis/linkedin/connect.js +401 -0
- package/clis/linkedin/connect.test.js +213 -0
- package/clis/linkedin/inbox.js +234 -0
- package/clis/linkedin/inbox.test.js +152 -0
- package/clis/linkedin/people-search.js +262 -0
- package/clis/linkedin/people-search.test.js +216 -0
- package/clis/linkedin/safe-send.js +357 -0
- package/clis/linkedin/safe-send.test.js +204 -0
- package/clis/linkedin/salesnav-inbox.js +210 -0
- package/clis/linkedin/salesnav-inbox.test.js +113 -0
- package/clis/linkedin/salesnav-message.js +360 -0
- package/clis/linkedin/salesnav-message.test.js +172 -0
- package/clis/linkedin/salesnav-search.js +186 -0
- package/clis/linkedin/salesnav-search.test.js +76 -0
- package/clis/linkedin/salesnav-thread.js +212 -0
- package/clis/linkedin/salesnav-thread.test.js +79 -0
- package/clis/linkedin/sent-invitations.js +92 -0
- package/clis/linkedin/sent-invitations.test.js +62 -0
- package/clis/linkedin/thread-snapshot.js +214 -0
- package/clis/linkedin/thread-snapshot.test.js +89 -0
- package/clis/linkedin-learning/course.js +138 -0
- package/clis/linkedin-learning/course.test.js +114 -0
- package/clis/linkedin-learning/search.js +155 -0
- package/clis/linkedin-learning/search.test.js +144 -0
- package/clis/linkedin-learning/trending.js +133 -0
- package/clis/linkedin-learning/trending.test.js +123 -0
- package/clis/powerchina/search.js +3 -3
- package/clis/powerchina/search.test.js +27 -1
- package/clis/reddit/extract-media.test.js +149 -0
- package/clis/reddit/frontpage.js +47 -9
- package/clis/reddit/frontpage.test.js +34 -0
- package/clis/reddit/home.js +31 -1
- package/clis/reddit/home.test.js +46 -3
- package/clis/reddit/hot.js +32 -1
- package/clis/reddit/hot.test.js +15 -1
- package/clis/reddit/popular.js +39 -1
- package/clis/reddit/popular.test.js +26 -0
- package/clis/reddit/saved.js +1 -1
- package/clis/reddit/search.js +38 -1
- package/clis/reddit/search.test.js +26 -0
- package/clis/reddit/subreddit.js +52 -7
- package/clis/reddit/subreddit.test.js +31 -0
- package/clis/reddit/subscribed.js +165 -0
- package/clis/reddit/subscribed.test.js +168 -0
- package/clis/reddit/upvoted.js +1 -1
- package/clis/suno/commands.test.js +188 -0
- package/clis/suno/download.js +140 -0
- package/clis/suno/download.test.js +151 -0
- package/clis/suno/generate.js +226 -0
- package/clis/suno/generate.test.js +243 -0
- package/clis/suno/list.js +79 -0
- package/clis/suno/status.js +62 -0
- package/clis/suno/utils.js +540 -0
- package/clis/suno/utils.test.js +223 -0
- package/clis/twitter/device-follow.js +193 -0
- package/clis/twitter/device-follow.test.js +287 -0
- package/clis/twitter/download.js +443 -73
- package/clis/twitter/download.test.js +457 -0
- package/clis/twitter/list-create.js +155 -0
- package/clis/twitter/list-create.test.js +169 -0
- package/clis/twitter/list-remove.js +12 -5
- package/clis/twitter/list-remove.test.js +74 -0
- package/clis/twitter/list-tweets.js +6 -2
- package/clis/twitter/list-tweets.test.js +41 -1
- package/clis/twitter/lists.js +31 -4
- package/clis/twitter/lists.test.js +152 -16
- package/clis/twitter/search.js +6 -2
- package/clis/twitter/search.test.js +6 -0
- package/clis/twitter/shared.js +144 -0
- package/clis/twitter/shared.test.js +429 -1
- package/clis/twitter/thread.js +10 -2
- package/clis/twitter/thread.test.js +58 -0
- package/clis/twitter/timeline.js +6 -2
- package/clis/twitter/timeline.test.js +2 -0
- package/clis/twitter/tweets.js +3 -2
- package/clis/twitter/tweets.test.js +1 -1
- package/clis/weibo/comments.js +3 -4
- package/clis/weibo/delete.js +172 -0
- package/clis/weibo/delete.test.js +94 -0
- package/clis/weibo/envelope.test.js +85 -0
- package/clis/weibo/favorites.js +4 -4
- package/clis/weibo/feed.js +3 -5
- package/clis/weibo/hot.js +3 -4
- package/clis/weibo/me.js +3 -5
- package/clis/weibo/post.js +3 -4
- package/clis/weibo/publish.js +37 -14
- package/clis/weibo/publish.test.js +14 -5
- package/clis/weibo/search.js +4 -3
- package/clis/weibo/user-posts.js +234 -0
- package/clis/weibo/user-posts.test.js +92 -0
- package/clis/weibo/user.js +3 -4
- package/clis/weibo/utils.js +34 -5
- package/clis/weibo/utils.test.js +36 -0
- package/clis/weread/search-regression.test.js +18 -11
- package/clis/weread/search.js +15 -7
- package/clis/weread-official/book.js +135 -0
- package/clis/weread-official/commands.test.js +385 -0
- package/clis/weread-official/discover.js +107 -0
- package/clis/weread-official/list-apis.js +95 -0
- package/clis/weread-official/notes.js +171 -0
- package/clis/weread-official/readdata.js +158 -0
- package/clis/weread-official/review.js +93 -0
- package/clis/weread-official/search.js +106 -0
- package/clis/weread-official/shelf.js +97 -0
- package/clis/weread-official/utils.js +293 -0
- package/clis/weread-official/utils.test.js +242 -0
- package/clis/wikipedia/trending.js +7 -3
- package/clis/wikipedia/trending.test.js +57 -0
- package/clis/xianyu/chat.js +24 -109
- package/clis/xianyu/chat.test.js +5 -0
- package/clis/xianyu/im.js +322 -0
- package/clis/xianyu/im.test.js +253 -0
- package/clis/xianyu/inbox.js +96 -0
- package/clis/xianyu/messages.js +91 -0
- package/clis/xianyu/reply.js +82 -0
- package/clis/xiaohongshu/creator-note-detail.js +2 -1
- package/clis/xiaohongshu/creator-note-detail.test.js +11 -0
- package/clis/xiaohongshu/creator-notes-summary.js +2 -1
- package/clis/xiaohongshu/creator-notes-summary.test.js +7 -0
- package/clis/xiaohongshu/creator-notes.js +2 -1
- package/clis/xiaohongshu/creator-notes.test.js +12 -0
- package/clis/xiaohongshu/creator-stats.js +2 -1
- package/clis/xiaohongshu/creator-stats.test.js +24 -0
- package/clis/xiaohongshu/delete-note.js +260 -0
- package/clis/xiaohongshu/delete-note.test.js +172 -0
- package/clis/xiaohongshu/publish.js +48 -8
- package/clis/xiaohongshu/publish.test.js +65 -10
- package/clis/xiaohongshu/user-helpers.test.js +41 -0
- package/clis/xiaohongshu/user.js +27 -4
- package/clis/xiaoyuzhou/download.js +1 -1
- package/clis/xiaoyuzhou/transcript.js +1 -1
- package/clis/youdao/note.js +258 -0
- package/clis/youdao/note.test.js +99 -0
- package/clis/youtube/transcript.js +397 -24
- package/clis/youtube/transcript.test.js +196 -6
- package/clis/zhihu/answer-comments.js +299 -0
- package/clis/zhihu/answer-comments.test.js +287 -0
- package/clis/zhihu/answer-detail.js +12 -0
- package/clis/zhihu/answer-detail.test.js +8 -0
- package/clis/zhihu/collection.js +15 -2
- package/clis/zhihu/collection.test.js +46 -0
- package/clis/zhihu/download.js +1 -1
- package/clis/zhihu/question.js +42 -9
- package/clis/zhihu/question.test.js +111 -9
- package/clis/zhihu/search.js +206 -43
- package/clis/zhihu/search.test.js +198 -0
- package/dist/src/browser/errors.js +4 -2
- package/dist/src/browser/errors.test.js +6 -0
- package/dist/src/browser/page.js +30 -4
- package/dist/src/browser/page.test.js +42 -0
- package/dist/src/browser/utils.d.ts +1 -1
- package/dist/src/cli-argv-preprocess.d.ts +26 -0
- package/dist/src/cli-argv-preprocess.js +138 -0
- package/dist/src/cli-argv-preprocess.test.js +79 -0
- package/dist/src/cli.js +1 -1
- package/dist/src/convention-audit.js +15 -8
- package/dist/src/convention-audit.test.js +21 -0
- package/dist/src/download/media-download.js +15 -2
- package/dist/src/download/media-download.test.d.ts +1 -0
- package/dist/src/download/media-download.test.js +110 -0
- package/dist/src/electron-apps.js +1 -1
- package/dist/src/electron-apps.test.js +7 -2
- package/dist/src/errors.d.ts +17 -0
- package/dist/src/errors.js +22 -0
- package/dist/src/external-clis.yaml +20 -0
- package/dist/src/external.d.ts +6 -1
- package/dist/src/external.test.js +19 -0
- package/dist/src/main.js +14 -2
- package/dist/src/utils.d.ts +43 -0
- package/dist/src/utils.js +97 -0
- package/dist/src/utils.test.d.ts +1 -0
- package/dist/src/utils.test.js +155 -0
- package/package.json +8 -2
- package/scripts/silent-column-drop-baseline.json +0 -52
- package/scripts/typed-error-lint-baseline.json +28 -380
- package/clis/slock/_utils.js +0 -12
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* YouTube transcript — extracts caption tracks
|
|
2
|
+
* YouTube transcript — extracts caption tracks through the watch player.
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* YouTube timedtext URLs can require player-generated PO tokens. Load the
|
|
5
|
+
* watch page, ask the player captions module to select a track, then reuse the
|
|
6
|
+
* generated json3 timedtext URL before falling back to watch HTML captions.
|
|
7
7
|
*
|
|
8
8
|
* Modes:
|
|
9
9
|
* --mode grouped (default): sentences merged, speaker detection, chapters
|
|
@@ -13,6 +13,115 @@ import { cli, Strategy } from '@jackwener/opencli/registry';
|
|
|
13
13
|
import { extractJsonAssignmentFromHtml, parseVideoId, prepareYoutubeApiPage } from './utils.js';
|
|
14
14
|
import { groupTranscriptSegments, formatGroupedTranscript, } from './transcript-group.js';
|
|
15
15
|
import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
|
|
16
|
+
|
|
17
|
+
/**
 * Removes the `{ session, data }` wrapper from a browser call result.
 *
 * A value counts as wrapped when it is a non-null object that owns (or
 * inherits) both a `session` key and a `data` key; only then is `value.data`
 * returned. Every other value — primitives, null, arrays, plain objects
 * missing either key — is handed back unchanged.
 *
 * @param {*} value - Raw result, possibly wrapped.
 * @returns {*} The inner `data` for wrapped values, otherwise `value` itself.
 */
function unwrapBrowserResult(value) {
    const isWrapped = Boolean(value)
        && typeof value === 'object'
        && 'session' in value
        && 'data' in value;
    return isWrapped ? value.data : value;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Validates that a browser result is an array of segments.
 *
 * The value is first passed through `unwrapBrowserResult`. Outcomes:
 *   - an array is returned as-is;
 *   - null/undefined is returned as `null` when `allowNull` is set;
 *   - an object with a truthy `error` property raises that error text;
 *   - anything else raises a "Malformed <source> payload" error.
 *
 * @param {*} value - Raw result to validate.
 * @param {string} source - Label interpolated into the malformed-payload message.
 * @param {{ allowNull?: boolean }} [options] - `allowNull` lets a nullish payload through as `null`.
 * @returns {Array|null} The segment array, or `null` when permitted.
 * @throws {CommandExecutionError} On an error payload or an unrecognised shape.
 */
function normalizeSegmentsPayload(value, source, { allowNull = false } = {}) {
    const payload = unwrapBrowserResult(value);
    if (Array.isArray(payload)) {
        return payload;
    }
    if (payload == null) {
        if (allowNull) {
            return null;
        }
        throw new CommandExecutionError(`Malformed ${source} payload`);
    }
    const reportedError = typeof payload === 'object' ? payload.error : undefined;
    if (reportedError) {
        throw new CommandExecutionError(String(reportedError));
    }
    throw new CommandExecutionError(`Malformed ${source} payload`);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Parses a json3 timedtext response body into transcript segments.
 *
 * Each entry of the top-level `events` array becomes one segment: the `utf8`
 * pieces of its `segs` are concatenated, whitespace runs are collapsed to a
 * single space, and the result is trimmed. Events whose text ends up empty are
 * skipped. `tStartMs` and `dDurationMs` are converted from milliseconds to
 * seconds; a missing or non-numeric value is treated as 0 so that `start` and
 * `end` are always finite numbers (never NaN).
 *
 * @param {string} text - Raw response body.
 * @returns {Array<{start: number, end: number, text: string}>} Segments in event order.
 * @throws {CommandExecutionError} When the body is not valid JSON or has no `events` array.
 */
function parseJson3Segments(text) {
    // Number(x || 0) alone lets values such as "abc" through as NaN; clamp
    // anything non-finite to 0 so downstream arithmetic stays well-defined.
    const toFiniteMs = (raw) => {
        const ms = Number(raw);
        return Number.isFinite(ms) ? ms : 0;
    };
    let data;
    try {
        data = JSON.parse(text);
    }
    catch (err) {
        throw new CommandExecutionError(`Malformed json3 timedtext response: ${err?.message || err}`);
    }
    if (!Array.isArray(data?.events)) {
        throw new CommandExecutionError('Malformed json3 timedtext response: missing events array');
    }
    const rows = [];
    for (const event of data.events) {
        const segs = Array.isArray(event?.segs) ? event.segs : [];
        const line = segs.map((seg) => seg?.utf8 || '').join('').replace(/\s+/g, ' ').trim();
        if (!line) {
            continue;
        }
        const startMs = toFiniteMs(event?.tStartMs);
        const durMs = toFiniteMs(event?.dDurationMs);
        rows.push({
            start: startMs / 1000,
            end: (startMs + durMs) / 1000,
            text: line,
        });
    }
    return rows;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Tells whether a timedtext URL belongs to the given video.
 *
 * The check compares the URL's `v` query parameter to `videoId` with exact
 * equality, so an id that is merely a prefix of another id does not match.
 * Relative URLs are resolved against the YouTube origin, mirroring the
 * in-page variant of this check, which resolves against `location.origin`.
 *
 * @param {string} url - Absolute or origin-relative timedtext URL.
 * @param {string} videoId - Expected video id; a falsy value disables the check.
 * @returns {boolean} `true` when no `videoId` is given or the `v` parameter
 *   equals it; `false` on mismatch or when the URL cannot be parsed.
 */
function timedtextUrlMatchesVideo(url, videoId) {
    if (!videoId) {
        return true;
    }
    try {
        // The base is ignored for absolute URLs and only used for relative ones.
        const parsed = new URL(url, 'https://www.youtube.com');
        return parsed.searchParams.get('v') === videoId;
    }
    catch {
        return false;
    }
}
|
|
75
|
+
|
|
76
|
+
/**
 * Pulls transcript segments out of captured network entries.
 *
 * An entry is a candidate when its `url` contains `/api/timedtext`,
 * `fmt=json3` and `pot=`, passes `timedtextUrlMatchesVideo` for `videoId`
 * (so entries left over from other videos are ignored), and — when `lang` is
 * given — carries a `lang` query parameter that equals the requested language
 * or shares its base code (the part before the first `-`), case-insensitively.
 *
 * Candidates are tried from the end of the capture list toward the start. The
 * first one whose `responsePreview` string parses into at least one segment
 * wins. Parse failures are remembered; when nothing yields segments, the
 * message of the last failure encountered is reported.
 *
 * @param {*} entries - Capture result; unwrapped via `unwrapBrowserResult`.
 * @param {string} lang - Requested language code; empty disables language filtering.
 * @param {string} videoId - Video id used to scope the entries.
 * @returns {{segments: Array}|{error: string}} Segments (possibly empty), or a parse error message.
 */
function extractSegmentsFromNetworkCapture(entries, lang, videoId) {
    const captured = unwrapBrowserResult(entries);
    if (!Array.isArray(captured) || captured.length === 0) {
        return { segments: [] };
    }
    const wanted = String(lang || '').toLowerCase();
    const wantedBase = wanted.split('-')[0];

    // Language test on the URL's `lang` query parameter. Comparing base codes
    // already covers the case where the URL carries only the base language.
    const languageMatches = (url) => {
        if (!wanted) {
            return true;
        }
        try {
            const got = String(new URL(url).searchParams.get('lang') || '').toLowerCase();
            return got === wanted || got.split('-')[0] === wantedBase;
        }
        catch {
            return false;
        }
    };

    const isCandidate = (entry) => {
        const url = String(entry?.url || '');
        return url.includes('/api/timedtext')
            && url.includes('fmt=json3')
            && url.includes('pot=')
            && timedtextUrlMatchesVideo(url, videoId)
            && languageMatches(url);
    };

    const candidates = captured.filter(isCandidate);
    let lastParseError = '';
    // Walk backwards so entries later in the capture list are preferred.
    for (let i = candidates.length - 1; i >= 0; i -= 1) {
        const preview = candidates[i]?.responsePreview;
        if (typeof preview !== 'string' || preview === '') {
            continue;
        }
        try {
            const rows = parseJson3Segments(preview);
            if (rows.length > 0) {
                return { segments: rows };
            }
        }
        catch (err) {
            lastParseError = err?.message || String(err);
        }
    }
    return lastParseError ? { error: lastParseError } : { segments: [] };
}
|
|
124
|
+
|
|
16
125
|
cli({
|
|
17
126
|
site: 'youtube',
|
|
18
127
|
name: 'transcript',
|
|
@@ -29,11 +138,266 @@ cli({
|
|
|
29
138
|
// so we let the renderer auto-detect columns from the data keys.
|
|
30
139
|
func: async (page, kwargs) => {
|
|
31
140
|
const videoId = parseVideoId(kwargs.url);
|
|
32
|
-
await prepareYoutubeApiPage(page);
|
|
33
141
|
const lang = kwargs.lang || '';
|
|
34
142
|
const mode = kwargs.mode || 'grouped';
|
|
35
|
-
|
|
36
|
-
const
|
|
143
|
+
const watchUrl = 'https://www.youtube.com/watch?v=' + encodeURIComponent(videoId);
|
|
144
|
+
const canCapture = typeof page.startNetworkCapture === 'function' && typeof page.readNetworkCapture === 'function';
|
|
145
|
+
if (canCapture) {
|
|
146
|
+
try {
|
|
147
|
+
await page.startNetworkCapture('/api/timedtext');
|
|
148
|
+
}
|
|
149
|
+
catch {
|
|
150
|
+
// Best-effort only. The in-page capture and XML fallback still run.
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
await page.goto(watchUrl, { waitUntil: 'none' });
|
|
154
|
+
await page.wait(3);
|
|
155
|
+
const playerResult = await page.evaluate(`
|
|
156
|
+
(async () => {
|
|
157
|
+
const langPref = ${JSON.stringify(lang)};
|
|
158
|
+
// Scope all timedtext URL matching to the current video. YouTube is an
|
|
159
|
+
// SPA, so watch→watch navigations preserve performance.getEntriesByType
|
|
160
|
+
// entries from prior videos. Without this check a stale same-language
|
|
161
|
+
// URL can be picked up by the polling loop before the current video's
|
|
162
|
+
// fetch hook fires, leaking the predecessor's captions.
|
|
163
|
+
const targetVideoId = ${JSON.stringify(videoId)};
|
|
164
|
+
const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));
|
|
165
|
+
|
|
166
|
+
function textFromJson3Event(event) {
|
|
167
|
+
if (!Array.isArray(event?.segs)) return '';
|
|
168
|
+
return event.segs.map(seg => seg?.utf8 || '').join('').replace(/\\s+/g, ' ').trim();
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
function parseJson3(text) {
|
|
172
|
+
let data;
|
|
173
|
+
try {
|
|
174
|
+
data = JSON.parse(text);
|
|
175
|
+
} catch (err) {
|
|
176
|
+
return { error: 'Malformed json3 timedtext response: ' + (err?.message || String(err)) };
|
|
177
|
+
}
|
|
178
|
+
if (!Array.isArray(data.events)) {
|
|
179
|
+
return { error: 'Malformed json3 timedtext response: missing events array' };
|
|
180
|
+
}
|
|
181
|
+
const rows = [];
|
|
182
|
+
for (const event of data.events) {
|
|
183
|
+
const startMs = Number(event.tStartMs || 0);
|
|
184
|
+
const durMs = Number(event.dDurationMs || 0);
|
|
185
|
+
const text = textFromJson3Event(event);
|
|
186
|
+
if (!text) continue;
|
|
187
|
+
rows.push({
|
|
188
|
+
start: startMs / 1000,
|
|
189
|
+
end: (startMs + durMs) / 1000,
|
|
190
|
+
text,
|
|
191
|
+
});
|
|
192
|
+
}
|
|
193
|
+
return { rows };
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
function timedtextUrlMatchesVideo(url) {
|
|
197
|
+
try {
|
|
198
|
+
const parsed = new URL(url, location.origin);
|
|
199
|
+
return parsed.searchParams.get('v') === targetVideoId;
|
|
200
|
+
} catch {
|
|
201
|
+
return false;
|
|
202
|
+
}
|
|
203
|
+
}
|
|
204
|
+
|
|
205
|
+
function captionTrackToPlayerTrack(track) {
|
|
206
|
+
if (!track?.languageCode) return null;
|
|
207
|
+
const name = track.name?.simpleText
|
|
208
|
+
|| (Array.isArray(track.name?.runs) ? track.name.runs.map(run => run?.text || '').join('') : '')
|
|
209
|
+
|| track.languageCode;
|
|
210
|
+
return {
|
|
211
|
+
displayName: name,
|
|
212
|
+
id: null,
|
|
213
|
+
is_default: false,
|
|
214
|
+
is_servable: false,
|
|
215
|
+
is_translateable: !!track.isTranslatable,
|
|
216
|
+
kind: track.kind || '',
|
|
217
|
+
languageCode: track.languageCode,
|
|
218
|
+
languageName: name,
|
|
219
|
+
name: '',
|
|
220
|
+
vss_id: track.vssId || ((track.kind === 'asr' ? 'a.' : '.') + track.languageCode),
|
|
221
|
+
};
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
function getTrackCandidates(player) {
|
|
225
|
+
const tracklist = player?.getOption?.('captions', 'tracklist');
|
|
226
|
+
if (Array.isArray(tracklist) && tracklist.length > 0) return tracklist;
|
|
227
|
+
const captionTracks = player?.getPlayerResponse?.()
|
|
228
|
+
?.captions?.playerCaptionsTracklistRenderer?.captionTracks;
|
|
229
|
+
if (!Array.isArray(captionTracks)) return [];
|
|
230
|
+
return captionTracks.map(captionTrackToPlayerTrack).filter(Boolean);
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
function pickTrack(tracklist) {
|
|
234
|
+
if (!Array.isArray(tracklist) || tracklist.length === 0) return null;
|
|
235
|
+
if (langPref) {
|
|
236
|
+
return tracklist.find(t => t.languageCode === langPref)
|
|
237
|
+
|| tracklist.find(t => t.languageCode?.startsWith(langPref));
|
|
238
|
+
}
|
|
239
|
+
return tracklist.find(t => t.languageCode === 'en' && t.kind !== 'asr')
|
|
240
|
+
|| tracklist.find(t => t.languageCode === 'en')
|
|
241
|
+
|| tracklist.find(t => t.kind !== 'asr')
|
|
242
|
+
|| tracklist[0];
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
function findTimedtextUrl(track) {
|
|
246
|
+
const urls = performance.getEntriesByType('resource')
|
|
247
|
+
.map(entry => entry.name)
|
|
248
|
+
.filter(url => url.includes('/api/timedtext')
|
|
249
|
+
&& url.includes('fmt=json3')
|
|
250
|
+
&& url.includes('pot=')
|
|
251
|
+
&& timedtextUrlMatchesVideo(url));
|
|
252
|
+
if (!urls.length) return '';
|
|
253
|
+
if (track?.languageCode) {
|
|
254
|
+
const wanted = String(track.languageCode || '').toLowerCase();
|
|
255
|
+
const wantedBase = wanted.split('-')[0];
|
|
256
|
+
const match = [...urls].reverse().find((rawUrl) => {
|
|
257
|
+
try {
|
|
258
|
+
const u = new URL(rawUrl, location.origin);
|
|
259
|
+
const got = String(u.searchParams.get('lang') || '').toLowerCase();
|
|
260
|
+
const gotBase = got.split('-')[0];
|
|
261
|
+
return got === wanted || gotBase === wantedBase || wantedBase === got;
|
|
262
|
+
} catch {
|
|
263
|
+
return false;
|
|
264
|
+
}
|
|
265
|
+
});
|
|
266
|
+
if (match) return match;
|
|
267
|
+
}
|
|
268
|
+
return urls[urls.length - 1];
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
function isJson3TimedtextUrl(url, track) {
|
|
272
|
+
if (!url || !url.includes('/api/timedtext')) return false;
|
|
273
|
+
if (!url.includes('fmt=json3')) return false;
|
|
274
|
+
if (!url.includes('pot=')) return false;
|
|
275
|
+
if (!timedtextUrlMatchesVideo(url)) return false;
|
|
276
|
+
if (!track?.languageCode) return true;
|
|
277
|
+
try {
|
|
278
|
+
const u = new URL(url, location.origin);
|
|
279
|
+
const got = String(u.searchParams.get('lang') || '').toLowerCase();
|
|
280
|
+
const wanted = String(track.languageCode || '').toLowerCase();
|
|
281
|
+
const gotBase = got.split('-')[0];
|
|
282
|
+
const wantedBase = wanted.split('-')[0];
|
|
283
|
+
return got === wanted || gotBase === wantedBase || wantedBase === got;
|
|
284
|
+
} catch {
|
|
285
|
+
return false;
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
const player = document.getElementById('movie_player');
|
|
290
|
+
if (!player?.getOption || !player?.setOption) {
|
|
291
|
+
return null;
|
|
292
|
+
}
|
|
293
|
+
|
|
294
|
+
let track = null;
|
|
295
|
+
for (let i = 0; i < 20; i++) {
|
|
296
|
+
track = pickTrack(getTrackCandidates(player));
|
|
297
|
+
if (track) break;
|
|
298
|
+
await sleep(500);
|
|
299
|
+
}
|
|
300
|
+
if (!track) return null;
|
|
301
|
+
|
|
302
|
+
const originalFetch = globalThis.fetch;
|
|
303
|
+
const boundOriginalFetch = originalFetch?.bind(globalThis);
|
|
304
|
+
const OriginalXHR = globalThis.XMLHttpRequest;
|
|
305
|
+
let capturedJson3Text = '';
|
|
306
|
+
try {
|
|
307
|
+
if (boundOriginalFetch) {
|
|
308
|
+
globalThis.fetch = async (...args) => {
|
|
309
|
+
const response = await boundOriginalFetch(...args);
|
|
310
|
+
try {
|
|
311
|
+
const req = args[0];
|
|
312
|
+
const reqUrl = typeof req === 'string' ? req : req?.url || '';
|
|
313
|
+
if (isJson3TimedtextUrl(reqUrl, track) && response?.ok) {
|
|
314
|
+
const text = await response.clone().text();
|
|
315
|
+
if (text && !capturedJson3Text) {
|
|
316
|
+
capturedJson3Text = text;
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
} catch {}
|
|
320
|
+
return response;
|
|
321
|
+
};
|
|
322
|
+
}
|
|
323
|
+
if (OriginalXHR) {
|
|
324
|
+
globalThis.XMLHttpRequest = class TimedtextCaptureXHR extends OriginalXHR {
|
|
325
|
+
open(method, url, ...rest) {
|
|
326
|
+
this.__opencliTimedtextUrl = typeof url === 'string' ? url : '';
|
|
327
|
+
return super.open(method, url, ...rest);
|
|
328
|
+
}
|
|
329
|
+
send(...args) {
|
|
330
|
+
this.addEventListener('load', () => {
|
|
331
|
+
try {
|
|
332
|
+
const url = this.__opencliTimedtextUrl || this.responseURL || '';
|
|
333
|
+
if (!isJson3TimedtextUrl(url, track)) return;
|
|
334
|
+
if (this.status < 200 || this.status >= 300) return;
|
|
335
|
+
const text = typeof this.responseText === 'string' ? this.responseText : '';
|
|
336
|
+
if (text && !capturedJson3Text) {
|
|
337
|
+
capturedJson3Text = text;
|
|
338
|
+
}
|
|
339
|
+
} catch {}
|
|
340
|
+
});
|
|
341
|
+
return super.send(...args);
|
|
342
|
+
}
|
|
343
|
+
};
|
|
344
|
+
}
|
|
345
|
+
|
|
346
|
+
// Do not clear resource timings: some videos emit a valid timedtext URL
|
|
347
|
+
// before our polling loop starts; keeping existing entries avoids misses.
|
|
348
|
+
try { player.loadModule?.('captions'); } catch {}
|
|
349
|
+
await sleep(500);
|
|
350
|
+
try { player.setOption('captions', 'track', track); } catch {}
|
|
351
|
+
try { player.playVideo?.(); } catch {}
|
|
352
|
+
|
|
353
|
+
for (let i = 0; i < 30; i++) {
|
|
354
|
+
await sleep(500);
|
|
355
|
+
if (capturedJson3Text) {
|
|
356
|
+
const parsed = parseJson3(capturedJson3Text);
|
|
357
|
+
if (parsed.error) return { error: parsed.error };
|
|
358
|
+
if (parsed.rows.length > 0) return parsed.rows;
|
|
359
|
+
}
|
|
360
|
+
const url = findTimedtextUrl(track);
|
|
361
|
+
if (!url) continue;
|
|
362
|
+
const resp = await fetch(url, { credentials: 'include' });
|
|
363
|
+
if (!resp.ok) continue;
|
|
364
|
+
const text = await resp.text();
|
|
365
|
+
if (!text) continue;
|
|
366
|
+
const parsed = parseJson3(text);
|
|
367
|
+
if (parsed.error) return { error: parsed.error };
|
|
368
|
+
if (parsed.rows.length > 0) return parsed.rows;
|
|
369
|
+
}
|
|
370
|
+
|
|
371
|
+
return null;
|
|
372
|
+
} finally {
|
|
373
|
+
try { player?.pauseVideo?.(); } catch {}
|
|
374
|
+
if (originalFetch) globalThis.fetch = originalFetch;
|
|
375
|
+
if (OriginalXHR) globalThis.XMLHttpRequest = OriginalXHR;
|
|
376
|
+
}
|
|
377
|
+
})()
|
|
378
|
+
`);
|
|
379
|
+
let segments = normalizeSegmentsPayload(playerResult, 'player caption extraction', { allowNull: true });
|
|
380
|
+
if (!segments && canCapture) {
|
|
381
|
+
try {
|
|
382
|
+
const captured = extractSegmentsFromNetworkCapture(await page.readNetworkCapture(), lang, videoId);
|
|
383
|
+
if (captured.error) {
|
|
384
|
+
throw new CommandExecutionError(captured.error);
|
|
385
|
+
}
|
|
386
|
+
if (captured.segments.length > 0) {
|
|
387
|
+
segments = captured.segments;
|
|
388
|
+
}
|
|
389
|
+
}
|
|
390
|
+
catch (err) {
|
|
391
|
+
if (err instanceof CommandExecutionError)
|
|
392
|
+
throw err;
|
|
393
|
+
// Keep existing fallback path when capture is unavailable.
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
if (!segments) {
|
|
397
|
+
await prepareYoutubeApiPage(page);
|
|
398
|
+
}
|
|
399
|
+
// Fallback: get caption track URL from watch page HTML
|
|
400
|
+
const captionData = segments ? null : unwrapBrowserResult(await page.evaluate(`
|
|
37
401
|
(async () => {
|
|
38
402
|
const extractJsonAssignmentFromHtml = ${extractJsonAssignmentFromHtml.toString()};
|
|
39
403
|
|
|
@@ -74,26 +438,37 @@ cli({
|
|
|
74
438
|
langPrefixMatched: !!(langPref && track.languageCode !== langPref && track.languageCode.startsWith(langPref))
|
|
75
439
|
};
|
|
76
440
|
})()
|
|
77
|
-
`);
|
|
78
|
-
if (!captionData || typeof captionData
|
|
79
|
-
throw new CommandExecutionError(`Failed to get caption info: ${typeof captionData === 'string' ? captionData : '
|
|
441
|
+
`));
|
|
442
|
+
if (!segments && (!captionData || typeof captionData !== 'object' || Array.isArray(captionData))) {
|
|
443
|
+
throw new CommandExecutionError(`Failed to get caption info: ${typeof captionData === 'string' ? captionData : 'malformed response'}`);
|
|
80
444
|
}
|
|
81
|
-
if (captionData
|
|
82
|
-
|
|
445
|
+
if (captionData?.error) {
|
|
446
|
+
const msg = `${captionData.error}${captionData.available ? ' (available: ' + captionData.available.join(', ') + ')' : ''}`;
|
|
447
|
+
// "No captions available" 是合法 empty 数据条件(作者没开字幕 + YT 没自动生成),
|
|
448
|
+
// 与 bilibili subtitle 的 EmptyResultError 同模式。下游应按 code EMPTY_RESULT 跳过
|
|
449
|
+
// 重试和 softFail 计数。其它 error(HTTP / parse / 短暂空响应)仍按 fetch 失败抛。
|
|
450
|
+
if (captionData.error === 'No captions available for this video') {
|
|
451
|
+
throw new EmptyResultError('youtube transcript', '该视频没有字幕(作者未开启 + 无自动字幕)。');
|
|
452
|
+
}
|
|
453
|
+
throw new CommandExecutionError(msg);
|
|
454
|
+
}
|
|
455
|
+
if (!segments && typeof captionData?.captionUrl !== 'string') {
|
|
456
|
+
throw new CommandExecutionError('Malformed caption info payload');
|
|
83
457
|
}
|
|
84
458
|
// Warn if --lang was specified but not matched
|
|
85
|
-
if (captionData
|
|
459
|
+
if (captionData?.requestedLang && !captionData.langMatched && !captionData.langPrefixMatched) {
|
|
86
460
|
console.error(`Warning: --lang "${captionData.requestedLang}" not found. Using "${captionData.language}" instead. Available: ${captionData.available.join(', ')}`);
|
|
87
461
|
}
|
|
88
462
|
// Step 2: Fetch caption XML and parse segments
|
|
89
463
|
// Ensure caption URL requests srv3 XML format — YouTube may return empty
|
|
90
464
|
// responses when no explicit format is specified.
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
465
|
+
if (!segments) {
|
|
466
|
+
const originalCaptionUrl = captionData.captionUrl;
|
|
467
|
+
let captionUrl = originalCaptionUrl;
|
|
468
|
+
if (!/[&?]fmt=/.test(originalCaptionUrl)) {
|
|
469
|
+
captionUrl = originalCaptionUrl + (originalCaptionUrl.includes('?') ? '&' : '?') + 'fmt=srv3';
|
|
470
|
+
}
|
|
471
|
+
segments = normalizeSegmentsPayload(await page.evaluate(`
|
|
97
472
|
(async () => {
|
|
98
473
|
async function fetchCaptionXml(url) {
|
|
99
474
|
const resp = await fetch(url);
|
|
@@ -181,9 +556,7 @@ cli({
|
|
|
181
556
|
|
|
182
557
|
return results;
|
|
183
558
|
})()
|
|
184
|
-
`);
|
|
185
|
-
if (!Array.isArray(segments)) {
|
|
186
|
-
throw new CommandExecutionError(segments?.error || 'Failed to parse caption segments');
|
|
559
|
+
`), 'caption XML extraction');
|
|
187
560
|
}
|
|
188
561
|
if (segments.length === 0) {
|
|
189
562
|
throw new EmptyResultError('youtube transcript');
|
|
@@ -192,7 +565,7 @@ cli({
|
|
|
192
565
|
let chapters = [];
|
|
193
566
|
if (mode === 'grouped') {
|
|
194
567
|
try {
|
|
195
|
-
const chapterData = await page.evaluate(`
|
|
568
|
+
const chapterData = unwrapBrowserResult(await page.evaluate(`
|
|
196
569
|
(async () => {
|
|
197
570
|
const cfg = window.ytcfg?.data_ || {};
|
|
198
571
|
const apiKey = cfg.INNERTUBE_API_KEY;
|
|
@@ -256,7 +629,7 @@ cli({
|
|
|
256
629
|
}
|
|
257
630
|
return chapters;
|
|
258
631
|
})()
|
|
259
|
-
`);
|
|
632
|
+
`));
|
|
260
633
|
if (Array.isArray(chapterData)) {
|
|
261
634
|
chapters = chapterData;
|
|
262
635
|
}
|