@jackwener/opencli 1.7.22 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222) hide show
  1. package/README.md +30 -148
  2. package/README.zh-CN.md +37 -211
  3. package/cli-manifest.json +6423 -4260
  4. package/clis/12306/me.js +73 -0
  5. package/clis/12306/orders.js +96 -0
  6. package/clis/12306/passengers.js +90 -0
  7. package/clis/12306/price.js +166 -0
  8. package/clis/12306/stations.js +66 -0
  9. package/clis/12306/train.js +91 -0
  10. package/clis/12306/trains.js +119 -0
  11. package/clis/12306/utils.js +272 -0
  12. package/clis/12306/utils.test.js +331 -0
  13. package/clis/36kr/article.js +6 -3
  14. package/clis/36kr/article.test.js +46 -0
  15. package/clis/apple-podcasts/commands.test.js +20 -0
  16. package/clis/apple-podcasts/search.js +2 -2
  17. package/clis/barchart/greeks.js +144 -56
  18. package/clis/barchart/greeks.test.js +138 -0
  19. package/clis/bilibili/summary.js +167 -0
  20. package/clis/bilibili/summary.test.js +210 -0
  21. package/clis/booking/booking.test.js +356 -0
  22. package/clis/booking/search.js +351 -0
  23. package/clis/chatgpt/envelope.test.js +108 -0
  24. package/clis/chatgpt/image.js +2 -2
  25. package/clis/chatgpt/image.test.js +6 -0
  26. package/clis/chatgpt/utils.js +148 -41
  27. package/clis/chatgpt/utils.test.js +92 -2
  28. package/clis/douyin/_shared/browser-fetch.js +44 -20
  29. package/clis/douyin/_shared/browser-fetch.test.js +22 -1
  30. package/clis/douyin/_shared/evaluate-result.js +16 -0
  31. package/clis/douyin/_shared/tos-upload.js +105 -69
  32. package/clis/douyin/_shared/vod-upload.js +212 -0
  33. package/clis/douyin/_shared/vod-upload.test.js +38 -0
  34. package/clis/douyin/delete.js +137 -4
  35. package/clis/douyin/delete.test.js +90 -1
  36. package/clis/douyin/publish-upload-id.test.js +170 -0
  37. package/clis/douyin/publish.js +88 -42
  38. package/clis/douyin/user-videos.js +9 -2
  39. package/clis/douyin/user-videos.test.js +43 -0
  40. package/clis/flomo/memos.js +228 -0
  41. package/clis/flomo/memos.test.js +144 -0
  42. package/clis/gitee/search.js +2 -2
  43. package/clis/gitee/search.test.js +65 -0
  44. package/clis/jike/post.js +27 -17
  45. package/clis/jike/read.test.js +86 -0
  46. package/clis/jike/topic.js +32 -19
  47. package/clis/jike/user.js +33 -20
  48. package/clis/lesswrong/comments.js +1 -1
  49. package/clis/lesswrong/curated.js +1 -1
  50. package/clis/lesswrong/frontpage.js +1 -1
  51. package/clis/lesswrong/frontpage.test.js +37 -0
  52. package/clis/lesswrong/new.js +1 -1
  53. package/clis/lesswrong/read.js +1 -1
  54. package/clis/lesswrong/sequences.js +1 -1
  55. package/clis/lesswrong/shortform.js +1 -1
  56. package/clis/lesswrong/tag.js +1 -1
  57. package/clis/lesswrong/top-month.js +1 -1
  58. package/clis/lesswrong/top-week.js +1 -1
  59. package/clis/lesswrong/top-year.js +1 -1
  60. package/clis/lesswrong/top.js +1 -1
  61. package/clis/linkedin/connect.js +401 -0
  62. package/clis/linkedin/connect.test.js +213 -0
  63. package/clis/linkedin/inbox.js +234 -0
  64. package/clis/linkedin/inbox.test.js +152 -0
  65. package/clis/linkedin/people-search.js +262 -0
  66. package/clis/linkedin/people-search.test.js +216 -0
  67. package/clis/linkedin/safe-send.js +357 -0
  68. package/clis/linkedin/safe-send.test.js +204 -0
  69. package/clis/linkedin/salesnav-inbox.js +210 -0
  70. package/clis/linkedin/salesnav-inbox.test.js +113 -0
  71. package/clis/linkedin/salesnav-message.js +360 -0
  72. package/clis/linkedin/salesnav-message.test.js +172 -0
  73. package/clis/linkedin/salesnav-search.js +186 -0
  74. package/clis/linkedin/salesnav-search.test.js +76 -0
  75. package/clis/linkedin/salesnav-thread.js +212 -0
  76. package/clis/linkedin/salesnav-thread.test.js +79 -0
  77. package/clis/linkedin/sent-invitations.js +92 -0
  78. package/clis/linkedin/sent-invitations.test.js +62 -0
  79. package/clis/linkedin/thread-snapshot.js +214 -0
  80. package/clis/linkedin/thread-snapshot.test.js +89 -0
  81. package/clis/linkedin-learning/course.js +138 -0
  82. package/clis/linkedin-learning/course.test.js +114 -0
  83. package/clis/linkedin-learning/search.js +155 -0
  84. package/clis/linkedin-learning/search.test.js +144 -0
  85. package/clis/linkedin-learning/trending.js +133 -0
  86. package/clis/linkedin-learning/trending.test.js +123 -0
  87. package/clis/powerchina/search.js +3 -3
  88. package/clis/powerchina/search.test.js +27 -1
  89. package/clis/reddit/extract-media.test.js +149 -0
  90. package/clis/reddit/frontpage.js +47 -9
  91. package/clis/reddit/frontpage.test.js +34 -0
  92. package/clis/reddit/home.js +31 -1
  93. package/clis/reddit/home.test.js +46 -3
  94. package/clis/reddit/hot.js +32 -1
  95. package/clis/reddit/hot.test.js +15 -1
  96. package/clis/reddit/popular.js +39 -1
  97. package/clis/reddit/popular.test.js +26 -0
  98. package/clis/reddit/saved.js +1 -1
  99. package/clis/reddit/search.js +38 -1
  100. package/clis/reddit/search.test.js +26 -0
  101. package/clis/reddit/subreddit.js +52 -7
  102. package/clis/reddit/subreddit.test.js +31 -0
  103. package/clis/reddit/subscribed.js +165 -0
  104. package/clis/reddit/subscribed.test.js +168 -0
  105. package/clis/reddit/upvoted.js +1 -1
  106. package/clis/suno/commands.test.js +188 -0
  107. package/clis/suno/download.js +140 -0
  108. package/clis/suno/download.test.js +151 -0
  109. package/clis/suno/generate.js +226 -0
  110. package/clis/suno/generate.test.js +243 -0
  111. package/clis/suno/list.js +79 -0
  112. package/clis/suno/status.js +62 -0
  113. package/clis/suno/utils.js +540 -0
  114. package/clis/suno/utils.test.js +223 -0
  115. package/clis/twitter/device-follow.js +193 -0
  116. package/clis/twitter/device-follow.test.js +287 -0
  117. package/clis/twitter/download.js +443 -73
  118. package/clis/twitter/download.test.js +457 -0
  119. package/clis/twitter/list-create.js +155 -0
  120. package/clis/twitter/list-create.test.js +169 -0
  121. package/clis/twitter/list-remove.js +12 -5
  122. package/clis/twitter/list-remove.test.js +74 -0
  123. package/clis/twitter/list-tweets.js +6 -2
  124. package/clis/twitter/list-tweets.test.js +41 -1
  125. package/clis/twitter/lists.js +31 -4
  126. package/clis/twitter/lists.test.js +152 -16
  127. package/clis/twitter/search.js +6 -2
  128. package/clis/twitter/search.test.js +6 -0
  129. package/clis/twitter/shared.js +144 -0
  130. package/clis/twitter/shared.test.js +429 -1
  131. package/clis/twitter/thread.js +10 -2
  132. package/clis/twitter/thread.test.js +58 -0
  133. package/clis/twitter/timeline.js +6 -2
  134. package/clis/twitter/timeline.test.js +2 -0
  135. package/clis/twitter/tweets.js +3 -2
  136. package/clis/twitter/tweets.test.js +1 -1
  137. package/clis/weibo/delete.js +172 -0
  138. package/clis/weibo/delete.test.js +94 -0
  139. package/clis/weibo/publish.js +37 -14
  140. package/clis/weibo/publish.test.js +14 -5
  141. package/clis/weibo/user-posts.js +234 -0
  142. package/clis/weibo/user-posts.test.js +92 -0
  143. package/clis/weread/search-regression.test.js +18 -11
  144. package/clis/weread/search.js +15 -7
  145. package/clis/weread-official/book.js +135 -0
  146. package/clis/weread-official/commands.test.js +385 -0
  147. package/clis/weread-official/discover.js +107 -0
  148. package/clis/weread-official/list-apis.js +95 -0
  149. package/clis/weread-official/notes.js +171 -0
  150. package/clis/weread-official/readdata.js +158 -0
  151. package/clis/weread-official/review.js +93 -0
  152. package/clis/weread-official/search.js +106 -0
  153. package/clis/weread-official/shelf.js +97 -0
  154. package/clis/weread-official/utils.js +293 -0
  155. package/clis/weread-official/utils.test.js +242 -0
  156. package/clis/wikipedia/trending.js +7 -3
  157. package/clis/wikipedia/trending.test.js +57 -0
  158. package/clis/xianyu/chat.js +24 -109
  159. package/clis/xianyu/chat.test.js +5 -0
  160. package/clis/xianyu/im.js +322 -0
  161. package/clis/xianyu/im.test.js +253 -0
  162. package/clis/xianyu/inbox.js +96 -0
  163. package/clis/xianyu/messages.js +91 -0
  164. package/clis/xianyu/reply.js +82 -0
  165. package/clis/xiaohongshu/creator-note-detail.js +2 -1
  166. package/clis/xiaohongshu/creator-note-detail.test.js +11 -0
  167. package/clis/xiaohongshu/creator-notes-summary.js +2 -1
  168. package/clis/xiaohongshu/creator-notes-summary.test.js +7 -0
  169. package/clis/xiaohongshu/creator-notes.js +2 -1
  170. package/clis/xiaohongshu/creator-notes.test.js +12 -0
  171. package/clis/xiaohongshu/creator-stats.js +2 -1
  172. package/clis/xiaohongshu/creator-stats.test.js +24 -0
  173. package/clis/xiaohongshu/delete-note.js +260 -0
  174. package/clis/xiaohongshu/delete-note.test.js +172 -0
  175. package/clis/xiaohongshu/publish.js +48 -8
  176. package/clis/xiaohongshu/publish.test.js +65 -10
  177. package/clis/xiaohongshu/user-helpers.test.js +41 -0
  178. package/clis/xiaohongshu/user.js +27 -4
  179. package/clis/xiaoyuzhou/download.js +1 -1
  180. package/clis/xiaoyuzhou/transcript.js +1 -1
  181. package/clis/youdao/note.js +258 -0
  182. package/clis/youdao/note.test.js +99 -0
  183. package/clis/youtube/transcript.js +397 -24
  184. package/clis/youtube/transcript.test.js +196 -6
  185. package/clis/zhihu/answer-comments.js +299 -0
  186. package/clis/zhihu/answer-comments.test.js +287 -0
  187. package/clis/zhihu/answer-detail.js +12 -0
  188. package/clis/zhihu/answer-detail.test.js +8 -0
  189. package/clis/zhihu/collection.js +15 -2
  190. package/clis/zhihu/collection.test.js +46 -0
  191. package/clis/zhihu/download.js +1 -1
  192. package/clis/zhihu/question.js +42 -9
  193. package/clis/zhihu/question.test.js +111 -9
  194. package/clis/zhihu/search.js +206 -43
  195. package/clis/zhihu/search.test.js +198 -0
  196. package/dist/src/browser/errors.js +4 -2
  197. package/dist/src/browser/errors.test.js +6 -0
  198. package/dist/src/browser/page.js +30 -4
  199. package/dist/src/browser/page.test.js +42 -0
  200. package/dist/src/browser/utils.d.ts +1 -1
  201. package/dist/src/cli-argv-preprocess.d.ts +26 -0
  202. package/dist/src/cli-argv-preprocess.js +138 -0
  203. package/dist/src/cli-argv-preprocess.test.js +79 -0
  204. package/dist/src/convention-audit.js +15 -8
  205. package/dist/src/convention-audit.test.js +21 -0
  206. package/dist/src/download/media-download.js +15 -2
  207. package/dist/src/download/media-download.test.d.ts +1 -0
  208. package/dist/src/download/media-download.test.js +110 -0
  209. package/dist/src/electron-apps.js +1 -1
  210. package/dist/src/electron-apps.test.js +7 -2
  211. package/dist/src/errors.d.ts +17 -0
  212. package/dist/src/errors.js +22 -0
  213. package/dist/src/external-clis.yaml +8 -0
  214. package/dist/src/main.js +14 -2
  215. package/dist/src/utils.d.ts +43 -0
  216. package/dist/src/utils.js +97 -0
  217. package/dist/src/utils.test.d.ts +1 -0
  218. package/dist/src/utils.test.js +155 -0
  219. package/package.json +8 -2
  220. package/scripts/silent-column-drop-baseline.json +0 -52
  221. package/scripts/typed-error-lint-baseline.json +28 -380
  222. package/clis/slock/_utils.js +0 -12
@@ -1,9 +1,9 @@
1
1
  /**
2
- * YouTube transcript — extracts caption tracks from watch page bootstrap data.
2
+ * YouTube transcript — extracts caption tracks through the watch player.
3
3
  *
4
- * The old Android InnerTube client path stopped reliably returning captions.
5
- * We now match youtube/video.js: fetch watch HTML with the browser session and
6
- * parse ytInitialPlayerResponse.captions.playerCaptionsTracklistRenderer.
4
+ * YouTube timedtext URLs can require player-generated PO tokens. Load the
5
+ * watch page, ask the player captions module to select a track, then reuse the
6
+ * generated json3 timedtext URL before falling back to watch HTML captions.
7
7
  *
8
8
  * Modes:
9
9
  * --mode grouped (default): sentences merged, speaker detection, chapters
@@ -13,6 +13,115 @@ import { cli, Strategy } from '@jackwener/opencli/registry';
13
13
  import { extractJsonAssignmentFromHtml, parseVideoId, prepareYoutubeApiPage } from './utils.js';
14
14
  import { groupTranscriptSegments, formatGroupedTranscript, } from './transcript-group.js';
15
15
  import { CommandExecutionError, EmptyResultError } from '@jackwener/opencli/errors';
16
+
17
/**
 * Unwrap a `{ session, data }` envelope around a browser call result.
 *
 * When `value` is an object that has both a `session` and a `data` key, its
 * `data` member is returned. Every other value (primitives, null, arrays,
 * objects missing either key) is passed through unchanged.
 *
 * @param {*} value - Raw result to inspect.
 * @returns {*} `value.data` for enveloped results, otherwise `value` itself.
 */
function unwrapBrowserResult(value) {
    const isEnvelope = Boolean(value)
        && typeof value === 'object'
        && 'session' in value
        && 'data' in value;
    return isEnvelope ? value.data : value;
}
23
+
24
/**
 * Coerce an extraction result into an array of segments.
 *
 * The value is first passed through unwrapBrowserResult, then classified:
 * an array is returned as-is; a null/undefined payload yields null when
 * `allowNull` is set; an object with a truthy `error` is re-raised with that
 * error as the message; anything else is reported as malformed.
 *
 * @param {*} value - Raw result of the extraction step.
 * @param {string} source - Label interpolated into the malformed-payload message.
 * @param {{allowNull?: boolean}} [options] - `allowNull` (default false) permits a null/undefined payload.
 * @returns {Array|null} The segment array, or null when the payload is nullish and `allowNull` is true.
 * @throws {CommandExecutionError} When the payload reports an error or has an unexpected shape.
 */
function normalizeSegmentsPayload(value, source, { allowNull = false } = {}) {
    const unwrapped = unwrapBrowserResult(value);
    if (Array.isArray(unwrapped)) {
        return unwrapped;
    }
    if (unwrapped == null) {
        if (allowNull) {
            return null;
        }
    }
    else if (typeof unwrapped === 'object' && unwrapped.error) {
        throw new CommandExecutionError(String(unwrapped.error));
    }
    throw new CommandExecutionError(`Malformed ${source} payload`);
}
35
+
36
/**
 * Parse a json3 timedtext response body into transcript rows.
 *
 * Each entry of the top-level `events` array becomes one row: the `utf8`
 * pieces of its `segs` are concatenated, whitespace runs are collapsed to a
 * single space, and the result is trimmed. Events whose text ends up empty
 * are dropped. `tStartMs` / `dDurationMs` are converted from milliseconds to
 * seconds.
 *
 * @param {string} text - Raw json3 response body.
 * @returns {Array<{start: number, end: number, text: string}>} Rows in event order.
 * @throws {CommandExecutionError} When `text` is not valid JSON or has no `events` array.
 */
function parseJson3Segments(text) {
    // Missing or non-numeric millisecond fields fall back to 0 so NaN can
    // never reach the start/end of an emitted row.
    const toMs = (raw) => {
        const ms = Number(raw || 0);
        return Number.isFinite(ms) ? ms : 0;
    };
    let data;
    try {
        data = JSON.parse(text);
    }
    catch (err) {
        throw new CommandExecutionError(`Malformed json3 timedtext response: ${err?.message || err}`);
    }
    if (!Array.isArray(data?.events)) {
        throw new CommandExecutionError('Malformed json3 timedtext response: missing events array');
    }
    const rows = [];
    for (const event of data.events) {
        const segs = Array.isArray(event?.segs) ? event.segs : [];
        const line = segs.map((seg) => seg?.utf8 || '').join('').replace(/\s+/g, ' ').trim();
        if (!line) {
            continue;
        }
        const startMs = toMs(event?.tStartMs);
        const durMs = toMs(event?.dDurationMs);
        rows.push({
            start: startMs / 1000,
            end: (startMs + durMs) / 1000,
            text: line,
        });
    }
    return rows;
}
63
+
64
/**
 * Check whether a timedtext URL belongs to the given video.
 *
 * The comparison is exact equality on the `v` query parameter, so a video id
 * that is merely a prefix of the URL's id does not match. With no `videoId`
 * the check is skipped and every URL is accepted. A URL that `new URL()`
 * cannot parse (including relative URLs, since no base is supplied) is
 * rejected.
 *
 * @param {string} url - Absolute request URL.
 * @param {string} videoId - Expected video id; falsy disables the check.
 * @returns {boolean} True when the URL is accepted.
 */
function timedtextUrlMatchesVideo(url, videoId) {
    if (!videoId) {
        return true;
    }
    let vParam;
    try {
        vParam = new URL(url).searchParams.get('v');
    }
    catch {
        return false;
    }
    return vParam === videoId;
}
75
+
76
/**
 * Pull transcript segments out of captured network entries.
 *
 * Entries are read for their `url` and `responsePreview` fields. An entry is
 * a candidate when its URL contains `/api/timedtext`, `fmt=json3` and `pot=`,
 * passes timedtextUrlMatchesVideo for `videoId`, and — when `lang` is given —
 * carries a `lang` query parameter equal to the requested language or sharing
 * its base subtag (the part before the first `-`). Candidates are tried
 * newest-first; the first one whose body parses to at least one segment wins.
 *
 * @param {*} entries - Captured entries; unwrapped via unwrapBrowserResult first.
 * @param {string} lang - Requested language code; empty/falsy accepts any language.
 * @param {string} videoId - Video id the timedtext URL must reference.
 * @returns {{segments: Array}|{error: string}} Segments from the first usable
 *   candidate; `{ segments: [] }` when nothing usable was found; `{ error }`
 *   when no candidate produced segments and at least one body failed to parse.
 */
function extractSegmentsFromNetworkCapture(entries, lang, videoId) {
    const captured = unwrapBrowserResult(entries);
    if (!Array.isArray(captured) || captured.length === 0) {
        return { segments: [] };
    }
    const wanted = String(lang || '').toLowerCase();
    const [wantedBase] = wanted.split('-');

    // Language gate: always open when no language was requested.
    const languageMatches = (url) => {
        if (!wanted) {
            return true;
        }
        let got;
        try {
            got = String(new URL(url).searchParams.get('lang') || '').toLowerCase();
        }
        catch {
            return false;
        }
        const [gotBase] = got.split('-');
        return got === wanted || gotBase === wantedBase || wantedBase === got;
    };

    const candidates = [];
    for (const entry of captured) {
        const url = String(entry?.url || '');
        const isPotJson3Timedtext = url.includes('/api/timedtext')
            && url.includes('fmt=json3')
            && url.includes('pot=');
        // Scope to the current video: captured entries may include timedtext
        // requests for other videos in the same language. The video check uses
        // exact query-param equality so v=<prefix> cannot match v=<prefix><suffix>.
        if (isPotJson3Timedtext && timedtextUrlMatchesVideo(url, videoId) && languageMatches(url)) {
            candidates.push(entry);
        }
    }

    let lastParseError = '';
    // Walk backwards so the most recently captured response is tried first.
    for (let i = candidates.length - 1; i >= 0; i--) {
        const preview = candidates[i]?.responsePreview;
        const body = typeof preview === 'string' ? preview : '';
        if (!body) {
            continue;
        }
        try {
            const parsed = parseJson3Segments(body);
            if (parsed.length > 0) {
                return { segments: parsed };
            }
        }
        catch (err) {
            lastParseError = err?.message || String(err);
        }
    }
    if (lastParseError) {
        return { error: lastParseError };
    }
    return { segments: [] };
}
124
+
16
125
  cli({
17
126
  site: 'youtube',
18
127
  name: 'transcript',
@@ -29,11 +138,266 @@ cli({
29
138
  // so we let the renderer auto-detect columns from the data keys.
30
139
  func: async (page, kwargs) => {
31
140
  const videoId = parseVideoId(kwargs.url);
32
- await prepareYoutubeApiPage(page);
33
141
  const lang = kwargs.lang || '';
34
142
  const mode = kwargs.mode || 'grouped';
35
- // Step 1: Get caption track URL from watch page HTML
36
- const captionData = await page.evaluate(`
143
+ const watchUrl = 'https://www.youtube.com/watch?v=' + encodeURIComponent(videoId);
144
+ const canCapture = typeof page.startNetworkCapture === 'function' && typeof page.readNetworkCapture === 'function';
145
+ if (canCapture) {
146
+ try {
147
+ await page.startNetworkCapture('/api/timedtext');
148
+ }
149
+ catch {
150
+ // Best-effort only. The in-page capture and XML fallback still run.
151
+ }
152
+ }
153
+ await page.goto(watchUrl, { waitUntil: 'none' });
154
+ await page.wait(3);
155
+ const playerResult = await page.evaluate(`
156
+ (async () => {
157
+ const langPref = ${JSON.stringify(lang)};
158
+ // Scope all timedtext URL matching to the current video. YouTube is an
159
+ // SPA, so watch→watch navigations preserve performance.getEntriesByType
160
+ // entries from prior videos. Without this check a stale same-language
161
+ // URL can be picked up by the polling loop before the current video's
162
+ // fetch hook fires, leaking the predecessor's captions.
163
+ const targetVideoId = ${JSON.stringify(videoId)};
164
+ const sleep = (ms) => new Promise(resolve => setTimeout(resolve, ms));
165
+
166
+ function textFromJson3Event(event) {
167
+ if (!Array.isArray(event?.segs)) return '';
168
+ return event.segs.map(seg => seg?.utf8 || '').join('').replace(/\\s+/g, ' ').trim();
169
+ }
170
+
171
+ function parseJson3(text) {
172
+ let data;
173
+ try {
174
+ data = JSON.parse(text);
175
+ } catch (err) {
176
+ return { error: 'Malformed json3 timedtext response: ' + (err?.message || String(err)) };
177
+ }
178
+ if (!Array.isArray(data.events)) {
179
+ return { error: 'Malformed json3 timedtext response: missing events array' };
180
+ }
181
+ const rows = [];
182
+ for (const event of data.events) {
183
+ const startMs = Number(event.tStartMs || 0);
184
+ const durMs = Number(event.dDurationMs || 0);
185
+ const text = textFromJson3Event(event);
186
+ if (!text) continue;
187
+ rows.push({
188
+ start: startMs / 1000,
189
+ end: (startMs + durMs) / 1000,
190
+ text,
191
+ });
192
+ }
193
+ return { rows };
194
+ }
195
+
196
+ function timedtextUrlMatchesVideo(url) {
197
+ try {
198
+ const parsed = new URL(url, location.origin);
199
+ return parsed.searchParams.get('v') === targetVideoId;
200
+ } catch {
201
+ return false;
202
+ }
203
+ }
204
+
205
+ function captionTrackToPlayerTrack(track) {
206
+ if (!track?.languageCode) return null;
207
+ const name = track.name?.simpleText
208
+ || (Array.isArray(track.name?.runs) ? track.name.runs.map(run => run?.text || '').join('') : '')
209
+ || track.languageCode;
210
+ return {
211
+ displayName: name,
212
+ id: null,
213
+ is_default: false,
214
+ is_servable: false,
215
+ is_translateable: !!track.isTranslatable,
216
+ kind: track.kind || '',
217
+ languageCode: track.languageCode,
218
+ languageName: name,
219
+ name: '',
220
+ vss_id: track.vssId || ((track.kind === 'asr' ? 'a.' : '.') + track.languageCode),
221
+ };
222
+ }
223
+
224
+ function getTrackCandidates(player) {
225
+ const tracklist = player?.getOption?.('captions', 'tracklist');
226
+ if (Array.isArray(tracklist) && tracklist.length > 0) return tracklist;
227
+ const captionTracks = player?.getPlayerResponse?.()
228
+ ?.captions?.playerCaptionsTracklistRenderer?.captionTracks;
229
+ if (!Array.isArray(captionTracks)) return [];
230
+ return captionTracks.map(captionTrackToPlayerTrack).filter(Boolean);
231
+ }
232
+
233
+ function pickTrack(tracklist) {
234
+ if (!Array.isArray(tracklist) || tracklist.length === 0) return null;
235
+ if (langPref) {
236
+ return tracklist.find(t => t.languageCode === langPref)
237
+ || tracklist.find(t => t.languageCode?.startsWith(langPref));
238
+ }
239
+ return tracklist.find(t => t.languageCode === 'en' && t.kind !== 'asr')
240
+ || tracklist.find(t => t.languageCode === 'en')
241
+ || tracklist.find(t => t.kind !== 'asr')
242
+ || tracklist[0];
243
+ }
244
+
245
+ function findTimedtextUrl(track) {
246
+ const urls = performance.getEntriesByType('resource')
247
+ .map(entry => entry.name)
248
+ .filter(url => url.includes('/api/timedtext')
249
+ && url.includes('fmt=json3')
250
+ && url.includes('pot=')
251
+ && timedtextUrlMatchesVideo(url));
252
+ if (!urls.length) return '';
253
+ if (track?.languageCode) {
254
+ const wanted = String(track.languageCode || '').toLowerCase();
255
+ const wantedBase = wanted.split('-')[0];
256
+ const match = [...urls].reverse().find((rawUrl) => {
257
+ try {
258
+ const u = new URL(rawUrl, location.origin);
259
+ const got = String(u.searchParams.get('lang') || '').toLowerCase();
260
+ const gotBase = got.split('-')[0];
261
+ return got === wanted || gotBase === wantedBase || wantedBase === got;
262
+ } catch {
263
+ return false;
264
+ }
265
+ });
266
+ if (match) return match;
267
+ }
268
+ return urls[urls.length - 1];
269
+ }
270
+
271
+ function isJson3TimedtextUrl(url, track) {
272
+ if (!url || !url.includes('/api/timedtext')) return false;
273
+ if (!url.includes('fmt=json3')) return false;
274
+ if (!url.includes('pot=')) return false;
275
+ if (!timedtextUrlMatchesVideo(url)) return false;
276
+ if (!track?.languageCode) return true;
277
+ try {
278
+ const u = new URL(url, location.origin);
279
+ const got = String(u.searchParams.get('lang') || '').toLowerCase();
280
+ const wanted = String(track.languageCode || '').toLowerCase();
281
+ const gotBase = got.split('-')[0];
282
+ const wantedBase = wanted.split('-')[0];
283
+ return got === wanted || gotBase === wantedBase || wantedBase === got;
284
+ } catch {
285
+ return false;
286
+ }
287
+ }
288
+
289
+ const player = document.getElementById('movie_player');
290
+ if (!player?.getOption || !player?.setOption) {
291
+ return null;
292
+ }
293
+
294
+ let track = null;
295
+ for (let i = 0; i < 20; i++) {
296
+ track = pickTrack(getTrackCandidates(player));
297
+ if (track) break;
298
+ await sleep(500);
299
+ }
300
+ if (!track) return null;
301
+
302
+ const originalFetch = globalThis.fetch;
303
+ const boundOriginalFetch = originalFetch?.bind(globalThis);
304
+ const OriginalXHR = globalThis.XMLHttpRequest;
305
+ let capturedJson3Text = '';
306
+ try {
307
+ if (boundOriginalFetch) {
308
+ globalThis.fetch = async (...args) => {
309
+ const response = await boundOriginalFetch(...args);
310
+ try {
311
+ const req = args[0];
312
+ const reqUrl = typeof req === 'string' ? req : req?.url || '';
313
+ if (isJson3TimedtextUrl(reqUrl, track) && response?.ok) {
314
+ const text = await response.clone().text();
315
+ if (text && !capturedJson3Text) {
316
+ capturedJson3Text = text;
317
+ }
318
+ }
319
+ } catch {}
320
+ return response;
321
+ };
322
+ }
323
+ if (OriginalXHR) {
324
+ globalThis.XMLHttpRequest = class TimedtextCaptureXHR extends OriginalXHR {
325
+ open(method, url, ...rest) {
326
+ this.__opencliTimedtextUrl = typeof url === 'string' ? url : '';
327
+ return super.open(method, url, ...rest);
328
+ }
329
+ send(...args) {
330
+ this.addEventListener('load', () => {
331
+ try {
332
+ const url = this.__opencliTimedtextUrl || this.responseURL || '';
333
+ if (!isJson3TimedtextUrl(url, track)) return;
334
+ if (this.status < 200 || this.status >= 300) return;
335
+ const text = typeof this.responseText === 'string' ? this.responseText : '';
336
+ if (text && !capturedJson3Text) {
337
+ capturedJson3Text = text;
338
+ }
339
+ } catch {}
340
+ });
341
+ return super.send(...args);
342
+ }
343
+ };
344
+ }
345
+
346
+ // Do not clear resource timings: some videos emit a valid timedtext URL
347
+ // before our polling loop starts; keeping existing entries avoids misses.
348
+ try { player.loadModule?.('captions'); } catch {}
349
+ await sleep(500);
350
+ try { player.setOption('captions', 'track', track); } catch {}
351
+ try { player.playVideo?.(); } catch {}
352
+
353
+ for (let i = 0; i < 30; i++) {
354
+ await sleep(500);
355
+ if (capturedJson3Text) {
356
+ const parsed = parseJson3(capturedJson3Text);
357
+ if (parsed.error) return { error: parsed.error };
358
+ if (parsed.rows.length > 0) return parsed.rows;
359
+ }
360
+ const url = findTimedtextUrl(track);
361
+ if (!url) continue;
362
+ const resp = await fetch(url, { credentials: 'include' });
363
+ if (!resp.ok) continue;
364
+ const text = await resp.text();
365
+ if (!text) continue;
366
+ const parsed = parseJson3(text);
367
+ if (parsed.error) return { error: parsed.error };
368
+ if (parsed.rows.length > 0) return parsed.rows;
369
+ }
370
+
371
+ return null;
372
+ } finally {
373
+ try { player?.pauseVideo?.(); } catch {}
374
+ if (originalFetch) globalThis.fetch = originalFetch;
375
+ if (OriginalXHR) globalThis.XMLHttpRequest = OriginalXHR;
376
+ }
377
+ })()
378
+ `);
379
+ let segments = normalizeSegmentsPayload(playerResult, 'player caption extraction', { allowNull: true });
380
+ if (!segments && canCapture) {
381
+ try {
382
+ const captured = extractSegmentsFromNetworkCapture(await page.readNetworkCapture(), lang, videoId);
383
+ if (captured.error) {
384
+ throw new CommandExecutionError(captured.error);
385
+ }
386
+ if (captured.segments.length > 0) {
387
+ segments = captured.segments;
388
+ }
389
+ }
390
+ catch (err) {
391
+ if (err instanceof CommandExecutionError)
392
+ throw err;
393
+ // Keep existing fallback path when capture is unavailable.
394
+ }
395
+ }
396
+ if (!segments) {
397
+ await prepareYoutubeApiPage(page);
398
+ }
399
+ // Fallback: get caption track URL from watch page HTML
400
+ const captionData = segments ? null : unwrapBrowserResult(await page.evaluate(`
37
401
  (async () => {
38
402
  const extractJsonAssignmentFromHtml = ${extractJsonAssignmentFromHtml.toString()};
39
403
 
@@ -74,26 +438,37 @@ cli({
74
438
  langPrefixMatched: !!(langPref && track.languageCode !== langPref && track.languageCode.startsWith(langPref))
75
439
  };
76
440
  })()
77
- `);
78
- if (!captionData || typeof captionData === 'string') {
79
- throw new CommandExecutionError(`Failed to get caption info: ${typeof captionData === 'string' ? captionData : 'null response'}`);
441
+ `));
442
+ if (!segments && (!captionData || typeof captionData !== 'object' || Array.isArray(captionData))) {
443
+ throw new CommandExecutionError(`Failed to get caption info: ${typeof captionData === 'string' ? captionData : 'malformed response'}`);
80
444
  }
81
- if (captionData.error) {
82
- throw new CommandExecutionError(`${captionData.error}${captionData.available ? ' (available: ' + captionData.available.join(', ') + ')' : ''}`);
445
+ if (captionData?.error) {
446
+ const msg = `${captionData.error}${captionData.available ? ' (available: ' + captionData.available.join(', ') + ')' : ''}`;
447
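+ // "No captions available" is a legitimate empty-data condition (the author did not enable captions and YouTube generated none),
448
+ // following the same EmptyResultError pattern as the bilibili subtitle command. Downstream callers should key on code EMPTY_RESULT to skip
449
+ // retries and softFail counting. Other errors (HTTP / parse / transient empty response) are still thrown as fetch failures.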
450
+ if (captionData.error === 'No captions available for this video') {
451
+ throw new EmptyResultError('youtube transcript', '该视频没有字幕(作者未开启 + 无自动字幕)。');
452
+ }
453
+ throw new CommandExecutionError(msg);
454
+ }
455
+ if (!segments && typeof captionData?.captionUrl !== 'string') {
456
+ throw new CommandExecutionError('Malformed caption info payload');
83
457
  }
84
458
  // Warn if --lang was specified but not matched
85
- if (captionData.requestedLang && !captionData.langMatched && !captionData.langPrefixMatched) {
459
+ if (captionData?.requestedLang && !captionData.langMatched && !captionData.langPrefixMatched) {
86
460
  console.error(`Warning: --lang "${captionData.requestedLang}" not found. Using "${captionData.language}" instead. Available: ${captionData.available.join(', ')}`);
87
461
  }
88
462
  // Step 2: Fetch caption XML and parse segments
89
463
  // Ensure caption URL requests srv3 XML format — YouTube may return empty
90
464
  // responses when no explicit format is specified.
91
- const originalCaptionUrl = captionData.captionUrl;
92
- let captionUrl = originalCaptionUrl;
93
- if (!/[&?]fmt=/.test(originalCaptionUrl)) {
94
- captionUrl = originalCaptionUrl + (originalCaptionUrl.includes('?') ? '&' : '?') + 'fmt=srv3';
95
- }
96
- const segments = await page.evaluate(`
465
+ if (!segments) {
466
+ const originalCaptionUrl = captionData.captionUrl;
467
+ let captionUrl = originalCaptionUrl;
468
+ if (!/[&?]fmt=/.test(originalCaptionUrl)) {
469
+ captionUrl = originalCaptionUrl + (originalCaptionUrl.includes('?') ? '&' : '?') + 'fmt=srv3';
470
+ }
471
+ segments = normalizeSegmentsPayload(await page.evaluate(`
97
472
  (async () => {
98
473
  async function fetchCaptionXml(url) {
99
474
  const resp = await fetch(url);
@@ -181,9 +556,7 @@ cli({
181
556
 
182
557
  return results;
183
558
  })()
184
- `);
185
- if (!Array.isArray(segments)) {
186
- throw new CommandExecutionError(segments?.error || 'Failed to parse caption segments');
559
+ `), 'caption XML extraction');
187
560
  }
188
561
  if (segments.length === 0) {
189
562
  throw new EmptyResultError('youtube transcript');
@@ -192,7 +565,7 @@ cli({
192
565
  let chapters = [];
193
566
  if (mode === 'grouped') {
194
567
  try {
195
- const chapterData = await page.evaluate(`
568
+ const chapterData = unwrapBrowserResult(await page.evaluate(`
196
569
  (async () => {
197
570
  const cfg = window.ytcfg?.data_ || {};
198
571
  const apiKey = cfg.INNERTUBE_API_KEY;
@@ -256,7 +629,7 @@ cli({
256
629
  }
257
630
  return chapters;
258
631
  })()
259
- `);
632
+ `));
260
633
  if (Array.isArray(chapterData)) {
261
634
  chapters = chapterData;
262
635
  }