@hallelx/youtube-transcript 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +25 -0
- package/README.md +270 -0
- package/dist/cli.cjs +1224 -0
- package/dist/cli.cjs.map +1 -0
- package/dist/cli.js +1221 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.cjs +1032 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +344 -0
- package/dist/index.d.ts +344 -0
- package/dist/index.js +987 -0
- package/dist/index.js.map +1 -0
- package/package.json +83 -0
package/dist/cli.cjs
ADDED
|
@@ -0,0 +1,1224 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
'use strict';
|
|
3
|
+
|
|
4
|
+
// src/settings.ts
|
|
5
|
+
// URL templates; the `{...}` placeholders are filled in by the helpers below.
var WATCH_URL_TEMPLATE = "https://www.youtube.com/watch?v={video_id}";
var INNERTUBE_API_URL_TEMPLATE = "https://www.youtube.com/youtubei/v1/player?key={api_key}";
// Client description passed as the `context` of Innertube player requests.
var INNERTUBE_CONTEXT = {
  client: {
    clientName: "ANDROID",
    clientVersion: "20.10.38"
  }
};
/**
 * Builds the watch-page URL for a video.
 *
 * @param {string} videoId - Value inserted verbatim for `{video_id}`.
 * @returns {string} The watch URL.
 */
function watchUrl(videoId) {
  // A replacer function is used instead of a replacement string so that
  // sequences such as `$&` or `$$` in the input are not expanded by `replace`.
  return WATCH_URL_TEMPLATE.replace("{video_id}", () => videoId);
}
/**
 * Builds the Innertube player endpoint URL for an API key.
 *
 * @param {string} apiKey - Value inserted verbatim for `{api_key}`.
 * @returns {string} The endpoint URL.
 */
function innertubeApiUrl(apiKey) {
  // See watchUrl: the replacer function keeps `$` sequences literal.
  return INNERTUBE_API_URL_TEMPLATE.replace("{api_key}", () => apiKey);
}
|
|
19
|
+
|
|
20
|
+
// src/errors/index.ts
|
|
21
|
+
/**
 * Base class for the errors defined in this module.
 *
 * The `name` property is taken from the class that is actually being
 * constructed, so subclasses report their own class name.
 */
var YouTubeTranscriptApiException = class extends Error {
  /**
   * @param {string} message - Error message forwarded to `Error`.
   */
  constructor(message) {
    super(message);
    const concreteClass = new.target;
    // Explicitly point the instance at the constructed class's prototype.
    Object.setPrototypeOf(this, concreteClass.prototype);
    this.name = concreteClass.name;
  }
};
|
|
28
|
+
// Building blocks of the error message; `{...}` placeholders are substituted
// in CouldNotRetrieveTranscript.buildErrorMessage().
var ERROR_MESSAGE_TEMPLATE = "\nCould not retrieve a transcript for the video {video_url}!";
var CAUSE_MESSAGE_INTRO = " This is most likely caused by:\n\n{cause}";
var GITHUB_REFERRAL = "\n\nIf you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!";
/**
 * Base error for every failure to retrieve a transcript of a given video.
 *
 * The message is assembled from the video's watch URL plus the text returned
 * by the `cause` getter. The default getter returns the static
 * `CAUSE_MESSAGE` of the concrete class; subclasses may override either.
 */
var CouldNotRetrieveTranscript = class extends YouTubeTranscriptApiException {
  videoId;
  static CAUSE_MESSAGE = "";
  /**
   * @param {string} videoId - ID of the video the transcript was requested for.
   */
  constructor(videoId) {
    super("");
    this.videoId = videoId;
    // `buildErrorMessage` reads `this.cause`, which a subclass may override
    // with a getter that depends on fields the subclass sets only after this
    // constructor returns; such subclasses rebuild the message themselves.
    this.message = this.buildErrorMessage();
  }
  /**
   * Builds the full message from the current state of the instance.
   *
   * @returns {string} The message; the cause section and issue-tracker
   *   referral are appended only when `cause` is non-empty.
   */
  buildErrorMessage() {
    // Replacer functions keep `$&`, `$$`, ... in the inserted text literal;
    // with a replacement *string* they would be expanded by `replace`.
    let errorMessage = ERROR_MESSAGE_TEMPLATE.replace(
      "{video_url}",
      () => watchUrl(this.videoId)
    );
    const cause = this.cause;
    if (cause) {
      errorMessage += CAUSE_MESSAGE_INTRO.replace("{cause}", () => cause) + GITHUB_REFERRAL;
    }
    return errorMessage;
  }
  /** @returns {string} Cause text defined by the concrete class. */
  get cause() {
    return this.constructor.CAUSE_MESSAGE;
  }
  /** @returns {string} Same text as `buildErrorMessage()`. */
  toString() {
    return this.buildErrorMessage();
  }
};
|
|
57
|
+
/** Error whose cause text reports that the data needed to fetch the transcript could not be parsed. */
var YouTubeDataUnparsable = class extends CouldNotRetrieveTranscript {
  static CAUSE_MESSAGE = "The data required to fetch the transcript is not parsable. This should not happen, please open an issue (make sure to include the video ID)!";
};
|
|
60
|
+
/**
 * Error for a failed HTTP request to YouTube; the cause text embeds a
 * description of the failure.
 */
var YouTubeRequestFailed = class extends CouldNotRetrieveTranscript {
  reason = "";
  static CAUSE_MESSAGE = "Request to YouTube failed: {reason}";
  /**
   * @param {string} videoId - ID of the video the request was made for.
   * @param {*} httpError - Description of the failure; converted with `String()`.
   */
  constructor(videoId, httpError) {
    super(videoId);
    this.reason = String(httpError);
    // The base constructor built the message before `reason` was set.
    this.message = this.buildErrorMessage();
  }
  /** @returns {string} Cause text, or "" while no reason is set. */
  get cause() {
    if (!this.reason) return "";
    // Replacer function: the reason text may contain `$` sequences that a
    // replacement string would expand.
    return this.constructor.CAUSE_MESSAGE.replace("{reason}", () => this.reason);
  }
};
|
|
73
|
+
/**
 * Error for a video that is reported as unplayable, carrying the reason and
 * optional additional details.
 *
 * The class expression is intentionally anonymous so that it takes its name
 * from the `VideoUnplayable` binding; the base class copies the class name
 * into `error.name`.
 */
var VideoUnplayable = class extends CouldNotRetrieveTranscript {
  reason = null;
  subReasons = [];
  static CAUSE_MESSAGE = "The video is unplayable for the following reason: {reason}";
  static SUBREASON_MESSAGE = "\n\nAdditional Details:\n{sub_reasons}";
  // False while the base constructor runs, so `cause` stays empty until the
  // fields below have been assigned.
  _initialized = false;
  /**
   * @param {string} videoId - ID of the affected video.
   * @param {string|null} [reason] - Main reason; null/undefined means unspecified.
   * @param {string[]} [subReasons] - Additional detail lines; defaults to none.
   */
  constructor(videoId, reason, subReasons) {
    super(videoId);
    this.reason = reason ?? null;
    this.subReasons = subReasons ?? [];
    this._initialized = true;
    this.message = this.buildErrorMessage();
  }
  /** @returns {string} Cause text including the reason and any sub-reasons. */
  get cause() {
    if (!this._initialized) return "";
    let reason = this.reason ?? "No reason specified!";
    if (this.subReasons.length > 0) {
      const details = this.subReasons.map((s) => ` - ${s}`).join("\n");
      // Replacer functions keep `$` sequences in the inserted text literal.
      reason += VideoUnplayable.SUBREASON_MESSAGE.replace("{sub_reasons}", () => details);
    }
    return this.constructor.CAUSE_MESSAGE.replace("{reason}", () => reason);
  }
};
|
|
99
|
+
/** Error whose cause text states that the video is no longer available. */
var VideoUnavailable = class extends CouldNotRetrieveTranscript {
  static CAUSE_MESSAGE = "The video is no longer available";
};
|
|
102
|
+
/** Error whose cause text explains that a URL was passed where a video ID is expected. */
var InvalidVideoId = class extends CouldNotRetrieveTranscript {
  static CAUSE_MESSAGE = 'You provided an invalid video id. Make sure you are using the video id and NOT the url!\n\nDo NOT run: `YouTubeTranscriptApi().fetch("https://www.youtube.com/watch?v=1234")`\nInstead run: `YouTubeTranscriptApi().fetch("1234")`';
};
|
|
105
|
+
// Shared first part of the cause text of RequestBlocked and IpBlocked.
var REQUEST_BLOCKED_BASE_CAUSE_MESSAGE = "YouTube is blocking requests from your IP. This usually is due to one of the following reasons:\n- You have done too many requests and your IP has been blocked by YouTube\n- You are doing requests from an IP belonging to a cloud provider (like AWS, Google Cloud Platform, Azure, etc.). Unfortunately, most IPs from cloud providers are blocked by YouTube.\n\n";
/**
 * Error for requests that YouTube blocks. The cause text depends on whether,
 * and which kind of, proxy configuration was attached via `withProxyConfig`.
 *
 * The class expression is intentionally anonymous so that it takes its name
 * from the `RequestBlocked` binding; the base class copies the class name
 * into `error.name`.
 */
var RequestBlocked = class extends CouldNotRetrieveTranscript {
  static CAUSE_MESSAGE = REQUEST_BLOCKED_BASE_CAUSE_MESSAGE + `There are two things you can do to work around this:
1. Use proxies to hide your IP address, as explained in the "Working around IP bans" section of the README (https://github.com/jdepoix/youtube-transcript-api?tab=readme-ov-file#working-around-ip-bans-requestblocked-or-ipblocked-exception).
2. (NOT RECOMMENDED) If you authenticate your requests using cookies, you will be able to continue doing requests for a while. However, YouTube will eventually permanently ban the account that you have used to authenticate with! So only do this if you don't mind your account being banned!`;
  static WITH_GENERIC_PROXY_CAUSE_MESSAGE = `YouTube is blocking your requests, despite you using proxies. Keep in mind that a proxy is just a way to hide your real IP behind the IP of that proxy, but there is no guarantee that the IP of that proxy won't be blocked as well.

The only truly reliable way to prevent IP blocks is rotating through a large pool of residential IPs, by using a provider like Webshare (https://www.webshare.io/?referral_code=w0xno53eb50g), which provides you with a pool of >30M residential IPs (make sure to purchase "Residential" proxies, NOT "Proxy Server" or "Static Residential"!).

You will find more information on how to easily integrate Webshare here: https://github.com/jdepoix/youtube-transcript-api?tab=readme-ov-file#using-webshare`;
  static WITH_WEBSHARE_PROXY_CAUSE_MESSAGE = `YouTube is blocking your requests, despite you using Webshare proxies. Please make sure that you have purchased "Residential" proxies and NOT "Proxy Server" or "Static Residential", as those won't work as reliably! The free tier also uses "Proxy Server" and will NOT work!

The only reliable option is using "Residential" proxies (not "Static Residential"), as this allows you to rotate through a pool of over 30M IPs, which means you will always find an IP that hasn't been blocked by YouTube yet!

You can support the development of this open source project by making your Webshare purchases through this affiliate link: https://www.webshare.io/?referral_code=w0xno53eb50g

Thank you for your support! <3`;
  _proxyConfig = null;
  /**
   * Attaches the proxy configuration in use and rebuilds the message.
   *
   * @param {object|null|undefined} proxyConfig - Proxy configuration, if any.
   * @returns {this} The same error instance, for chaining.
   */
  withProxyConfig(proxyConfig) {
    this._proxyConfig = proxyConfig ?? null;
    this.message = this.buildErrorMessage();
    return this;
  }
  /** @returns {string} Cause text matching the attached proxy configuration. */
  get cause() {
    // `_proxyConfig` is still undefined while the base constructor runs,
    // hence the loose null check.
    const proxyConfig = this._proxyConfig;
    if (proxyConfig != null) {
      // The kind of proxy is identified by the constructor's name.
      const name = proxyConfig.constructor?.name ?? "";
      if (name === "WebshareProxyConfig") {
        return RequestBlocked.WITH_WEBSHARE_PROXY_CAUSE_MESSAGE;
      }
      if (name) {
        return RequestBlocked.WITH_GENERIC_PROXY_CAUSE_MESSAGE;
      }
    }
    return this.constructor.CAUSE_MESSAGE;
  }
};
|
|
141
|
+
/** RequestBlocked variant whose default cause text points to the README section on working around IP bans. */
var IpBlocked = class extends RequestBlocked {
  static CAUSE_MESSAGE = REQUEST_BLOCKED_BASE_CAUSE_MESSAGE + 'Ways to work around this are explained in the "Working around IP bans" section of the README (https://github.com/jdepoix/youtube-transcript-api?tab=readme-ov-file#working-around-ip-bans-requestblocked-or-ipblocked-exception).\n';
};
|
|
144
|
+
/** Error whose cause text states that subtitles are disabled for the video. */
var TranscriptsDisabled = class extends CouldNotRetrieveTranscript {
  static CAUSE_MESSAGE = "Subtitles are disabled for this video";
};
|
|
147
|
+
/** Error whose cause text explains that the video is age-restricted and needs authentication. */
var AgeRestricted = class extends CouldNotRetrieveTranscript {
  static CAUSE_MESSAGE = "This video is age-restricted. Therefore, you are unable to retrieve transcripts for it without authenticating yourself.\n\nUnfortunately, Cookie Authentication is temporarily unsupported in youtube-transcript-api, as recent changes in YouTube's API broke the previous implementation. I will do my best to re-implement it as soon as possible.";
};
|
|
150
|
+
/** Error whose cause text states that the requested language is not translatable. */
var NotTranslatable = class extends CouldNotRetrieveTranscript {
  static CAUSE_MESSAGE = "The requested language is not translatable";
};
|
|
153
|
+
/** Error whose cause text states that the requested translation language is not available. */
var TranslationLanguageNotAvailable = class extends CouldNotRetrieveTranscript {
  static CAUSE_MESSAGE = "The requested translation language is not available";
};
|
|
156
|
+
/** Error whose cause text states that automatically consenting to cookies failed. */
var FailedToCreateConsentCookie = class extends CouldNotRetrieveTranscript {
  static CAUSE_MESSAGE = "Failed to automatically give consent to saving cookies";
};
|
|
159
|
+
/**
 * Error for the case that none of the requested language codes has a
 * transcript. The cause text lists the requested codes followed by the
 * string form of the supplied transcript data.
 */
var NoTranscriptFound = class extends CouldNotRetrieveTranscript {
  _requestedLanguageCodes = [];
  _transcriptData = { toString: () => "" };
  // False while the base constructor runs, so `cause` stays empty until the
  // fields have been assigned.
  _initialized = false;
  static CAUSE_MESSAGE = "No transcripts were found for any of the requested language codes: {requested_language_codes}\n\n{transcript_data}";
  /**
   * @param {string} videoId - ID of the video that was queried.
   * @param {Iterable<string>} requestedLanguageCodes - Codes that were asked for.
   * @param {{toString(): string}} transcriptData - Object whose string form
   *   is appended to the cause text.
   */
  constructor(videoId, requestedLanguageCodes, transcriptData) {
    super(videoId);
    this._requestedLanguageCodes = Array.from(requestedLanguageCodes);
    this._transcriptData = transcriptData;
    this._initialized = true;
    this.message = this.buildErrorMessage();
  }
  /** @returns {string} Cause text with both placeholders filled in. */
  get cause() {
    if (!this._initialized) return "";
    const substitutions = {
      requested_language_codes: `[${this._requestedLanguageCodes.map((c) => `'${c}'`).join(", ")}]`,
      transcript_data: String(this._transcriptData)
    };
    // Both placeholders are filled in a single pass with a replacer function:
    // `$` sequences in the values stay literal, and placeholder-like text
    // inside one value is never substituted again.
    return this.constructor.CAUSE_MESSAGE.replace(
      /\{(requested_language_codes|transcript_data)\}/g,
      (_placeholder, key) => substitutions[key]
    );
  }
};
|
|
180
|
+
/** Error whose cause text states that the video cannot be retrieved without a PO Token. */
var PoTokenRequired = class extends CouldNotRetrieveTranscript {
  static CAUSE_MESSAGE = "The requested video cannot be retrieved without a PO Token. If this happens, please open a GitHub issue!";
};
|
|
183
|
+
|
|
184
|
+
// src/utils/htmlEntities.ts
|
|
185
|
+
// Named entities understood by decodeHtmlEntities; any other name is left as is.
var NAMED = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
  nbsp: "\xA0"
};
/**
 * Decodes numeric (`&#65;`, `&#x41;`, `&#X41;`) and a small set of named
 * character references in a string. A single pass is made, so `&amp;lt;`
 * becomes `&lt;`, not `<`.
 *
 * @param {string} input - Text to decode; falsy values are returned unchanged.
 * @returns {string} The decoded text. References that are unknown or outside
 *   the Unicode code point range are left untouched.
 */
function decodeHtmlEntities(input) {
  if (!input) return input;
  return input.replace(/&(#[xX][0-9a-fA-F]+|#[0-9]+|[a-zA-Z][a-zA-Z0-9]+);/g, (match, body) => {
    if (body[0] === "#") {
      const isHex = body[1] === "x" || body[1] === "X";
      const code = isHex ? Number.parseInt(body.slice(2), 16) : Number.parseInt(body.slice(1), 10);
      // The range check guarantees String.fromCodePoint cannot throw.
      const isValid = Number.isInteger(code) && code >= 0 && code <= 0x10ffff;
      return isValid ? String.fromCodePoint(code) : match;
    }
    // Own-property check: names such as "constructor" or "toString" must not
    // resolve to members inherited from Object.prototype.
    return Object.hasOwn(NAMED, body) ? NAMED[body] : match;
  });
}
|
|
222
|
+
|
|
223
|
+
// src/transcripts/fetchedTranscript.ts
|
|
224
|
+
/**
 * A fetched transcript: the list of snippets plus metadata about the
 * transcript they belong to. Iterating the object iterates its snippets.
 */
var FetchedTranscript = class {
  snippets;
  videoId;
  language;
  languageCode;
  isGenerated;
  /**
   * @param {object} init - Source of the five fields copied onto the instance:
   *   `snippets`, `videoId`, `language`, `languageCode` and `isGenerated`.
   */
  constructor(init) {
    const { snippets, videoId, language, languageCode, isGenerated } = init;
    this.snippets = snippets;
    this.videoId = videoId;
    this.language = language;
    this.languageCode = languageCode;
    this.isGenerated = isGenerated;
  }
  /** @returns {Iterator} Iterator obtained from the snippet collection. */
  [Symbol.iterator]() {
    const { snippets } = this;
    return snippets[Symbol.iterator]();
  }
  /** @returns {number} Number of snippets. */
  get length() {
    const { length } = this.snippets;
    return length;
  }
  /**
   * @returns {Array<{text: *, start: *, duration: *}>} New plain objects
   *   holding only `text`, `start` and `duration` of each snippet.
   */
  toRawData() {
    return this.snippets.map(({ text, start, duration }) => ({ text, start, duration }));
  }
};
|
|
251
|
+
|
|
252
|
+
// src/utils/xml.ts
|
|
253
|
+
// Matches one <text> element: either self-closing (`<text .../>`, no inner
// text) or with a body (`<text ...>body</text>`). Group 1 is the raw
// attribute string, group 2 the body when present.
var TEXT_ELEMENT_REGEX = /<text\b([^>]*?)(?:\/>|>([\s\S]*?)<\/text>)/g;
// Matches one `name="value"` or `name='value'` attribute. Group 1 is the
// name, group 2 a double-quoted value, group 3 a single-quoted value.
var ATTR_REGEX = /([a-zA-Z_:][\w:.-]*)\s*=\s*(?:"([^"]*)"|'([^']*)')/g;
/**
 * Extracts all <text> elements from a transcript XML document using regular
 * expressions (no XML parser involved).
 *
 * Neither the inner text nor the attribute values are entity-decoded here.
 *
 * @param {string} raw - XML source.
 * @returns {Array<{text: string, attrs: Object<string, string>}>} One entry
 *   per element in document order; `text` is "" for empty or self-closing
 *   elements.
 */
function parseTranscriptXml(raw) {
  const elements = [];
  // matchAll iterates on a copy of the regex, so the shared module-level
  // regexes keep no state between calls.
  for (const textMatch of String(raw).matchAll(TEXT_ELEMENT_REGEX)) {
    const attrs = {};
    for (const attrMatch of (textMatch[1] ?? "").matchAll(ATTR_REGEX)) {
      attrs[attrMatch[1]] = attrMatch[2] ?? attrMatch[3] ?? "";
    }
    elements.push({ text: textMatch[2] ?? "", attrs });
  }
  return elements;
}
|
|
276
|
+
|
|
277
|
+
// src/transcripts/parser.ts
|
|
278
|
+
// Tag names that buildPreserveRegex() excludes from its match.
var FORMATTING_TAGS = ["strong", "em", "b", "i", "mark", "small", "del", "ins", "sub", "sup"];
// Matches any `<...>` tag.
var STRIP_ALL_REGEX = /<[^>]*>/gi;
/**
 * Creates a regex matching opening and closing tags whose name is NOT one of
 * FORMATTING_TAGS.
 *
 * @returns {RegExp} A new global, case-insensitive regex on every call.
 */
function buildPreserveRegex() {
  const alternation = FORMATTING_TAGS.join("|");
  const pattern = "<\\/?(?!\\/?(" + alternation + ")\\b).*?\\b>";
  return new RegExp(pattern, "gi");
}
|
|
295
|
+
/**
 * Turns raw transcript XML into snippet objects, removing markup from the
 * caption text.
 */
var TranscriptParser = class {
  _htmlRegex;
  /**
   * @param {boolean} [preserveFormatting=false] - When truthy, the regex from
   *   buildPreserveRegex() is used instead of STRIP_ALL_REGEX to remove tags.
   */
  constructor(preserveFormatting = false) {
    this._htmlRegex = preserveFormatting ? buildPreserveRegex() : STRIP_ALL_REGEX;
  }
  /**
   * @param {string} rawData - Transcript XML.
   * @returns {Array<{text: string, start: number, duration: number}>} One
   *   snippet per non-empty <text> element. `start` and `duration` come from
   *   the `start` and `dur` attributes (defaulting to "0" and "0.0") parsed
   *   with Number.parseFloat.
   */
  parse(rawData) {
    const snippets = [];
    for (const { text, attrs } of parseTranscriptXml(rawData)) {
      if (text === "") continue;
      // Entities are decoded twice in a row.
      const decoded = decodeHtmlEntities(decodeHtmlEntities(text));
      // The regex is global and may be shared, so reset its position first.
      this._htmlRegex.lastIndex = 0;
      snippets.push({
        text: decoded.replace(this._htmlRegex, ""),
        start: Number.parseFloat(attrs["start"] ?? "0"),
        duration: Number.parseFloat(attrs["dur"] ?? "0.0")
      });
    }
    return snippets;
  }
};
|
|
320
|
+
|
|
321
|
+
// src/transcripts/transcript.ts
|
|
322
|
+
/**
 * Handle for a single transcript of a video. Holds the metadata and the URL
 * needed to download the transcript; the download happens in `fetch()`.
 */
var Transcript = class _Transcript {
  videoId;
  language;
  languageCode;
  isGenerated;
  translationLanguages;
  _httpClient;
  _url;
  _translationLanguagesByCode;
  /**
   * @param {object} httpClient - Client whose `get(url)` is used by `fetch()`.
   * @param {string} videoId - ID of the video.
   * @param {string} url - URL the transcript is downloaded from.
   * @param {string} language - Language name.
   * @param {string} languageCode - Language code.
   * @param {boolean} isGenerated - Stored as `isGenerated`.
   * @param {Array<{language: string, languageCode: string}>} translationLanguages -
   *   Languages this transcript can be translated into.
   */
  constructor(httpClient, videoId, url, language, languageCode, isGenerated, translationLanguages) {
    this._httpClient = httpClient;
    this.videoId = videoId;
    this._url = url;
    this.language = language;
    this.languageCode = languageCode;
    this.isGenerated = isGenerated;
    this.translationLanguages = translationLanguages;
    // Lookup table: language code -> language name.
    const entries = translationLanguages.map(({ languageCode: code, language: name }) => [code, name]);
    this._translationLanguagesByCode = new Map(entries);
  }
  /** @returns {boolean} True when at least one translation language exists. */
  get isTranslatable() {
    return this.translationLanguages.length > 0;
  }
  /**
   * Downloads and parses the transcript.
   *
   * @param {{preserveFormatting?: boolean}} [options] - `preserveFormatting`
   *   is forwarded to TranscriptParser (false when absent).
   * @returns {Promise<FetchedTranscript>} The parsed transcript.
   * @throws {PoTokenRequired} When the URL contains "&exp=xpe".
   * @throws {IpBlocked} On HTTP status 429.
   * @throws {YouTubeRequestFailed} On any other non-ok response.
   */
  async fetch(options = {}) {
    const { videoId } = this;
    if (this._url.includes("&exp=xpe")) {
      throw new PoTokenRequired(videoId);
    }
    const response = await this._httpClient.get(this._url);
    if (response.status === 429) {
      throw new IpBlocked(videoId);
    }
    if (!response.ok) {
      const failure = `${response.status} ${response.statusText}`;
      throw new YouTubeRequestFailed(videoId, failure);
    }
    const xml = await response.text();
    const parser = new TranscriptParser(options.preserveFormatting ?? false);
    return new FetchedTranscript({
      snippets: parser.parse(xml),
      videoId,
      language: this.language,
      languageCode: this.languageCode,
      isGenerated: this.isGenerated
    });
  }
  /**
   * Creates a handle for the translation of this transcript.
   *
   * @param {string} languageCode - Code of the target language.
   * @returns {Transcript} New handle whose URL has `&tlang=<code>` appended,
   *   with `isGenerated` true and no translation languages.
   * @throws {NotTranslatable} When this transcript has no translation languages.
   * @throws {TranslationLanguageNotAvailable} When the code is unknown.
   */
  translate(languageCode) {
    if (!this.isTranslatable) {
      throw new NotTranslatable(this.videoId);
    }
    const targetLanguage = this._translationLanguagesByCode.get(languageCode);
    if (targetLanguage === void 0) {
      throw new TranslationLanguageNotAvailable(this.videoId);
    }
    const translatedUrl = `${this._url}&tlang=${languageCode}`;
    return new _Transcript(
      this._httpClient,
      this.videoId,
      translatedUrl,
      targetLanguage,
      languageCode,
      true,
      []
    );
  }
  /** @returns {string} `code ("Language")`, plus `[TRANSLATABLE]` if applicable. */
  toString() {
    const suffix = this.isTranslatable ? "[TRANSLATABLE]" : "";
    return `${this.languageCode} ("${this.language}")${suffix}`;
  }
};
|
|
393
|
+
|
|
394
|
+
// src/transcripts/transcriptList.ts
|
|
395
|
+
/**
 * The transcripts available for one video, split into manually created and
 * generated ones, each keyed by language code. Iteration yields the manually
 * created transcripts first, then the generated ones.
 */
var TranscriptList = class _TranscriptList {
  videoId;
  _manuallyCreated;
  _generated;
  _translationLanguages;
  /**
   * @param {string} videoId - ID of the video.
   * @param {Map<string, Transcript>} manuallyCreated - Manually created transcripts by language code.
   * @param {Map<string, Transcript>} generated - Generated transcripts by language code.
   * @param {Array<{language: string, languageCode: string}>} translationLanguages -
   *   Available translation languages.
   */
  constructor(videoId, manuallyCreated, generated, translationLanguages) {
    this.videoId = videoId;
    this._manuallyCreated = manuallyCreated;
    this._generated = generated;
    this._translationLanguages = translationLanguages;
  }
  /**
   * Builds a list from the captions JSON of a player response.
   *
   * @param {object} httpClient - Passed on to every created Transcript.
   * @param {string} videoId - ID of the video.
   * @param {object} captionsJson - Object with `captionTracks` and optionally
   *   `translationLanguages`.
   * @returns {TranscriptList} The populated list.
   */
  static build(httpClient, videoId, captionsJson) {
    const translationLanguages = (captionsJson.translationLanguages ?? []).map((entry) => ({
      language: entry.languageName.runs[0]?.text ?? "",
      languageCode: entry.languageCode
    }));
    const manuallyCreated = /* @__PURE__ */ new Map();
    const generated = /* @__PURE__ */ new Map();
    for (const track of captionsJson.captionTracks) {
      // Tracks of kind "asr" go into the generated map.
      const isAsr = track.kind === "asr";
      const transcript = new Transcript(
        httpClient,
        videoId,
        track.baseUrl.replace("&fmt=srv3", ""),
        track.name.runs[0]?.text ?? "",
        track.languageCode,
        isAsr,
        track.isTranslatable ? translationLanguages : []
      );
      const bucket = isAsr ? generated : manuallyCreated;
      bucket.set(track.languageCode, transcript);
    }
    return new _TranscriptList(videoId, manuallyCreated, generated, translationLanguages);
  }
  *[Symbol.iterator]() {
    for (const source of [this._manuallyCreated, this._generated]) {
      for (const transcript of source.values()) yield transcript;
    }
  }
  /**
   * Finds a transcript, preferring manually created over generated ones.
   *
   * @param {Iterable<string>} languageCodes - Codes in descending priority.
   * @returns {Transcript} The first match.
   * @throws {NoTranscriptFound} When no code matches.
   */
  findTranscript(languageCodes) {
    const searchOrder = [this._manuallyCreated, this._generated];
    return this._findTranscript(languageCodes, searchOrder);
  }
  /** Like findTranscript, but only considers generated transcripts. */
  findGeneratedTranscript(languageCodes) {
    return this._findTranscript(languageCodes, [this._generated]);
  }
  /** Like findTranscript, but only considers manually created transcripts. */
  findManuallyCreatedTranscript(languageCodes) {
    return this._findTranscript(languageCodes, [this._manuallyCreated]);
  }
  /**
   * Looks up each code, in order, in each of the given maps, in order.
   *
   * @param {Iterable<string>} languageCodes - Codes in descending priority.
   * @param {Array<Map<string, Transcript>>} transcriptDicts - Maps to search.
   * @returns {Transcript} The first match.
   * @throws {NoTranscriptFound} When nothing matches.
   */
  _findTranscript(languageCodes, transcriptDicts) {
    const codes = Array.from(languageCodes);
    for (const code of codes) {
      for (const byLanguage of transcriptDicts) {
        const candidate = byLanguage.get(code);
        if (candidate !== void 0) return candidate;
      }
    }
    throw new NoTranscriptFound(this.videoId, codes, this);
  }
  /** @returns {string} Multi-line overview of all available transcripts. */
  toString() {
    const bulletList = (lines) => {
      if (lines.length === 0) return "None";
      return lines.map((line) => ` - ${line}`).join("\n");
    };
    const namesOf = (transcripts) => Array.from(transcripts.values()).map((t) => t.toString());
    const manuallyCreated = bulletList(namesOf(this._manuallyCreated));
    const generated = bulletList(namesOf(this._generated));
    const translations = bulletList(
      this._translationLanguages.map((tl) => `${tl.languageCode} ("${tl.language}")`)
    );
    return [
      `For this video (${this.videoId}) transcripts are available in the following languages:`,
      "",
      "(MANUALLY CREATED)",
      manuallyCreated,
      "",
      "(GENERATED)",
      generated,
      "",
      "(TRANSLATION LANGUAGES)",
      translations
    ].join("\n");
  }
};
|
|
489
|
+
|
|
490
|
+
// src/transcripts/fetcher.ts
|
|
491
|
+
// Values of `playabilityStatus.status` that the fetcher distinguishes.
var PLAYABILITY_STATUS_OK = "OK";
var PLAYABILITY_STATUS_ERROR = "ERROR";
var PLAYABILITY_STATUS_LOGIN_REQUIRED = "LOGIN_REQUIRED";
// Values of `playabilityStatus.reason` that are mapped to specific errors.
var REASON_BOT_DETECTED = "Sign in to confirm you\u2019re not a bot";
var REASON_AGE_RESTRICTED = "This video may be inappropriate for some users.";
var REASON_VIDEO_UNAVAILABLE = "This video is unavailable";
// Captures the Innertube API key embedded in the watch page HTML.
var INNERTUBE_API_KEY_REGEX = /"INNERTUBE_API_KEY":\s*"([a-zA-Z0-9_-]+)"/;
// Captures the value of the form field named "v", used to build the CONSENT cookie.
var CONSENT_COOKIE_REGEX = /name="v" value="(.*?)"/;
// Substrings whose presence in the HTML signals a consent form / a reCAPTCHA.
var CONSENT_FORM_MARKER = 'action="https://consent.youtube.com/s"';
var RECAPTCHA_MARKER = 'class="g-recaptcha"';
|
|
501
|
+
/**
 * Converts an unsuccessful HTTP response into the matching error.
 *
 * @param {{status: number, ok: boolean, statusText?: string}} response - Response to check.
 * @param {string} videoId - ID of the video the request was made for.
 * @returns {Promise<object>} The same response object when it is ok.
 * @throws {IpBlocked} On status 429.
 * @throws {YouTubeRequestFailed} On any other non-ok response; the reason is
 *   "<status> <statusText>", with "HTTP error" standing in for an empty
 *   status text.
 */
async function raiseHttpErrors(response, videoId) {
  const rateLimited = response.status === 429;
  if (rateLimited) {
    throw new IpBlocked(videoId);
  }
  if (response.ok) {
    return response;
  }
  const description = response.statusText || "HTTP error";
  throw new YouTubeRequestFailed(videoId, `${response.status} ${description}`);
}
|
|
513
|
+
/**
 * Retrieves the list of available transcripts for a video: loads the watch
 * page, extracts the Innertube API key, queries the player endpoint and
 * builds a TranscriptList from the captions data of the response.
 */
var TranscriptListFetcher = class {
  _httpClient;
  _proxyConfig;
  /**
   * @param {object} httpClient - Client providing `get`, `post` and `setCookie`.
   * @param {object} [proxyConfig] - Optional proxy configuration; its
   *   `retriesWhenBlocked` is read when a request is blocked.
   */
  constructor(httpClient, proxyConfig) {
    this._httpClient = httpClient;
    this._proxyConfig = proxyConfig;
  }
  /**
   * @param {string} videoId - ID of the video.
   * @returns {Promise<TranscriptList>} The available transcripts.
   */
  async fetch(videoId) {
    const captionsJson = await this._fetchCaptionsJson(videoId, 0);
    return TranscriptList.build(this._httpClient, videoId, captionsJson);
  }
  /**
   * Runs the whole retrieval once and, when it fails with RequestBlocked,
   * starts over as long as `tryNumber + 1 < retriesWhenBlocked`.
   *
   * @param {string} videoId - ID of the video.
   * @param {number} tryNumber - Zero-based index of the current attempt.
   * @returns {Promise<object>} The captions JSON.
   */
  async _fetchCaptionsJson(videoId, tryNumber) {
    try {
      const html = await this._fetchVideoHtml(videoId);
      const apiKey = this._extractInnertubeApiKey(html, videoId);
      const innertubeData = await this._fetchInnertubeData(videoId, apiKey);
      return this._extractCaptionsJson(innertubeData, videoId);
    } catch (err) {
      if (!(err instanceof RequestBlocked)) {
        throw err;
      }
      const retries = this._proxyConfig?.retriesWhenBlocked ?? 0;
      const nextTry = tryNumber + 1;
      if (nextTry < retries) {
        return this._fetchCaptionsJson(videoId, nextTry);
      }
      // Out of attempts: attach the proxy config so the message fits the setup.
      throw err.withProxyConfig(this._proxyConfig ?? null);
    }
  }
  /**
   * @returns {string} The API key found in the HTML.
   * @throws {IpBlocked} When no key is found and the page contains a reCAPTCHA.
   * @throws {YouTubeDataUnparsable} When no key is found otherwise.
   */
  _extractInnertubeApiKey(html, videoId) {
    const [, apiKey] = INNERTUBE_API_KEY_REGEX.exec(html) ?? [];
    if (apiKey) {
      return apiKey;
    }
    if (html.includes(RECAPTCHA_MARKER)) {
      throw new IpBlocked(videoId);
    }
    throw new YouTubeDataUnparsable(videoId);
  }
  /**
   * @returns {object} `captions.playerCaptionsTracklistRenderer` of the response.
   * @throws {TranscriptsDisabled} When it is missing or has no `captionTracks`.
   */
  _extractCaptionsJson(innertubeData, videoId) {
    this._assertPlayability(innertubeData.playabilityStatus, videoId);
    const captionsJson = innertubeData.captions?.playerCaptionsTracklistRenderer;
    if (captionsJson && captionsJson.captionTracks) {
      return captionsJson;
    }
    throw new TranscriptsDisabled(videoId);
  }
  /**
   * Throws the error matching a non-OK playability status; returns silently
   * when the status data or the status itself is missing, or the status is OK.
   */
  _assertPlayability(playabilityStatusData, videoId) {
    if (!playabilityStatusData) return;
    const { status } = playabilityStatusData;
    if (!status || status === PLAYABILITY_STATUS_OK) return;
    const { reason } = playabilityStatusData;
    const loginRequired = status === PLAYABILITY_STATUS_LOGIN_REQUIRED;
    if (loginRequired && reason === REASON_BOT_DETECTED) {
      throw new RequestBlocked(videoId);
    }
    if (loginRequired && reason === REASON_AGE_RESTRICTED) {
      throw new AgeRestricted(videoId);
    }
    if (status === PLAYABILITY_STATUS_ERROR && reason === REASON_VIDEO_UNAVAILABLE) {
      // An "unavailable" video whose ID is a URL is reported as an invalid ID.
      const looksLikeUrl = videoId.startsWith("http://") || videoId.startsWith("https://");
      throw looksLikeUrl ? new InvalidVideoId(videoId) : new VideoUnavailable(videoId);
    }
    const subreasonRuns = playabilityStatusData.errorScreen?.playerErrorMessageRenderer?.subreason?.runs ?? [];
    const subreasonTexts = subreasonRuns.map((run) => run.text ?? "");
    throw new VideoUnplayable(videoId, reason ?? null, subreasonTexts);
  }
  /**
   * Sets the CONSENT cookie for ".youtube.com" from the value found in the
   * consent page HTML.
   *
   * @throws {FailedToCreateConsentCookie} When the value cannot be found.
   */
  _createConsentCookie(html, videoId) {
    const consentValue = CONSENT_COOKIE_REGEX.exec(html)?.[1];
    if (!consentValue) {
      throw new FailedToCreateConsentCookie(videoId);
    }
    this._httpClient.setCookie("CONSENT", `YES+${consentValue}`, ".youtube.com");
  }
  /**
   * Loads the watch page; when a consent form is returned, sets the consent
   * cookie and loads the page once more.
   *
   * @throws {FailedToCreateConsentCookie} When the consent form is still
   *   present after the second load.
   */
  async _fetchVideoHtml(videoId) {
    const firstAttempt = await this._fetchHtml(videoId);
    if (!firstAttempt.includes(CONSENT_FORM_MARKER)) {
      return firstAttempt;
    }
    this._createConsentCookie(firstAttempt, videoId);
    const secondAttempt = await this._fetchHtml(videoId);
    if (secondAttempt.includes(CONSENT_FORM_MARKER)) {
      throw new FailedToCreateConsentCookie(videoId);
    }
    return secondAttempt;
  }
  /** @returns {Promise<string>} Entity-decoded body of the watch page. */
  async _fetchHtml(videoId) {
    const response = await this._httpClient.get(watchUrl(videoId));
    const checked = await raiseHttpErrors(response, videoId);
    return decodeHtmlEntities(await checked.text());
  }
  /** @returns {Promise<object>} Parsed JSON body of the Innertube player response. */
  async _fetchInnertubeData(videoId, apiKey) {
    const payload = { context: INNERTUBE_CONTEXT, videoId };
    const response = await this._httpClient.post(innertubeApiUrl(apiKey), payload);
    const checked = await raiseHttpErrors(response, videoId);
    return await checked.json();
  }
};
|
|
618
|
+
|
|
619
|
+
// src/utils/httpClient.ts
|
|
620
|
+
/** @returns {boolean} True when a global `Bun` value is defined. */
function isBun() {
  return globalThis.Bun !== void 0;
}
|
|
623
|
+
/** @returns {boolean} True when a global `Deno` value is defined. */
function isDeno() {
  return globalThis.Deno !== void 0;
}
|
|
626
|
+
/**
 * Creates a minimal in-memory cookie store keyed by domain.
 *
 * @returns {{set(name: string, value: string, domain: string): void,
 *   get(host: string): (string|undefined)}} `set` stores a cookie for a
 *   domain; `get` returns the `name=value; ...` string for all cookies whose
 *   domain equals the host or is a parent domain of it, or undefined when
 *   there are none.
 */
function createCookieJar() {
  // Keys always carry a leading dot, e.g. ".youtube.com".
  const cookiesByDomain = /* @__PURE__ */ new Map();
  const withLeadingDot = (domain) => domain.startsWith(".") ? domain : `.${domain}`;
  return {
    set(name, value, domain) {
      const key = withLeadingDot(domain);
      if (!cookiesByDomain.get(key)) {
        cookiesByDomain.set(key, /* @__PURE__ */ new Map());
      }
      cookiesByDomain.get(key).set(name, value);
    },
    get(host) {
      const pairs = [];
      for (const [dottedDomain, cookies] of cookiesByDomain) {
        const bareDomain = dottedDomain.slice(1);
        const applies = host === bareDomain || host.endsWith(dottedDomain);
        if (!applies) continue;
        for (const [name, value] of cookies) {
          pairs.push(`${name}=${value}`);
        }
      }
      if (pairs.length === 0) return void 0;
      return pairs.join("; ");
    }
  };
}
|
|
652
|
+
/**
 * Small HTTP client on top of `fetch` that adds default headers, sends the
 * cookies stored via `setCookie`, and routes requests through a proxy when a
 * proxy configuration is given.
 */
var HttpClient = class {
  _proxyConfig;
  _userFetch;
  _cookies = createCookieJar();
  // Most recently created undici dispatcher and whether one was created.
  _undiciDispatcher = null;
  _undiciLoaded = false;
  // One dispatcher per proxy URL, so differing http/https proxies are both honored.
  _undiciDispatchersByProxyUrl = /* @__PURE__ */ new Map();
  /**
   * @param {{proxyConfig?: object, fetchFn?: Function}} [options] -
   *   `proxyConfig` may provide `httpUrl`, `httpsUrl` and
   *   `preventKeepingConnectionsAlive`; `fetchFn` replaces the built-in
   *   transport entirely.
   * @throws {Error} On Deno when a proxy is configured without `fetchFn`.
   */
  constructor(options = {}) {
    this._proxyConfig = options.proxyConfig;
    this._userFetch = options.fetchFn;
    if (this._proxyConfig && !this._userFetch && isDeno()) {
      throw new Error(
        "Proxy support on Deno requires a custom fetchFn. Pass `fetchFn` configured with `Deno.createHttpClient` to YouTubeTranscriptApi."
      );
    }
  }
  /** Stores a cookie that is sent with later requests to matching hosts. */
  setCookie(name, value, domain) {
    this._cookies.set(name, value, domain);
  }
  /** @returns {Promise<Response>} Response of a GET request to `url`. */
  async get(url) {
    return this._fetch(url, { method: "GET" });
  }
  /** @returns {Promise<Response>} Response of a POST request with a JSON body. */
  async post(url, jsonBody) {
    return this._fetch(url, {
      method: "POST",
      body: JSON.stringify(jsonBody),
      headers: { "Content-Type": "application/json" }
    });
  }
  /**
   * Performs a request with default headers and cookies applied.
   *
   * Transport selection: a user supplied `fetchFn` always wins; otherwise,
   * when a proxy URL applies to the request, `Bun.fetch` (with its `proxy`
   * option) or the global `fetch` with an undici dispatcher is used;
   * otherwise the plain global `fetch`.
   *
   * @param {string} url - Request URL.
   * @param {object} init - Fetch init object.
   * @returns {Promise<Response>} The response.
   */
  async _fetch(url, init) {
    const headers = new Headers(init.headers);
    if (!headers.has("Accept-Language")) {
      headers.set("Accept-Language", "en-US");
    }
    if (this._proxyConfig?.preventKeepingConnectionsAlive) {
      headers.set("Connection", "close");
    }
    const cookieHeader = this._cookieHeaderFor(url);
    if (cookieHeader) {
      headers.set("Cookie", cookieHeader);
    }
    const finalInit = { ...init, headers };
    if (this._userFetch) {
      return this._userFetch(url, finalInit);
    }
    const proxyUrl = this._proxyUrlFor(url);
    if (!proxyUrl) {
      return fetch(url, finalInit);
    }
    if (isBun()) {
      const bunRef = globalThis.Bun;
      if (bunRef) {
        return bunRef.fetch(url, {
          ...finalInit,
          // Bun-specific option
          proxy: proxyUrl
        });
      }
    }
    const dispatcher = await this._getUndiciDispatcher(proxyUrl);
    return fetch(url, {
      ...finalInit,
      dispatcher
    });
  }
  /**
   * @returns {string|undefined} Cookie header value for the URL's host, or
   *   undefined when there are no matching cookies or the URL is unparsable.
   */
  _cookieHeaderFor(url) {
    let hostname;
    try {
      // `hostname` excludes the port, so cookies also match on non-default ports.
      hostname = new URL(url).hostname;
    } catch {
      // Unparsable URL: send no cookies and let the transport report the problem.
      return void 0;
    }
    return hostname ? this._cookies.get(hostname) : void 0;
  }
  /** @returns {string|undefined} Proxy URL configured for the URL's scheme, if any. */
  _proxyUrlFor(url) {
    if (!this._proxyConfig) return void 0;
    return url.startsWith("https:") ? this._proxyConfig.httpsUrl : this._proxyConfig.httpUrl;
  }
  /**
   * Returns the undici ProxyAgent for a proxy URL, creating and caching it on
   * first use.
   *
   * @param {string} proxyUrl - URL of the proxy.
   * @returns {Promise<object>} Dispatcher to pass to `fetch`.
   * @throws {Error} When undici cannot be loaded or the agent cannot be
   *   created; the original error is attached as `cause`.
   */
  async _getUndiciDispatcher(proxyUrl) {
    const cached = this._undiciDispatchersByProxyUrl.get(proxyUrl);
    if (cached) {
      return cached;
    }
    try {
      const undici = await import('undici');
      const dispatcher = new undici.ProxyAgent({ uri: proxyUrl });
      this._undiciDispatchersByProxyUrl.set(proxyUrl, dispatcher);
      this._undiciDispatcher = dispatcher;
      this._undiciLoaded = true;
      return dispatcher;
    } catch (err) {
      throw new Error(
        "Proxy support on Node.js requires the optional peer dependency `undici`. Install it with `npm install undici`, or pass a custom `fetchFn` to YouTubeTranscriptApi. Original error: " + String(err),
        { cause: err }
      );
    }
  }
};
|
|
742
|
+
|
|
743
|
+
// src/api.ts
|
|
744
|
+
/**
 * High-level entry point: lists the transcripts available for a video and
 * fetches the best match for a set of preferred languages.
 */
var YouTubeTranscriptApi = class {
  _httpClient;
  _fetcher;
  /**
   * @param {object} [options] Passed to HttpClient; `options.proxyConfig`
   *   is additionally handed to the transcript list fetcher.
   */
  constructor(options = {}) {
    const httpClient = new HttpClient(options);
    this._httpClient = httpClient;
    this._fetcher = new TranscriptListFetcher(httpClient, options.proxyConfig);
  }
  /** Resolves with the transcript list for `videoId`. */
  async list(videoId) {
    const fetcher = this._fetcher;
    return fetcher.fetch(videoId);
  }
  /**
   * Fetches a transcript for `videoId`.
   *
   * @param {string} videoId
   * @param {object} [options]
   * @param {Iterable<string>} [options.languages] Preferred language codes
   *   (defaults to `["en"]`).
   * @param {boolean} [options.preserveFormatting] Defaults to `false`.
   */
  async fetch(videoId, options = {}) {
    // Copy the preference list up front so the caller's iterable is consumed once.
    const preferred = Array.from(options.languages ?? ["en"]);
    const available = await this.list(videoId);
    const chosen = available.findTranscript(preferred);
    const fetchOptions = {
      preserveFormatting: options.preserveFormatting ?? false
    };
    return chosen.fetch(fetchOptions);
  }
};
|
|
763
|
+
|
|
764
|
+
// src/formatters/base.ts
|
|
765
|
+
/**
 * Base class for all transcript formatters. It declares no behavior of its
 * own; the subclasses in this file provide `formatTranscript` and
 * `formatTranscripts`.
 */
var Formatter = class {};
|
|
767
|
+
|
|
768
|
+
// src/formatters/jsonFormatter.ts
|
|
769
|
+
/** Serializes transcripts as JSON, optionally indented. */
var JSONFormatter = class extends Formatter {
  /**
   * @param {object} transcript Object exposing `toRawData()`.
   * @param {object} [options]
   * @param {number|string} [options.indent] Forwarded to `JSON.stringify`.
   * @returns {string}
   */
  formatTranscript(transcript, options = {}) {
    const raw = transcript.toRawData();
    return JSON.stringify(raw, null, options.indent);
  }
  /**
   * Serializes several transcripts as one JSON array.
   * @returns {string}
   */
  formatTranscripts(transcripts, options = {}) {
    const rawList = transcripts.map((entry) => entry.toRawData());
    return JSON.stringify(rawList, null, options.indent);
  }
};
|
|
781
|
+
|
|
782
|
+
// src/formatters/prettyPrintFormatter.ts
|
|
783
|
+
/** Serializes transcripts as JSON with a fixed two-space indent. */
var PrettyPrintFormatter = class extends Formatter {
  /** @returns {string} Indented JSON for one transcript. */
  formatTranscript(transcript) {
    const indent = 2;
    const raw = transcript.toRawData();
    return JSON.stringify(raw, null, indent);
  }
  /** @returns {string} Indented JSON array covering all transcripts. */
  formatTranscripts(transcripts) {
    const indent = 2;
    const rawList = transcripts.map((entry) => entry.toRawData());
    return JSON.stringify(rawList, null, indent);
  }
};
|
|
795
|
+
|
|
796
|
+
// src/formatters/textFormatter.ts
|
|
797
|
+
/** Renders transcripts as plain text, one snippet per line. */
var TextFormatter = class extends Formatter {
  /** @returns {string} Snippet texts joined by single newlines. */
  formatTranscript(transcript) {
    const texts = transcript.snippets.map(({ text }) => text);
    return texts.join("\n");
  }
  /** @returns {string} Each transcript's text, separated by two blank lines. */
  formatTranscripts(transcripts) {
    const rendered = transcripts.map((entry) => this.formatTranscript(entry));
    return rendered.join("\n\n\n");
  }
};
|
|
805
|
+
|
|
806
|
+
// src/formatters/textBasedFormatter.ts
|
|
807
|
+
/**
 * Shared base for cue-based text formats. Subclasses supply
 * `_formatTimestamp`, `_formatTranscriptHelper` and `_formatTranscriptHeader`.
 */
var TextBasedFormatter = class extends TextFormatter {
  /**
   * Splits a time in seconds into hours/minutes/seconds/milliseconds and
   * delegates the rendering to `_formatTimestamp`.
   *
   * The value is rounded to whole milliseconds first so that a fractional
   * part such as .9996 carries into the seconds (1.9996 -> 00:00:02.000)
   * instead of producing an invalid four-digit millisecond field.
   */
  _secondsToTimestamp(time) {
    const totalMs = Math.round(Number(time) * 1e3);
    const ms = totalMs % 1e3;
    const totalSecs = (totalMs - ms) / 1e3;
    const hours = Math.floor(totalSecs / 3600);
    const mins = Math.floor((totalSecs - hours * 3600) / 60);
    const secs = totalSecs - hours * 3600 - mins * 60;
    return this._formatTimestamp(hours, mins, secs, ms);
  }
  /**
   * Builds one cue per snippet and hands the cues to
   * `_formatTranscriptHeader` for final assembly.
   */
  formatTranscript(transcript) {
    const lines = [];
    const snippets = transcript.snippets;
    for (let i = 0; i < snippets.length; i++) {
      const line = snippets[i];
      const end = line.start + line.duration;
      const next = snippets[i + 1];
      // Clip the cue at the start of the next one so cues never overlap.
      const endTime = next !== void 0 && next.start < end ? next.start : end;
      const timeText = `${this._secondsToTimestamp(line.start)} --> ${this._secondsToTimestamp(endTime)}`;
      lines.push(this._formatTranscriptHelper(i, timeText, line));
    }
    return this._formatTranscriptHeader(lines);
  }
};
|
|
831
|
+
/**
 * Left-pads the string form of `value` with zeros up to `width` characters.
 * Values already at least `width` long are returned unchanged.
 */
function pad(value, width) {
  const text = String(value);
  return text.padStart(width, "0");
}
|
|
834
|
+
|
|
835
|
+
// src/formatters/srtFormatter.ts
|
|
836
|
+
/** Renders transcripts as SubRip (SRT) cues. */
var SRTFormatter = class extends TextBasedFormatter {
  /** @returns {string} `HH:MM:SS,mmm` (comma before the milliseconds). */
  _formatTimestamp(hours, mins, secs, ms) {
    const clock = [pad(hours, 2), pad(mins, 2), pad(secs, 2)].join(":");
    return `${clock},${pad(ms, 3)}`;
  }
  /** Joins cues with blank lines and terminates the output with a newline. */
  _formatTranscriptHeader(lines) {
    return `${lines.join("\n\n")}\n`;
  }
  /** Builds one cue: 1-based sequence number, time range, then the text. */
  _formatTranscriptHelper(i, timeText, snippet) {
    const sequence = i + 1;
    return `${sequence}\n${timeText}\n${snippet.text}`;
  }
};
|
|
849
|
+
|
|
850
|
+
// src/formatters/webvttFormatter.ts
|
|
851
|
+
/** Renders transcripts as WebVTT cues. */
var WebVTTFormatter = class extends TextBasedFormatter {
  /** @returns {string} `HH:MM:SS.mmm` (dot before the milliseconds). */
  _formatTimestamp(hours, mins, secs, ms) {
    const clock = [pad(hours, 2), pad(mins, 2), pad(secs, 2)].join(":");
    return `${clock}.${pad(ms, 3)}`;
  }
  /** Prepends the `WEBVTT` signature and joins cues with blank lines. */
  _formatTranscriptHeader(lines) {
    const cues = lines.join("\n\n");
    return `WEBVTT\n\n${cues}\n`;
  }
  /** Builds one cue: time range followed by the text (no sequence number). */
  _formatTranscriptHelper(_i, timeText, snippet) {
    return `${timeText}\n${snippet.text}`;
  }
};
|
|
863
|
+
|
|
864
|
+
// src/formatters/index.ts
|
|
865
|
+
// Registry of formatter classes by CLI name. Key order is user-visible: it is
// echoed verbatim in the UnknownFormatterType message.
var TYPES = {
  json: JSONFormatter,
  pretty: PrettyPrintFormatter,
  text: TextFormatter,
  webvtt: WebVTTFormatter,
  srt: SRTFormatter
};
/** Raised when a formatter name is not present in the registry. */
var UnknownFormatterType = class extends Error {
  constructor(formatterType) {
    super(
      `The format '${formatterType}' is not supported. Choose one of the following formats: ${Object.keys(TYPES).join(", ")}`
    );
    this.name = "UnknownFormatterType";
  }
};
/** Instantiates formatters by name. */
var FormatterLoader = class {
  static TYPES = TYPES;
  /**
   * @param {string} [formatterType="pretty"] One of the registry keys.
   * @returns {object} A new formatter instance.
   * @throws {UnknownFormatterType} When the name is not a registry key.
   */
  load(formatterType = "pretty") {
    // Own-property check: the `in` operator would also accept inherited names
    // such as "constructor" or "toString" and then instantiate the wrong thing.
    if (!Object.prototype.hasOwnProperty.call(TYPES, formatterType)) {
      throw new UnknownFormatterType(formatterType);
    }
    const Cls = TYPES[formatterType];
    return new Cls();
  }
};
|
|
890
|
+
|
|
891
|
+
// src/proxies/proxyConfig.ts
|
|
892
|
+
/** Raised when a proxy configuration is constructed with unusable options. */
var InvalidProxyConfig = class extends Error {
  // Set on the instance so the name survives in stack traces and logs.
  name = "InvalidProxyConfig";
  constructor(message) {
    super(message);
  }
};
|
|
898
|
+
/**
 * Base proxy configuration. Provides the default policy values that the
 * subclasses in this file override as needed.
 */
var ProxyConfig = class {
  /**
   * Whether connections should be closed after each request.
   * @returns {boolean} `false` by default.
   */
  get preventKeepingConnectionsAlive() {
    return false;
  }

  /**
   * Number of retries to attempt when a request is blocked.
   * @returns {number} `0` by default.
   */
  get retriesWhenBlocked() {
    return 0;
  }
};
|
|
906
|
+
|
|
907
|
+
// src/proxies/genericProxyConfig.ts
|
|
908
|
+
/**
 * Proxy configuration for arbitrary HTTP/HTTPS proxies. If only one of the
 * two URLs is given, it is used for both schemes.
 */
var GenericProxyConfig = class extends ProxyConfig {
  _httpUrl;
  _httpsUrl;
  /**
   * @param {object} [options]
   * @param {string} [options.httpUrl] Proxy URL for plain HTTP requests.
   * @param {string} [options.httpsUrl] Proxy URL for HTTPS requests.
   * @throws {InvalidProxyConfig} When neither URL is provided.
   */
  constructor(options = {}) {
    super();
    const { httpUrl, httpsUrl } = options;
    if (!httpUrl && !httpsUrl) {
      throw new InvalidProxyConfig(
        "GenericProxyConfig requires you to define at least one of the two: http or https"
      );
    }
    // Normalize empty values to undefined so the `??` fallbacks in the
    // getters agree with the validation above. Otherwise an empty httpUrl
    // next to a real httpsUrl would be reported as "" and HTTP requests
    // would silently skip the proxy.
    this._httpUrl = httpUrl || void 0;
    this._httpsUrl = httpsUrl || void 0;
  }
  /** HTTP proxy URL, falling back to the HTTPS one. */
  get httpUrl() {
    return this._httpUrl ?? this._httpsUrl;
  }
  /** HTTPS proxy URL, falling back to the HTTP one. */
  get httpsUrl() {
    return this._httpsUrl ?? this._httpUrl;
  }
};
|
|
929
|
+
|
|
930
|
+
// src/proxies/webshareProxyConfig.ts
|
|
931
|
+
var DEFAULT_DOMAIN_NAME = "p.webshare.io";
var DEFAULT_PORT = 80;
var ROTATE_SUFFIX = "-rotate";
/**
 * Proxy configuration for Webshare. The same generated URL is used for both
 * HTTP and HTTPS traffic, connections are not kept alive, and blocked
 * requests are retried (10 times unless configured otherwise).
 */
var WebshareProxyConfig = class extends GenericProxyConfig {
  proxyUsername;
  proxyPassword;
  domainName;
  proxyPort;
  _filterIpLocations;
  _retriesWhenBlocked;
  /**
   * @param {object} options
   * @param {string} options.proxyUsername
   * @param {string} options.proxyPassword
   * @param {string} [options.domainName] Defaults to `p.webshare.io`.
   * @param {number} [options.proxyPort] Defaults to `80`.
   * @param {string[]} [options.filterIpLocations] Location codes; defaults to none.
   * @param {number} [options.retriesWhenBlocked] Defaults to `10`.
   */
  constructor(options) {
    // The base class insists on a URL; the real one is computed by `url`.
    super({ httpUrl: "placeholder" });
    this.proxyUsername = options.proxyUsername;
    this.proxyPassword = options.proxyPassword;
    this.domainName = options.domainName ?? DEFAULT_DOMAIN_NAME;
    this.proxyPort = options.proxyPort ?? DEFAULT_PORT;
    this._filterIpLocations = options.filterIpLocations ?? [];
    this._retriesWhenBlocked = options.retriesWhenBlocked ?? 10;
  }
  /**
   * Proxy URL in the form
   * `http://<user>[-<LOC>...]-rotate:<password>@<domain>:<port>/`.
   * A trailing `-rotate` on the configured username is dropped first so the
   * suffix appears exactly once, after the location codes.
   */
  get url() {
    let locationCodes = "";
    for (const code of this._filterIpLocations) {
      locationCodes += `-${code.toUpperCase()}`;
    }
    const configured = this.proxyUsername;
    const baseUser = configured.endsWith(ROTATE_SUFFIX)
      ? configured.slice(0, -ROTATE_SUFFIX.length)
      : configured;
    const user = encodeURIComponent(baseUser);
    const password = encodeURIComponent(this.proxyPassword);
    const authority = `${this.domainName}:${this.proxyPort}`;
    return `http://${user}${locationCodes}${ROTATE_SUFFIX}:${password}@${authority}/`;
  }
  get httpUrl() {
    return this.url;
  }
  get httpsUrl() {
    return this.url;
  }
  get preventKeepingConnectionsAlive() {
    return true;
  }
  get retriesWhenBlocked() {
    return this._retriesWhenBlocked;
  }
};
|
|
973
|
+
|
|
974
|
+
// src/utils/argv.ts
|
|
975
|
+
/** Raised by the argument parser for unknown or malformed arguments. */
var ArgvError = class extends Error {
  // Set on the instance so callers and logs can tell parser errors apart.
  name = "ArgvError";
  constructor(message) {
    super(message);
  }
};
|
|
981
|
+
/**
 * Parses a list of command-line tokens against a flag specification.
 *
 * Only long flags (`--name`, `--name=value`) are recognized; dashes in a flag
 * name map to underscores in the spec key. Everything else, including tokens
 * starting with a single dash, is positional. A bare `--` makes all remaining
 * tokens positional.
 *
 * @param {string[]} argv Tokens to parse.
 * @param {Object<string, {kind: string, default?: *}>} spec Flag definitions;
 *   `kind` is "boolean", "string", or a list kind (any other value).
 * @returns {{positional: string[], flags: Object}} Parsed result.
 * @throws {ArgvError} On an unknown flag, a value given to a boolean flag, or
 *   a string flag without a value.
 */
function parseArgs(argv, spec) {
  const flags = {};
  const positional = [];
  const names = Object.keys(spec);

  // Seed defaults: an explicit default wins; otherwise booleans start false
  // and lists start empty. String flags without a default stay unset.
  for (const name of names) {
    const def = spec[name];
    if (def.default !== void 0) {
      flags[name] = def.default;
    } else if (def.kind === "boolean") {
      flags[name] = false;
    } else if (def.kind === "string[]") {
      flags[name] = [];
    }
  }

  const known = new Set(names);
  let cursor = 0;
  let positionalOnly = false;

  while (cursor < argv.length) {
    const token = argv[cursor];

    if (positionalOnly || !token.startsWith("--")) {
      positional.push(token);
      cursor += 1;
      continue;
    }
    if (token === "--") {
      positionalOnly = true;
      cursor += 1;
      continue;
    }

    const eqIdx = token.indexOf("=");
    const hasInline = eqIdx !== -1;
    const rawName = hasInline ? token.slice(2, eqIdx) : token.slice(2);
    const inlineValue = hasInline ? token.slice(eqIdx + 1) : void 0;
    const name = rawName.replace(/-/g, "_");
    if (!known.has(name)) {
      throw new ArgvError(`Unknown argument: --${rawName}`);
    }
    const { kind } = spec[name];

    // The flag token itself is consumed in every remaining branch.
    cursor += 1;

    if (kind === "boolean") {
      if (hasInline) {
        throw new ArgvError(`Argument --${rawName} does not take a value`);
      }
      flags[name] = true;
      continue;
    }

    if (kind === "string") {
      if (hasInline) {
        flags[name] = inlineValue;
        continue;
      }
      const value = argv[cursor];
      if (value === void 0) {
        throw new ArgvError(`Argument --${rawName} requires a value`);
      }
      flags[name] = value;
      cursor += 1;
      continue;
    }

    // List flag: take the inline value (if any) plus every following token
    // up to the next `--`-prefixed token.
    const collected = hasInline ? [inlineValue] : [];
    while (cursor < argv.length && !argv[cursor].startsWith("--")) {
      collected.push(argv[cursor]);
      cursor += 1;
    }
    flags[name] = collected;
  }

  return { positional, flags };
}
|
|
1058
|
+
|
|
1059
|
+
// src/cli.ts
|
|
1060
|
+
var VERSION = "0.1.0";
|
|
1061
|
+
var SPEC = {
|
|
1062
|
+
list_transcripts: { kind: "boolean", default: false },
|
|
1063
|
+
languages: { kind: "string[]", default: ["en"] },
|
|
1064
|
+
exclude_generated: { kind: "boolean", default: false },
|
|
1065
|
+
exclude_manually_created: { kind: "boolean", default: false },
|
|
1066
|
+
format: { kind: "string", default: "pretty" },
|
|
1067
|
+
translate: { kind: "string", default: "" },
|
|
1068
|
+
webshare_proxy_username: { kind: "string", default: "" },
|
|
1069
|
+
webshare_proxy_password: { kind: "string", default: "" },
|
|
1070
|
+
http_proxy: { kind: "string", default: "" },
|
|
1071
|
+
https_proxy: { kind: "string", default: "" },
|
|
1072
|
+
version: { kind: "boolean", default: false },
|
|
1073
|
+
help: { kind: "boolean", default: false }
|
|
1074
|
+
};
|
|
1075
|
+
var HELP_TEXT = `Usage: youtube-transcript [options] VIDEO_ID [VIDEO_ID ...]
|
|
1076
|
+
|
|
1077
|
+
Fetch transcripts/subtitles for one or more YouTube videos.
|
|
1078
|
+
|
|
1079
|
+
Options:
|
|
1080
|
+
--list-transcripts List available transcript languages instead of fetching.
|
|
1081
|
+
--languages CODE [CODE ...] Language codes in descending priority (default: en).
|
|
1082
|
+
--exclude-generated Skip auto-generated transcripts.
|
|
1083
|
+
--exclude-manually-created Skip manually created transcripts.
|
|
1084
|
+
--format FORMAT Output format: json, pretty, text, webvtt, srt (default: pretty).
|
|
1085
|
+
--translate CODE Translate to the given language code.
|
|
1086
|
+
--webshare-proxy-username U Webshare "Proxy Username".
|
|
1087
|
+
--webshare-proxy-password P Webshare "Proxy Password".
|
|
1088
|
+
--http-proxy URL HTTP proxy URL.
|
|
1089
|
+
--https-proxy URL HTTPS proxy URL.
|
|
1090
|
+
--version Print version and exit.
|
|
1091
|
+
--help Show this help message and exit.
|
|
1092
|
+
`;
|
|
1093
|
+
/**
 * Command-line front end. `run()` never writes to stdout or exits the
 * process itself; it resolves with the text to print and the exit code.
 */
var YouTubeTranscriptCli = class {
  _argv;
  /** @param {string[]} argv Command-line tokens (without node/script path). */
  constructor(argv) {
    this._argv = argv;
  }
  /**
   * Parses the arguments, fetches or lists transcripts for every video id,
   * and formats the result.
   *
   * Exit codes: 0 on success (at least one video succeeded, or help/version),
   * 1 when every video failed, 2 for usage errors (bad arguments, missing
   * video id, unsupported output format).
   *
   * @returns {Promise<{output: string, exitCode: number}>}
   */
  async run() {
    let parsed;
    try {
      parsed = parseArgs(this._argv, SPEC);
    } catch (err) {
      if (err instanceof ArgvError) {
        return { output: err.message, exitCode: 2 };
      }
      throw err;
    }
    const flags = parsed.flags;
    if (flags["help"] === true) {
      return { output: HELP_TEXT, exitCode: 0 };
    }
    if (flags["version"] === true) {
      return { output: `youtube-transcript, version ${VERSION}`, exitCode: 0 };
    }
    if (parsed.positional.length === 0) {
      return {
        output: "error: at least one VIDEO_ID is required\n\n" + HELP_TEXT,
        exitCode: 2
      };
    }
    // Ids starting with "-" can be passed shell-escaped; drop the backslashes.
    const videoIds = parsed.positional.map((id) => id.replace(/\\/g, ""));
    const excludeManually = flags["exclude_manually_created"] === true;
    const excludeGenerated = flags["exclude_generated"] === true;
    if (excludeManually && excludeGenerated) {
      // Nothing can match when both kinds are excluded.
      return { output: "", exitCode: 0 };
    }
    const listOnly = flags["list_transcripts"] === true;
    // Resolve the formatter before any network traffic so an unsupported
    // --format is reported as a usage error instead of surfacing as an
    // uncaught exception after all transcripts were already downloaded.
    let formatter;
    if (!listOnly) {
      try {
        formatter = new FormatterLoader().load(flags["format"]);
      } catch (err) {
        if (err instanceof UnknownFormatterType) {
          return { output: err.message, exitCode: 2 };
        }
        throw err;
      }
    }
    let proxyConfig;
    const httpProxy = flags["http_proxy"];
    const httpsProxy = flags["https_proxy"];
    if (httpProxy !== "" || httpsProxy !== "") {
      proxyConfig = new GenericProxyConfig({
        httpUrl: httpProxy || void 0,
        httpsUrl: httpsProxy || void 0
      });
    }
    // Webshare credentials, when given, take precedence over generic proxies.
    const wsUser = flags["webshare_proxy_username"];
    const wsPass = flags["webshare_proxy_password"];
    if (wsUser !== "" || wsPass !== "") {
      proxyConfig = new WebshareProxyConfig({
        proxyUsername: wsUser,
        proxyPassword: wsPass
      });
    }
    const api = new YouTubeTranscriptApi({ proxyConfig });
    const collected = [];
    const exceptions = [];
    let successCount = 0;
    for (const videoId of videoIds) {
      try {
        const list = await api.list(videoId);
        if (listOnly) {
          collected.push(list);
        } else {
          const fetched = await this._fetchTranscript(
            list,
            flags["languages"],
            excludeManually,
            excludeGenerated,
            flags["translate"]
          );
          collected.push(fetched);
        }
        successCount++;
      } catch (err) {
        // A failing video must not abort the others; report it in the output.
        exceptions.push(err instanceof Error ? err.toString() : String(err));
      }
    }
    // Errors are printed first, followed by whatever succeeded.
    const printSections = [...exceptions];
    if (collected.length > 0) {
      if (listOnly) {
        for (const item of collected) {
          printSections.push(String(item));
        }
      } else {
        printSections.push(
          formatter.formatTranscripts(collected)
        );
      }
    }
    const output = printSections.join("\n\n");
    const exitCode = successCount === 0 && videoIds.length > 0 ? 1 : 0;
    return { output, exitCode };
  }
  /**
   * Picks a transcript from `transcriptList` according to the exclusion
   * flags, optionally translates it, and fetches its content.
   *
   * @param {object} transcriptList Transcript list for one video.
   * @param {string[]} languages Language codes in descending priority.
   * @param {boolean} excludeManually Only consider generated transcripts.
   * @param {boolean} excludeGenerated Only consider manually created ones.
   * @param {string} translate Target language code, or "" for no translation.
   */
  async _fetchTranscript(transcriptList, languages, excludeManually, excludeGenerated, translate) {
    let transcript;
    if (excludeManually) {
      transcript = transcriptList.findGeneratedTranscript(languages);
    } else if (excludeGenerated) {
      transcript = transcriptList.findManuallyCreatedTranscript(languages);
    } else {
      transcript = transcriptList.findTranscript(languages);
    }
    if (translate !== "") {
      transcript = transcript.translate(translate);
    }
    return transcript.fetch();
  }
};
|
|
1200
|
+
/**
 * CLI entry point: runs the command, prints its output, and exits with the
 * resulting exit code.
 *
 * @param {string[]} [argv] Command-line tokens; defaults to the process
 *   arguments after the script path.
 */
async function main(argv = process.argv.slice(2)) {
  const cli = new YouTubeTranscriptCli(argv);
  const { output, exitCode } = await cli.run();
  if (output) {
    // process.exit() does not wait for pending stdout writes, which can
    // truncate output where stdout is asynchronous (for example pipes on
    // Windows). Wait for the write callback before exiting. The callback
    // also fires on write errors, so the process still exits.
    await new Promise((resolve) => {
      process.stdout.write(output + "\n", () => resolve());
    });
  }
  process.exit(exitCode);
}
|
|
1208
|
+
// True only when this file is the program entry point. `require.main ===
// module` is the CommonJS way to detect that: it does not fire when some
// other script whose name happens to end in "cli.js" merely requires this
// module, and it does not depend on the name of a symlinked bin. The
// file-name heuristic is kept as a fallback for environments without
// `require`/`module`.
var isDirectRun = (() => {
  try {
    if (typeof require !== "undefined" && typeof module !== "undefined" && require.main) {
      return require.main === module;
    }
    if (typeof process === "undefined" || !process.argv[1]) return false;
    const entry = process.argv[1];
    return entry.endsWith("cli.js") || entry.endsWith("cli.cjs");
  } catch {
    return false;
  }
})();
if (isDirectRun) {
  // main() terminates the process itself, so the promise is intentionally not awaited.
  void main();
}
|
|
1220
|
+
|
|
1221
|
+
exports.YouTubeTranscriptCli = YouTubeTranscriptCli;
|
|
1222
|
+
exports.main = main;
|
|
1223
|
+
//# sourceMappingURL=cli.cjs.map
|
|
1224
|
+
//# sourceMappingURL=cli.cjs.map
|