youtube-transcript-plus 1.2.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,716 @@
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
+
4
+ /******************************************************************************
5
+ Copyright (c) Microsoft Corporation.
6
+
7
+ Permission to use, copy, modify, and/or distribute this software for any
8
+ purpose with or without fee is hereby granted.
9
+
10
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
11
+ REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
12
+ AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
13
+ INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
14
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
15
+ OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
16
+ PERFORMANCE OF THIS SOFTWARE.
17
+ ***************************************************************************** */
18
+ /* global Reflect, Promise, SuppressedError, Symbol, Iterator */
19
+
20
+
21
/**
 * Drive a generator function as if it were an async function.
 *
 * Every value the generator yields is wrapped in a promise; the settled value
 * is sent back into the generator (or thrown into it on rejection). The
 * returned promise resolves with the generator's return value and rejects
 * with any error that escapes the generator.
 *
 * @param thisArg - `this` value used when invoking the generator function.
 * @param _arguments - Arguments forwarded to the generator function (optional).
 * @param P - Promise constructor to use; falls back to the global `Promise`.
 * @param generator - Generator function to run.
 * @returns A promise for the generator's return value.
 */
function __awaiter(thisArg, _arguments, P, generator) {
    const PromiseImpl = P || Promise;
    const adopt = (value) => (value instanceof PromiseImpl
        ? value
        : new PromiseImpl((resolve) => { resolve(value); }));
    return new PromiseImpl((resolve, reject) => {
        // Created inside the executor so a synchronous throw becomes a rejection.
        const iterator = generator.apply(thisArg, _arguments || []);
        const step = (result) => {
            if (result.done) {
                resolve(result.value);
                return;
            }
            adopt(result.value).then(onFulfilled, onRejected);
        };
        const resume = (method, value) => {
            try {
                step(iterator[method](value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onFulfilled = (value) => resume('next', value);
        const onRejected = (reason) => resume('throw', reason);
        step(iterator.next());
    });
}
30
+
31
// Leftover `SuppressedError` fallback expression. Its value is discarded, so it
// has no runtime effect; it is kept only to stay faithful to the bundled output.
void (typeof SuppressedError === "function" ? SuppressedError : function (error, suppressed, message) {
    const fallback = new Error(message);
    fallback.name = "SuppressedError";
    fallback.error = error;
    fallback.suppressed = suppressed;
    return fallback;
});

/** User-Agent header sent when the caller does not supply one. */
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';

/** Captures the 11-character video ID (group 1) from a YouTube URL. */
const RE_YOUTUBE = /(?:v=|\/|v\/|embed\/|watch\?.*v=|youtu\.be\/|\/v\/|e\/|watch\?.*vi?=|\/embed\/|\/v\/|vi?\/|watch\?.*vi?=|youtu\.be\/|\/vi?\/|\/e\/)([a-zA-Z0-9_-]{11})/i;

/** Matches one `<text>` cue of the transcript XML: start (1), duration (2), text (3). */
const RE_XML_TRANSCRIPT = /<text start="([^"]*)" dur="([^"]*)">([^<]*)<\/text>/g;

/** Default cache time-to-live: 1 hour, expressed in milliseconds. */
const DEFAULT_CACHE_TTL = 60 * 60 * 1000;
40
+
41
/**
 * Error raised when YouTube is rate-limiting requests from the caller's IP address.
 */
class YoutubeTranscriptTooManyRequestError extends Error {
    constructor() {
        const message = 'YouTube is receiving too many requests from your IP address. Please try again later or use a proxy. If the issue persists, consider reducing the frequency of requests.';
        super(message);
        this.name = 'YoutubeTranscriptTooManyRequestError';
    }
}
48
/**
 * Error raised when the requested video is unavailable or has been removed.
 * The offending ID is exposed as `videoId`.
 */
class YoutubeTranscriptVideoUnavailableError extends Error {
    constructor(videoId) {
        const message = `The video with ID "${videoId}" is no longer available or has been removed. Please check the video URL or ID and try again.`;
        super(message);
        Object.assign(this, { name: 'YoutubeTranscriptVideoUnavailableError', videoId });
    }
}
56
/**
 * Error raised when the video's owner has disabled transcripts.
 * The offending ID is exposed as `videoId`.
 */
class YoutubeTranscriptDisabledError extends Error {
    constructor(videoId) {
        const message = `Transcripts are disabled for the video with ID "${videoId}". This may be due to the video owner disabling captions or the video not supporting transcripts.`;
        super(message);
        Object.assign(this, { name: 'YoutubeTranscriptDisabledError', videoId });
    }
}
64
/**
 * Error raised when no transcripts are available for the video.
 * The offending ID is exposed as `videoId`.
 */
class YoutubeTranscriptNotAvailableError extends Error {
    constructor(videoId) {
        const message = `No transcripts are available for the video with ID "${videoId}". This may be because the video does not have captions or the captions are not accessible.`;
        super(message);
        Object.assign(this, { name: 'YoutubeTranscriptNotAvailableError', videoId });
    }
}
72
/**
 * Error raised when the transcript does not exist in the requested language.
 * Exposes `videoId`, the requested `lang`, and the `availableLangs` array.
 */
class YoutubeTranscriptNotAvailableLanguageError extends Error {
    constructor(lang, availableLangs, videoId) {
        const message = `No transcripts are available in "${lang}" for the video with ID "${videoId}". Available languages: ${availableLangs.join(', ')}. Please try a different language.`;
        super(message);
        Object.assign(this, {
            name: 'YoutubeTranscriptNotAvailableLanguageError',
            videoId,
            lang,
            availableLangs,
        });
    }
}
82
/**
 * Error raised when the `lang` option is not a valid BCP 47 language code.
 * The rejected value is exposed as `lang`.
 */
class YoutubeTranscriptInvalidLangError extends Error {
    constructor(lang) {
        const message = `Invalid language code "${lang}". Please provide a valid BCP 47 language code (e.g., "en", "fr", "pt-BR").`;
        super(message);
        Object.assign(this, { name: 'YoutubeTranscriptInvalidLangError', lang });
    }
}
90
/**
 * Error raised when the supplied video ID or URL cannot be recognised.
 */
class YoutubeTranscriptInvalidVideoIdError extends Error {
    constructor() {
        const message = 'Invalid YouTube video ID or URL. Please provide a valid video ID or URL. Example: "dQw4w9WgXcQ" or "https://www.youtube.com/watch?v=dQw4w9WgXcQ".';
        super(message);
        this.name = 'YoutubeTranscriptInvalidVideoIdError';
    }
}
97
+
98
/** Matches a string that is exactly an 11-character YouTube video ID. */
const RE_VIDEO_ID = /^[a-zA-Z0-9_-]{11}$/;

/**
 * BCP 47-like language code: a 2-3 letter primary tag followed by any number
 * of `-`-separated alphanumeric subtags of 2-8 characters.
 */
const RE_BCP47_LANG = /^[a-zA-Z]{2,3}(-[a-zA-Z0-9]{2,8})*$/;
100
/** Replacement text for the XML entities that are decoded by name. */
const XML_ENTITIES = {
    '&amp;': '&',
    '&lt;': '<',
    '&gt;': '>',
    '&quot;': '"',
    '&#39;': "'",
    '&apos;': "'",
};
/**
 * Matches the predefined named entities plus decimal (`&#8217;`) and
 * hexadecimal (`&#x2019;`) numeric character references.
 */
const RE_XML_ENTITY = /&(?:amp|lt|gt|quot|apos|#\d+|#[xX][0-9a-fA-F]+);/g;
/**
 * Decode XML entities in a transcript text fragment.
 *
 * Handles the five predefined XML entities and numeric character references.
 * Decoding is a single pass, so double-escaped input such as `&amp;lt;`
 * becomes `&lt;`, not `<`. Unknown named entities and numeric references
 * that do not denote a usable code point are left untouched.
 *
 * @param text - Raw text taken from the transcript XML.
 * @returns The text with recognised entities replaced by their characters.
 */
function decodeXmlEntities(text) {
    return text.replace(RE_XML_ENTITY, (match) => {
        const named = XML_ENTITIES[match];
        if (named !== undefined) {
            return named;
        }
        if (match[1] !== '#') {
            return match;
        }
        const isHex = match[2] === 'x' || match[2] === 'X';
        const digits = match.slice(isHex ? 3 : 2, -1);
        const codePoint = Number.parseInt(digits, isHex ? 16 : 10);
        // Reject NUL, lone surrogates and values beyond the Unicode range so
        // String.fromCodePoint can neither throw nor emit broken text.
        const isUsable = Number.isInteger(codePoint) &&
            codePoint > 0 &&
            codePoint <= 0x10ffff &&
            (codePoint < 0xd800 || codePoint > 0xdfff);
        return isUsable ? String.fromCodePoint(codePoint) : match;
    });
}
112
/**
 * Resolve a bare video ID or a YouTube URL to the 11-character video ID.
 *
 * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
 * @returns The 11-character video ID.
 * @throws {@link YoutubeTranscriptInvalidVideoIdError} if the input is not a
 *   string or no video ID can be found in it.
 */
function retrieveVideoId(videoId) {
    // Non-string input (undefined, null, numbers, ...) would otherwise surface
    // as a raw TypeError from the string methods below.
    if (typeof videoId !== 'string') {
        throw new YoutubeTranscriptInvalidVideoIdError();
    }
    if (RE_VIDEO_ID.test(videoId)) {
        return videoId;
    }
    const urlMatch = videoId.match(RE_YOUTUBE);
    if (urlMatch) {
        return urlMatch[1];
    }
    throw new YoutubeTranscriptInvalidVideoIdError();
}
122
/**
 * Check a language code against the BCP 47-like pattern `RE_BCP47_LANG`.
 *
 * @param lang - Language code to validate.
 * @throws {@link YoutubeTranscriptInvalidLangError} if the language code is invalid.
 */
function validateLang(lang) {
    const isValid = RE_BCP47_LANG.test(lang);
    if (isValid) {
        return;
    }
    throw new YoutubeTranscriptInvalidLangError(lang);
}
131
/**
 * Default request implementation built on the global `fetch`.
 *
 * Sends a `User-Agent` header (the caller's, or `DEFAULT_USER_AGENT`), adds
 * `Accept-Language` when `lang` is set, and lets `headers` override both.
 * The body is attached only for `POST` requests.
 *
 * @param params - Request description: `url`, `lang`, `userAgent`, `method`
 *   (defaults to `'GET'`), `body`, `headers` and `signal`.
 * @returns A promise for whatever the global `fetch` returns.
 */
async function defaultFetch(params) {
    const { url, lang, userAgent, method = 'GET', body, headers = {}, signal } = params;
    const requestHeaders = {
        'User-Agent': userAgent || DEFAULT_USER_AGENT,
        ...(lang && { 'Accept-Language': lang }),
        ...headers,
    };
    const requestInit = { method, headers: requestHeaders, signal };
    const hasPostBody = Boolean(body) && method === 'POST';
    if (hasPostBody) {
        requestInit.body = body;
    }
    return fetch(url, requestInit);
}
146
/**
 * Tell whether an HTTP status code is worth retrying.
 *
 * @param status - HTTP status code.
 * @returns `true` for 429 and for any code in the 500-599 range, otherwise `false`.
 */
function isRetryableStatus(status) {
    if (status === 429) {
        return true;
    }
    return status >= 500 && status <= 599;
}
150
/**
 * Wait for the given number of milliseconds, aborting early if the signal fires.
 *
 * @param ms - Time to wait in milliseconds.
 * @param signal - Optional AbortSignal. If it is already aborted the returned
 *   promise rejects immediately; if it aborts during the wait, the timer is
 *   cleared and the promise rejects with `signal.reason`.
 * @returns A promise that resolves (with no value) once the delay has elapsed.
 */
function sleep(ms, signal) {
    return new Promise((resolve, reject) => {
        // Throwing inside the executor rejects the promise.
        signal?.throwIfAborted();
        if (!signal) {
            setTimeout(resolve, ms);
            return;
        }
        const onAbort = () => {
            clearTimeout(timer);
            reject(signal.reason);
        };
        const timer = setTimeout(() => {
            // Detach the listener on normal completion; otherwise each retry
            // would leave one more listener on a long-lived signal.
            signal.removeEventListener('abort', onAbort);
            resolve();
        }, ms);
        signal.addEventListener('abort', onAbort, { once: true });
    });
}
166
/**
 * Wrap a fetch call with retry logic using exponential backoff.
 *
 * Retries on 429 (Too Many Requests) and 5xx (Server Errors).
 * Client errors (4xx other than 429) are returned immediately.
 * Errors thrown by `fetchFn` itself are not retried; they propagate.
 *
 * @param fetchFn - Function that performs the fetch call.
 * @param retries - Maximum number of retry attempts (0 = no retries). Values
 *   that are negative, `NaN` or missing are treated as 0, and fractional
 *   values are rounded down, so at least one attempt is always made.
 * @param retryDelay - Base delay in milliseconds for exponential backoff.
 * @param signal - Optional AbortSignal to cancel the operation.
 * @returns The fetch Response (the last one received if retries run out).
 */
async function fetchWithRetry(fetchFn, retries, retryDelay, signal) {
    // `NaN > 0` and `undefined > 0` are both false, so they normalise to 0.
    const maxRetries = retries > 0 ? Math.floor(retries) : 0;
    for (let attempt = 0;; attempt++) {
        signal?.throwIfAborted();
        const response = await fetchFn();
        if (attempt >= maxRetries || !isRetryableStatus(response.status)) {
            return response;
        }
        // Wait with exponential backoff: delay * 2^attempt
        await sleep(retryDelay * 2 ** attempt, signal);
    }
}
194
+
195
/**
 * Turn a cache key into a string that is safe to use as a file name.
 *
 * @param key - Cache key.
 * @returns The key with every character outside `[a-zA-Z0-9_-]` replaced by `_`.
 */
function sanitizeKey(key) {
    const unsafeCharacters = /[^a-zA-Z0-9_-]/g;
    return key.replace(unsafeCharacters, '_');
}
198
/**
 * File-system-based cache implementation.
 *
 * Each entry is stored as a JSON file (`{ value, expires }`) in the specified
 * directory, named after the sanitized cache key.
 * Expired entries are automatically deleted when accessed.
 *
 * @example
 * ```typescript
 * import { fetchTranscript, FsCache } from 'youtube-transcript-plus';
 * const transcript = await fetchTranscript('dQw4w9WgXcQ', {
 *   cache: new FsCache('./my-cache-dir', 86400000), // 1 day TTL
 * });
 * ```
 */
class FsCache {
    /**
     * @param cacheDir - Directory to store cache files. Created automatically if it doesn't exist.
     * @param defaultTTL - Default time-to-live in milliseconds. Defaults to 1 hour.
     */
    constructor(cacheDir = './cache', defaultTTL = DEFAULT_CACHE_TTL) {
        this.cacheDir = cacheDir;
        this.defaultTTL = defaultTTL;
        this.ready = fs.mkdir(cacheDir, { recursive: true }).then(() => { });
        // Mark the promise as handled so a failed mkdir does not raise an
        // unhandled rejection before the first get()/set(). `this.ready`
        // itself stays rejected, so those calls still report the failure.
        this.ready.catch(() => { });
    }
    /**
     * Read a cached value.
     *
     * @param key - Cache key.
     * @returns The stored value, or `null` if the entry is missing, expired or unreadable.
     * @throws If the cache directory could not be created.
     */
    async get(key) {
        await this.ready;
        const filePath = path.join(this.cacheDir, sanitizeKey(key));
        try {
            const raw = await fs.readFile(filePath, 'utf-8');
            const { value, expires } = JSON.parse(raw);
            if (expires > Date.now()) {
                return value;
            }
            await fs.unlink(filePath);
        }
        catch {
            // Deliberately best-effort: a missing, corrupt or undeletable
            // file is treated as a cache miss.
        }
        return null;
    }
    /**
     * Store a value.
     *
     * @param key - Cache key.
     * @param value - Value to store; it must be JSON-serializable.
     * @param ttl - Time-to-live in milliseconds; falls back to the default TTL when omitted.
     * @throws If the cache directory could not be created or the file cannot be written.
     */
    async set(key, value, ttl) {
        await this.ready;
        const filePath = path.join(this.cacheDir, sanitizeKey(key));
        const expires = Date.now() + (ttl ?? this.defaultTTL);
        await fs.writeFile(filePath, JSON.stringify({ value, expires }), 'utf-8');
    }
}
247
+
248
/**
 * Cache implementation that keeps its entries in a `Map` in process memory.
 *
 * An entry is removed when it is looked up after its expiry time.
 *
 * @example
 * ```typescript
 * import { fetchTranscript, InMemoryCache } from 'youtube-transcript-plus';
 * const transcript = await fetchTranscript('dQw4w9WgXcQ', {
 *   cache: new InMemoryCache(1800000), // 30 minutes TTL
 * });
 * ```
 */
class InMemoryCache {
    /** @param defaultTTL - Default time-to-live in milliseconds. Defaults to 1 hour. */
    constructor(defaultTTL = DEFAULT_CACHE_TTL) {
        this.cache = new Map();
        this.defaultTTL = defaultTTL;
    }
    /**
     * Look up a value.
     *
     * @param key - Cache key.
     * @returns The stored value, or `null` if the key is absent or the entry has expired.
     */
    async get(key) {
        const entry = this.cache.get(key);
        const isFresh = Boolean(entry) && entry.expires > Date.now();
        if (!isFresh) {
            // Drops the expired entry (a no-op when the key was never stored).
            this.cache.delete(key);
            return null;
        }
        return entry.value;
    }
    /**
     * Store a value.
     *
     * @param key - Cache key.
     * @param value - Value to store.
     * @param ttl - Time-to-live in milliseconds; falls back to the default TTL when omitted.
     */
    async set(key, value, ttl) {
        const lifetime = ttl ?? this.defaultTTL;
        const expires = Date.now() + lifetime;
        this.cache.set(key, { value, expires });
    }
}
284
+
285
/**
 * Format seconds as an SRT timestamp: `HH:MM:SS,mmm`
 * SRT uses comma as the decimal separator per specification.
 *
 * The value is rounded to whole milliseconds first, so a fraction that rounds
 * up to a full second carries over (1.9996 becomes `00:00:02,000`).
 *
 * @param seconds - Time in seconds (may be fractional).
 * @returns The formatted timestamp.
 */
function formatSrtTimestamp(seconds) {
    const totalMs = Math.round(seconds * 1000);
    const ms = totalMs % 1000;
    const totalSeconds = Math.floor(totalMs / 1000);
    const s = totalSeconds % 60;
    const m = Math.floor(totalSeconds / 60) % 60;
    const h = Math.floor(totalSeconds / 3600);
    const pad = (value, width) => String(value).padStart(width, '0');
    return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)},${pad(ms, 3)}`;
}
302
/**
 * Format seconds as a VTT timestamp: `HH:MM:SS.mmm`
 * VTT uses period as the decimal separator per specification.
 *
 * The value is rounded to whole milliseconds first, so a fraction that rounds
 * up to a full second carries over (1.9996 becomes `00:00:02.000`).
 *
 * @param seconds - Time in seconds (may be fractional).
 * @returns The formatted timestamp.
 */
function formatVttTimestamp(seconds) {
    const totalMs = Math.round(seconds * 1000);
    const ms = totalMs % 1000;
    const totalSeconds = Math.floor(totalMs / 1000);
    const s = totalSeconds % 60;
    const m = Math.floor(totalSeconds / 60) % 60;
    const h = Math.floor(totalSeconds / 3600);
    const pad = (value, width) => String(value).padStart(width, '0');
    return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)}.${pad(ms, 3)}`;
}
319
/**
 * Render transcript segments as a SubRip (SRT) document.
 *
 * Each segment becomes one cue: a 1-based sequence number, a
 * `start --> end` line with `HH:MM:SS,mmm` timestamps, then the segment text.
 * Cues are separated by a blank line.
 *
 * @param segments - Array of transcript segments from {@link fetchTranscript}.
 * @returns The SRT document as a string (empty when there are no segments).
 *
 * @example
 * ```typescript
 * import { fetchTranscript, toSRT } from 'youtube-transcript-plus';
 * const transcript = await fetchTranscript('dQw4w9WgXcQ');
 * const srt = toSRT(transcript);
 *
 * // With videoDetails enabled, use result.segments:
 * const result = await fetchTranscript('dQw4w9WgXcQ', { videoDetails: true });
 * const srt2 = toSRT(result.segments);
 * ```
 */
function toSRT(segments) {
    const renderCue = (segment, index) => {
        const start = formatSrtTimestamp(segment.offset);
        const end = formatSrtTimestamp(segment.offset + segment.duration);
        const sequenceNumber = index + 1;
        return `${sequenceNumber}\n${start} --> ${end}\n${segment.text}`;
    };
    const cues = segments.map(renderCue);
    return cues.join('\n\n');
}
345
/**
 * Render transcript segments as a WebVTT (VTT) document.
 *
 * The output starts with the `WEBVTT` header followed by a blank line. Each
 * segment becomes one cue: a `start --> end` line with `HH:MM:SS.mmm`
 * timestamps, then the segment text. Cues are separated by a blank line.
 *
 * @param segments - Array of transcript segments from {@link fetchTranscript}.
 * @returns The VTT document as a string.
 *
 * @example
 * ```typescript
 * import { fetchTranscript, toVTT } from 'youtube-transcript-plus';
 * const transcript = await fetchTranscript('dQw4w9WgXcQ');
 * const vtt = toVTT(transcript);
 *
 * // With videoDetails enabled, use result.segments:
 * const result = await fetchTranscript('dQw4w9WgXcQ', { videoDetails: true });
 * const vtt2 = toVTT(result.segments);
 * ```
 */
function toVTT(segments) {
    const renderCue = (segment) => {
        const start = formatVttTimestamp(segment.offset);
        const end = formatVttTimestamp(segment.offset + segment.duration);
        return `${start} --> ${end}\n${segment.text}`;
    };
    const body = segments.map(renderCue).join('\n\n');
    return `WEBVTT\n\n${body}`;
}
372
/**
 * Join the text of transcript segments into a single plain-text string.
 *
 * @param segments - Array of transcript segments from {@link fetchTranscript}.
 * @param separator - String placed between consecutive segments. Defaults to `'\n'`.
 * @returns The segment texts joined by the separator.
 *
 * @example
 * ```typescript
 * import { fetchTranscript, toPlainText } from 'youtube-transcript-plus';
 * const transcript = await fetchTranscript('dQw4w9WgXcQ');
 * const text = toPlainText(transcript);
 * const paragraph = toPlainText(transcript, ' ');
 *
 * // With videoDetails enabled, use result.segments:
 * const result = await fetchTranscript('dQw4w9WgXcQ', { videoDetails: true });
 * const text2 = toPlainText(result.segments);
 * ```
 */
function toPlainText(segments, separator = '\n') {
    const texts = segments.map(({ text }) => text);
    return texts.join(separator);
}
394
+
395
/**
 * Fetches YouTube video transcripts and caption metadata using the Innertube API.
 *
 * Can be used as an instance (with shared config) or via static/convenience methods.
 *
 * @example
 * ```typescript
 * // Instance usage with shared config
 * const yt = new YoutubeTranscript({ lang: 'en' });
 * const transcript = await yt.fetchTranscript('dQw4w9WgXcQ');
 * const languages = await yt.listLanguages('dQw4w9WgXcQ');
 *
 * // Static method
 * const transcript = await YoutubeTranscript.fetchTranscript('dQw4w9WgXcQ', { lang: 'en' });
 *
 * // Opt-in to video details
 * const { videoDetails, segments } = await YoutubeTranscript.fetchTranscript('dQw4w9WgXcQ', {
 *   videoDetails: true,
 * });
 *
 * // Convenience export
 * const transcript = await fetchTranscript('dQw4w9WgXcQ');
 * const languages = await listLanguages('dQw4w9WgXcQ');
 * ```
 */
class YoutubeTranscript {
    /**
     * @param config - Optional configuration shared by every call on this instance.
     */
    constructor(config) {
        this.config = config;
    }
    /**
     * Perform one request through the configured hook (or the default fetch),
     * applying the retry settings from the config.
     *
     * @param hookName - Config property holding the custom fetch function
     *   (`'videoFetch'`, `'playerFetch'` or `'transcriptFetch'`).
     * @param params - Request parameters handed to the hook or to `defaultFetch`.
     * @returns The response produced by the hook or by `defaultFetch`.
     */
    _fetchWithHook(hookName, params) {
        const retries = this.config?.retries ?? 0;
        const retryDelay = this.config?.retryDelay ?? 1000;
        const signal = this.config?.signal;
        return fetchWithRetry(() => (this.config?.[hookName]
            ? this.config[hookName](params)
            : defaultFetch(params)), retries, retryDelay, signal);
    }
    /**
     * Fetch caption tracks and the player response from the Innertube player API.
     * Shared logic used by both fetchTranscript and listLanguages.
     *
     * @param identifier - The 11-character video ID.
     * @param lang - Optional language code, forwarded to the fetch hooks.
     * @returns `{ tracks, playerJson }`, where `tracks` is a non-empty array.
     * @throws {@link YoutubeTranscriptTooManyRequestError} on HTTP 429 or a reCAPTCHA page.
     * @throws {@link YoutubeTranscriptVideoUnavailableError} on any other failed response.
     * @throws {@link YoutubeTranscriptNotAvailableError} if no API key or caption data is found.
     * @throws {@link YoutubeTranscriptDisabledError} if the video is playable but has no caption tracks.
     */
    async _fetchCaptionTracks(identifier, lang) {
        const userAgent = this.config?.userAgent ?? DEFAULT_USER_AGENT;
        const protocol = this.config?.disableHttps ? 'http' : 'https';
        const signal = this.config?.signal;
        // 1) Fetch the watch page to extract an Innertube API key
        const watchFetchParams = {
            url: `${protocol}://www.youtube.com/watch?v=${identifier}`,
            lang,
            userAgent,
            signal,
        };
        const videoPageResponse = await this._fetchWithHook('videoFetch', watchFetchParams);
        if (!videoPageResponse.ok) {
            // Report rate limiting as such, matching the transcript request,
            // instead of claiming the video is gone.
            if (videoPageResponse.status === 429) {
                throw new YoutubeTranscriptTooManyRequestError();
            }
            throw new YoutubeTranscriptVideoUnavailableError(identifier);
        }
        const videoPageBody = await videoPageResponse.text();
        // Basic bot/recaptcha detection preserves old error behavior
        if (videoPageBody.includes('class="g-recaptcha"')) {
            throw new YoutubeTranscriptTooManyRequestError();
        }
        // 2) Extract Innertube API key from the page (plain or backslash-escaped JSON)
        const apiKeyMatch = videoPageBody.match(/"INNERTUBE_API_KEY":"([^"]+)"/) ||
            videoPageBody.match(/INNERTUBE_API_KEY\\":\\"([^\\"]+)\\"/);
        if (!apiKeyMatch) {
            throw new YoutubeTranscriptNotAvailableError(identifier);
        }
        const apiKey = apiKeyMatch[1];
        // 3) Call Innertube player as ANDROID client to retrieve captionTracks
        const playerBody = {
            context: {
                client: {
                    clientName: 'ANDROID',
                    clientVersion: '20.10.38',
                },
            },
            videoId: identifier,
        };
        const playerFetchParams = {
            url: `${protocol}://www.youtube.com/youtubei/v1/player?key=${apiKey}`,
            method: 'POST',
            lang,
            userAgent,
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify(playerBody),
            signal,
        };
        const playerRes = await this._fetchWithHook('playerFetch', playerFetchParams);
        if (!playerRes.ok) {
            if (playerRes.status === 429) {
                throw new YoutubeTranscriptTooManyRequestError();
            }
            throw new YoutubeTranscriptVideoUnavailableError(identifier);
        }
        const playerJson = await playerRes.json();
        const tracklist = playerJson.captions?.playerCaptionsTracklistRenderer ??
            playerJson.playerCaptionsTracklistRenderer;
        const tracks = tracklist?.captionTracks;
        const isPlayableOk = playerJson.playabilityStatus?.status === 'OK';
        if (!playerJson.captions || !tracklist) {
            // If video is playable but captions aren't provided, treat as "disabled"
            if (isPlayableOk) {
                throw new YoutubeTranscriptDisabledError(identifier);
            }
            // Otherwise we can't assert they're disabled; treat as "not available"
            throw new YoutubeTranscriptNotAvailableError(identifier);
        }
        // If `captions` exists but there are zero tracks, treat as "disabled"
        if (!Array.isArray(tracks) || tracks.length === 0) {
            throw new YoutubeTranscriptDisabledError(identifier);
        }
        return { tracks, playerJson };
    }
    /**
     * Extract VideoDetails from the Innertube player response.
     *
     * Missing fields fall back to empty values (`''`, `0`, `[]`, `false`);
     * a missing video ID falls back to `identifier`.
     *
     * @param playerJson - Parsed player response.
     * @param identifier - The 11-character video ID.
     * @returns The normalized video details object.
     */
    _extractVideoDetails(playerJson, identifier) {
        const raw = playerJson.videoDetails;
        return {
            videoId: raw?.videoId ?? identifier,
            title: raw?.title ?? '',
            author: raw?.author ?? '',
            channelId: raw?.channelId ?? '',
            lengthSeconds: parseInt(raw?.lengthSeconds ?? '0', 10),
            viewCount: parseInt(raw?.viewCount ?? '0', 10),
            description: raw?.shortDescription ?? '',
            keywords: raw?.keywords ?? [],
            thumbnails: raw?.thumbnail?.thumbnails ?? [],
            isLiveContent: raw?.isLiveContent ?? false,
        };
    }
    /**
     * Fetch the transcript for a YouTube video.
     *
     * When `videoDetails` is set to `true` in the config, returns a {@link TranscriptResult}
     * containing both video metadata and transcript segments. Otherwise returns an array of
     * {@link TranscriptSegment} objects.
     *
     * A configured cache is best-effort: a failing read or write, or an entry
     * that is not valid JSON, never fails the call.
     *
     * **Note:** The instance method returns a union type because `videoDetails` is set at
     * construction time. For automatic type narrowing, use the static method or the
     * `fetchTranscript` convenience export instead.
     *
     * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
     * @returns An array of transcript segments, or a TranscriptResult if `videoDetails` is enabled.
     * @throws {@link YoutubeTranscriptInvalidVideoIdError} if the video ID/URL is invalid.
     * @throws {@link YoutubeTranscriptInvalidLangError} if the configured `lang` is invalid.
     * @throws {@link YoutubeTranscriptVideoUnavailableError} if the video is unavailable.
     * @throws {@link YoutubeTranscriptDisabledError} if transcripts are disabled.
     * @throws {@link YoutubeTranscriptNotAvailableError} if no transcript is available.
     * @throws {@link YoutubeTranscriptNotAvailableLanguageError} if the requested language is unavailable.
     * @throws {@link YoutubeTranscriptTooManyRequestError} if rate-limited by YouTube.
     */
    async fetchTranscript(videoId) {
        const identifier = retrieveVideoId(videoId);
        const lang = this.config?.lang;
        if (lang) {
            validateLang(lang);
        }
        const userAgent = this.config?.userAgent ?? DEFAULT_USER_AGENT;
        const includeDetails = this.config?.videoDetails === true;
        // Cache lookup (if provided)
        const cache = this.config?.cache;
        const cacheTTL = this.config?.cacheTTL;
        const cacheKey = includeDetails
            ? `yt:transcript+details:${identifier}:${lang ?? ''}`
            : `yt:transcript:${identifier}:${lang ?? ''}`;
        if (cache) {
            try {
                const cached = await cache.get(cacheKey);
                if (cached) {
                    return JSON.parse(cached);
                }
            }
            catch {
                // Cache read failures and unparsable entries are non-fatal,
                // like cache writes below: fall through to a fresh fetch.
            }
        }
        const { tracks, playerJson } = await this._fetchCaptionTracks(identifier, lang);
        // Respect requested language or fallback to first track
        const selectedTrack = lang
            ? tracks.find((track) => track.languageCode === lang)
            : tracks[0];
        if (!selectedTrack) {
            const available = tracks.map((track) => track.languageCode).filter(Boolean);
            throw new YoutubeTranscriptNotAvailableLanguageError(lang, available, identifier);
        }
        // Build transcript URL; prefer XML by stripping fmt if present
        const transcriptBaseURL = selectedTrack.baseUrl ?? selectedTrack.url;
        if (!transcriptBaseURL) {
            throw new YoutubeTranscriptNotAvailableError(identifier);
        }
        let transcriptURL = transcriptBaseURL.replace(/&fmt=[^&]+/, '');
        if (this.config?.disableHttps) {
            transcriptURL = transcriptURL.replace(/^https:\/\//, 'http://');
        }
        // Fetch transcript XML using the same hook surface as before
        const transcriptFetchParams = {
            url: transcriptURL,
            lang,
            userAgent,
            signal: this.config?.signal,
        };
        const transcriptResponse = await this._fetchWithHook('transcriptFetch', transcriptFetchParams);
        if (!transcriptResponse.ok) {
            // Preserve legacy behavior
            if (transcriptResponse.status === 429) {
                throw new YoutubeTranscriptTooManyRequestError();
            }
            throw new YoutubeTranscriptNotAvailableError(identifier);
        }
        const transcriptBody = await transcriptResponse.text();
        // Parse XML into TranscriptSegment objects
        const segments = Array.from(transcriptBody.matchAll(RE_XML_TRANSCRIPT), (match) => ({
            text: decodeXmlEntities(match[3]),
            duration: parseFloat(match[2]),
            offset: parseFloat(match[1]),
            lang: lang ?? selectedTrack.languageCode,
        }));
        if (segments.length === 0) {
            throw new YoutubeTranscriptNotAvailableError(identifier);
        }
        // Build the result based on whether videoDetails was requested
        const result = includeDetails
            ? { videoDetails: this._extractVideoDetails(playerJson, identifier), segments }
            : segments;
        // Cache store
        if (cache) {
            try {
                await cache.set(cacheKey, JSON.stringify(result), cacheTTL);
            }
            catch {
                // non-fatal
            }
        }
        return result;
    }
    /**
     * List available caption languages for a YouTube video.
     *
     * Queries the Innertube player API to discover what caption tracks exist,
     * without downloading any transcript data.
     *
     * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
     * @returns An array of available caption track info objects.
     * @throws {@link YoutubeTranscriptInvalidVideoIdError} if the video ID/URL is invalid.
     * @throws {@link YoutubeTranscriptVideoUnavailableError} if the video is unavailable.
     * @throws {@link YoutubeTranscriptDisabledError} if transcripts are disabled.
     * @throws {@link YoutubeTranscriptNotAvailableError} if no captions are available.
     * @throws {@link YoutubeTranscriptTooManyRequestError} if rate-limited by YouTube.
     *
     * @example
     * ```typescript
     * const yt = new YoutubeTranscript();
     * const languages = await yt.listLanguages('dQw4w9WgXcQ');
     * // [
     * //   { languageCode: 'en', languageName: 'English', isAutoGenerated: false },
     * //   { languageCode: 'es', languageName: 'Spanish (auto-generated)', isAutoGenerated: true },
     * // ]
     * ```
     */
    async listLanguages(videoId) {
        const identifier = retrieveVideoId(videoId);
        const { tracks } = await this._fetchCaptionTracks(identifier);
        return tracks.map((track) => ({
            languageCode: track.languageCode,
            languageName: track.name?.simpleText ?? track.languageCode,
            isAutoGenerated: track.kind === 'asr',
        }));
    }
    /**
     * Static convenience method to fetch a transcript without creating an instance.
     *
     * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
     * @param config - Optional configuration options.
     * @returns An array of transcript segments, or a TranscriptResult if `videoDetails` is enabled.
     */
    static async fetchTranscript(videoId, config) {
        // Must not rely on `this`, so the method also works when called unbound.
        return new YoutubeTranscript(config).fetchTranscript(videoId);
    }
    /**
     * Static convenience method to list available caption languages without creating an instance.
     *
     * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
     * @param config - Optional configuration options.
     * @returns An array of available caption track info objects.
     */
    static async listLanguages(videoId, config) {
        // Must not rely on `this`: the module exports this method unbound.
        return new YoutubeTranscript(config).listLanguages(videoId);
    }
}
698
/**
 * Convenience function to fetch the transcript of a YouTube video.
 * Delegates to the static `YoutubeTranscript.fetchTranscript`.
 *
 * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
 * @param config - Optional configuration options.
 * @returns Whatever `YoutubeTranscript.fetchTranscript` returns for these arguments.
 */
function fetchTranscript(videoId, config) {
    const pendingTranscript = YoutubeTranscript.fetchTranscript(videoId, config);
    return pendingTranscript;
}
701
/**
 * Convenience export for listing the caption languages available for a YouTube video.
 * This is the static `YoutubeTranscript.listLanguages` method itself, exported unbound.
 *
 * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
 * @param config - Optional configuration options.
 * @returns An array of available caption track info objects.
 *
 * @example
 * ```typescript
 * import { listLanguages } from 'youtube-transcript-plus';
 * const languages = await listLanguages('dQw4w9WgXcQ');
 * ```
 */
const { listLanguages } = YoutubeTranscript;
715
+
716
+ export { FsCache, InMemoryCache, YoutubeTranscript, YoutubeTranscriptDisabledError, YoutubeTranscriptInvalidLangError, YoutubeTranscriptInvalidVideoIdError, YoutubeTranscriptNotAvailableError, YoutubeTranscriptNotAvailableLanguageError, YoutubeTranscriptTooManyRequestError, YoutubeTranscriptVideoUnavailableError, fetchTranscript, listLanguages, toPlainText, toSRT, toVTT };