youtube-transcript-plus 1.1.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,732 @@
1
+ 'use strict';
2
+
3
+ var fs = require('node:fs/promises');
4
+ var path = require('node:path');
5
+
6
+ /******************************************************************************
7
+ Copyright (c) Microsoft Corporation.
8
+
9
+ Permission to use, copy, modify, and/or distribute this software for any
10
+ purpose with or without fee is hereby granted.
11
+
12
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
13
+ REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
14
+ AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
15
+ INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
16
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
17
+ OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
18
+ PERFORMANCE OF THIS SOFTWARE.
19
+ ***************************************************************************** */
20
+ /* global Reflect, Promise, SuppressedError, Symbol, Iterator */
21
+
22
+
23
/**
 * Drive a generator function as an asynchronous coroutine (the down-levelled
 * form of `async`/`await`).
 *
 * Each yielded value is adopted into a promise; its fulfilment value is sent
 * back into the generator via `next`, and its rejection reason via `throw`.
 *
 * @param thisArg - `this` value the generator function is invoked with.
 * @param _arguments - Arguments passed to the generator function (may be omitted).
 * @param P - Promise constructor to use; falls back to the global `Promise`.
 * @param generator - Generator function describing the asynchronous steps.
 * @returns A promise settled with the generator's return value or thrown error.
 */
function __awaiter(thisArg, _arguments, P, generator) {
    // Leave values that are already P-promises untouched; wrap everything else.
    function adopt(value) {
        if (value instanceof P) {
            return value;
        }
        return new P(function (resolve) { resolve(value); });
    }
    return new (P || (P = Promise))(function (resolve, reject) {
        // Settle on completion, otherwise wait for the yielded value and resume.
        function step(state) {
            if (state.done) {
                resolve(state.value);
                return;
            }
            adopt(state.value).then(fulfilled, rejected);
        }
        function fulfilled(value) {
            try {
                step(generator.next(value));
            }
            catch (err) {
                reject(err);
            }
        }
        function rejected(reason) {
            try {
                step(generator["throw"](reason));
            }
            catch (err) {
                reject(err);
            }
        }
        // `generator` is rebound from the generator function to its iterator here.
        step((generator = generator.apply(thisArg, _arguments || [])).next());
    });
}
32
+
33
// Residue of the bundled helper library: picks the native `SuppressedError`
// when the runtime has one, otherwise a factory building an equivalent Error.
// The expression's value is not assigned to anything in this file.
typeof SuppressedError === "function" ? SuppressedError : function (error, suppressed, message) {
    var e = new Error(message);
    e.name = "SuppressedError";
    e.error = error;
    e.suppressed = suppressed;
    return e;
};
37
+
38
/** Browser-like User-Agent header value used when the caller supplies none. */
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';
/** Captures (group 1) the 11-character video ID that follows a recognised URL marker. */
const RE_YOUTUBE = /(?:v=|\/|v\/|embed\/|watch\?.*v=|youtu\.be\/|\/v\/|e\/|watch\?.*vi?=|\/embed\/|\/v\/|vi?\/|watch\?.*vi?=|youtu\.be\/|\/vi?\/|\/e\/)([a-zA-Z0-9_-]{11})/i;
/** Matches one `<text start="…" dur="…">…</text>` cue; groups: start, duration, text. */
const RE_XML_TRANSCRIPT = /<text start="([^"]*)" dur="([^"]*)">([^<]*)<\/text>/g;
/** Default cache time-to-live: one hour, expressed in milliseconds. */
const DEFAULT_CACHE_TTL = 60 * 60 * 1000;
42
+
43
/**
 * Error raised when YouTube rate-limits requests coming from the caller's IP address.
 */
class YoutubeTranscriptTooManyRequestError extends Error {
    constructor() {
        const message = 'YouTube is receiving too many requests from your IP address. Please try again later or use a proxy. If the issue persists, consider reducing the frequency of requests.';
        super(message);
        this.name = 'YoutubeTranscriptTooManyRequestError';
    }
}
50
/**
 * Error raised when the requested video is unavailable or has been removed.
 * The offending ID is exposed as `videoId`.
 */
class YoutubeTranscriptVideoUnavailableError extends Error {
    constructor(videoId) {
        const message = `The video with ID "${videoId}" is no longer available or has been removed. Please check the video URL or ID and try again.`;
        super(message);
        this.name = 'YoutubeTranscriptVideoUnavailableError';
        this.videoId = videoId;
    }
}
58
/**
 * Error raised when the video's owner has disabled transcripts.
 * The offending ID is exposed as `videoId`.
 */
class YoutubeTranscriptDisabledError extends Error {
    constructor(videoId) {
        const message = `Transcripts are disabled for the video with ID "${videoId}". This may be due to the video owner disabling captions or the video not supporting transcripts.`;
        super(message);
        this.name = 'YoutubeTranscriptDisabledError';
        this.videoId = videoId;
    }
}
66
/**
 * Error raised when a video has no transcripts at all.
 * The offending ID is exposed as `videoId`.
 */
class YoutubeTranscriptNotAvailableError extends Error {
    constructor(videoId) {
        const message = `No transcripts are available for the video with ID "${videoId}". This may be because the video does not have captions or the captions are not accessible.`;
        super(message);
        this.name = 'YoutubeTranscriptNotAvailableError';
        this.videoId = videoId;
    }
}
74
/**
 * Error raised when a video has transcripts, but none in the requested language.
 * Exposes `videoId`, the requested `lang`, and the `availableLangs` list.
 */
class YoutubeTranscriptNotAvailableLanguageError extends Error {
    constructor(lang, availableLangs, videoId) {
        const message = `No transcripts are available in "${lang}" for the video with ID "${videoId}". Available languages: ${availableLangs.join(', ')}. Please try a different language.`;
        super(message);
        this.name = 'YoutubeTranscriptNotAvailableLanguageError';
        this.videoId = videoId;
        this.lang = lang;
        this.availableLangs = availableLangs;
    }
}
84
/**
 * Error raised when the `lang` option is not a valid BCP 47 language code.
 * The rejected value is exposed as `lang`.
 */
class YoutubeTranscriptInvalidLangError extends Error {
    constructor(lang) {
        const message = `Invalid language code "${lang}". Please provide a valid BCP 47 language code (e.g., "en", "fr", "pt-BR").`;
        super(message);
        this.name = 'YoutubeTranscriptInvalidLangError';
        this.lang = lang;
    }
}
92
/**
 * Error raised when the supplied value is neither a video ID nor a recognisable YouTube URL.
 */
class YoutubeTranscriptInvalidVideoIdError extends Error {
    constructor() {
        const message = 'Invalid YouTube video ID or URL. Please provide a valid video ID or URL. Example: "dQw4w9WgXcQ" or "https://www.youtube.com/watch?v=dQw4w9WgXcQ".';
        super(message);
        this.name = 'YoutubeTranscriptInvalidVideoIdError';
    }
}
99
+
100
/** Matches a bare video ID: exactly 11 characters from `A-Z a-z 0-9 _ -`. */
const RE_VIDEO_ID = /^[a-zA-Z0-9_-]{11}$/;
/** BCP 47-like shape: a 2-3 letter primary tag, then optional `-`-separated subtags of 2-8 alphanumerics. */
const RE_BCP47_LANG = /^[a-zA-Z]{2,3}(-[a-zA-Z0-9]{2,8})*$/;
102
/** Replacement text for the predefined XML entities (plus the common `&#39;`). */
const XML_ENTITIES = {
    '&amp;': '&',
    '&lt;': '<',
    '&gt;': '>',
    '&quot;': '"',
    '&#39;': "'",
    '&apos;': "'",
};
/** Matches a predefined named entity or any decimal / hexadecimal character reference. */
const RE_XML_ENTITY = /&(?:amp|lt|gt|quot|apos|#\d+|#[xX][0-9a-fA-F]+);/g;
/**
 * Decode XML entities in a transcript text fragment.
 *
 * Handles the five predefined named entities and every numeric character
 * reference (`&#8217;`, `&#x27;`, ...). Decoding is single-pass, so
 * `&amp;lt;` becomes `&lt;` rather than `<`. Unknown named entities and
 * numeric references that do not denote a valid Unicode scalar value are
 * left untouched.
 *
 * @param text - Raw text captured from the transcript XML.
 * @returns The text with recognised entities replaced by their characters.
 */
function decodeXmlEntities(text) {
    return text.replace(RE_XML_ENTITY, (match) => {
        const named = XML_ENTITIES[match];
        if (named !== undefined) {
            return named;
        }
        // Everything else the pattern admits is numeric: `&#DDD;` or `&#xHHH;`.
        const isHex = match[2] === 'x' || match[2] === 'X';
        const digits = match.slice(isHex ? 3 : 2, -1);
        const codePoint = Number.parseInt(digits, isHex ? 16 : 10);
        const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
        // Guard the range so String.fromCodePoint cannot throw a RangeError.
        if (codePoint > 0 && codePoint <= 0x10ffff && !isSurrogate) {
            return String.fromCodePoint(codePoint);
        }
        return match;
    });
}
114
/**
 * Resolve a user-supplied value to an 11-character YouTube video ID.
 *
 * Accepts either a bare ID or any URL shape recognised by `RE_YOUTUBE`.
 * Leading and trailing whitespace is ignored.
 *
 * @param videoId - A video ID or a YouTube URL.
 * @returns The extracted 11-character video ID.
 * @throws {YoutubeTranscriptInvalidVideoIdError} if the value is not a string
 *   or no video ID can be extracted from it.
 */
function retrieveVideoId(videoId) {
    // Non-strings used to surface as a raw TypeError from `.match`.
    if (typeof videoId !== 'string') {
        throw new YoutubeTranscriptInvalidVideoIdError();
    }
    const candidate = videoId.trim();
    if (RE_VIDEO_ID.test(candidate)) {
        return candidate;
    }
    const matchId = candidate.match(RE_YOUTUBE);
    if (matchId) {
        return matchId[1];
    }
    throw new YoutubeTranscriptInvalidVideoIdError();
}
124
/**
 * Check a language code against the BCP 47-like pattern `RE_BCP47_LANG`.
 *
 * @param lang - Language code to check (e.g. "en", "pt-BR").
 * @throws {@link YoutubeTranscriptInvalidLangError} if the code does not match the pattern.
 */
function validateLang(lang) {
    const isWellFormed = RE_BCP47_LANG.test(lang);
    if (isWellFormed) {
        return;
    }
    throw new YoutubeTranscriptInvalidLangError(lang);
}
133
/**
 * Built-in fetch hook: issues the request described by `params` through the global `fetch`.
 *
 * Header precedence, lowest to highest: the `User-Agent` default, an
 * `Accept-Language` derived from `lang`, then the caller's `headers`.
 * A request body is attached only for `POST` requests.
 *
 * @param params - `{ url, lang, userAgent, method = 'GET', body, headers = {}, signal }`.
 * @returns A promise for whatever the global `fetch` resolves to.
 */
function defaultFetch(params) {
    return __awaiter(this, void 0, void 0, function* () {
        const { url, lang, userAgent, method = 'GET', body, headers = {}, signal } = params;
        const requestHeaders = { 'User-Agent': userAgent || DEFAULT_USER_AGENT };
        if (lang) {
            requestHeaders['Accept-Language'] = lang;
        }
        // Caller-supplied headers are applied last so they override the defaults.
        Object.assign(requestHeaders, headers);
        const init = { method, headers: requestHeaders, signal };
        if (body && method === 'POST') {
            init.body = body;
        }
        return fetch(url, init);
    });
}
148
/**
 * Tell whether an HTTP status code should trigger a retry.
 *
 * @param status - HTTP status code of a response.
 * @returns `true` for 429 and for any 5xx code, `false` otherwise.
 */
function isRetryableStatus(status) {
    if (status === 429) {
        return true;
    }
    return status >= 500 && status <= 599;
}
152
/**
 * Wait for the given number of milliseconds, aborting early if the signal fires.
 *
 * @param ms - Delay in milliseconds.
 * @param signal - Optional AbortSignal; when it aborts, the wait is cancelled.
 * @returns A promise that resolves (with no value) once the delay has elapsed.
 *   It rejects with `signal.reason` if the signal is already aborted or aborts
 *   during the wait.
 */
function sleep(ms, signal) {
    return new Promise((resolve, reject) => {
        if (signal) {
            // Already aborted: throws here, which rejects the promise.
            signal.throwIfAborted();
        }
        let timer;
        const onAbort = () => {
            clearTimeout(timer);
            reject(signal.reason);
        };
        timer = setTimeout(() => {
            // Detach the abort handler so a long-lived signal does not
            // accumulate one listener per completed sleep.
            if (signal) {
                signal.removeEventListener('abort', onAbort);
            }
            resolve();
        }, ms);
        if (signal) {
            signal.addEventListener('abort', onAbort, { once: true });
        }
    });
}
168
/**
 * Wrap a fetch call with retry logic using exponential backoff.
 *
 * Retries on 429 (Too Many Requests) and 5xx (Server Errors).
 * Client errors (4xx other than 429) are returned immediately.
 * Errors thrown by `fetchFn` itself are not retried; they propagate to the caller.
 *
 * @param fetchFn - Function that performs the fetch call.
 * @param retries - Maximum number of retry attempts (0 = no retries). Negative,
 *   `NaN` or missing values are treated as 0; fractional values are rounded down.
 * @param retryDelay - Base delay in milliseconds for exponential backoff.
 * @param signal - Optional AbortSignal to cancel the operation.
 * @returns The fetch Response: the first non-retryable one, or the last one
 *   received once the retry budget is exhausted.
 */
function fetchWithRetry(fetchFn, retries, retryDelay, signal) {
    return __awaiter(this, void 0, void 0, function* () {
        // Normalise the budget so at least one request is always made and the
        // loop below always terminates by returning a response.
        const maxRetries = retries > 0 ? Math.floor(retries) : 0;
        for (let attempt = 0;; attempt++) {
            if (signal) {
                signal.throwIfAborted();
            }
            const response = yield fetchFn();
            if (!isRetryableStatus(response.status) || attempt >= maxRetries) {
                return response;
            }
            // This response is being discarded: release its body (best effort)
            // so the underlying connection is not held until garbage collection.
            const discardedBody = response.body;
            if (discardedBody && typeof discardedBody.cancel === 'function') {
                try {
                    Promise.resolve(discardedBody.cancel()).catch(() => { });
                }
                catch (_cancelError) {
                    // Cleanup failure must not mask the retry itself.
                }
            }
            // Wait with exponential backoff: delay * 2^attempt
            yield sleep(retryDelay * Math.pow(2, attempt), signal);
        }
    });
}
196
+
197
/**
 * Turn a cache key into a string usable as a file name.
 *
 * @param key - Cache key.
 * @returns The key with every character outside `A-Z a-z 0-9 _ -` replaced by `_`.
 */
function sanitizeKey(key) {
    const unsafeCharacters = /[^a-zA-Z0-9_-]/g;
    return key.replace(unsafeCharacters, '_');
}
200
/**
 * Cache backed by the file system.
 *
 * Every entry lives in its own JSON file (`{ value, expires }`) inside the
 * cache directory; the file name is the sanitized cache key. An entry found
 * to be expired when read is deleted.
 *
 * @example
 * ```typescript
 * import { fetchTranscript, FsCache } from 'youtube-transcript-plus';
 * const transcript = await fetchTranscript('dQw4w9WgXcQ', {
 *   cache: new FsCache('./my-cache-dir', 86400000), // 1 day TTL
 * });
 * ```
 */
class FsCache {
    /**
     * @param cacheDir - Directory holding the cache files; created (recursively) on construction.
     * @param defaultTTL - Time-to-live in milliseconds used when `set` gets no explicit TTL. Defaults to 1 hour.
     */
    constructor(cacheDir = './cache', defaultTTL = DEFAULT_CACHE_TTL) {
        this.cacheDir = cacheDir;
        this.defaultTTL = defaultTTL;
        // Every get/set waits on this promise so the directory exists first.
        this.ready = fs.mkdir(cacheDir, { recursive: true }).then(() => { });
    }
    /**
     * Read a cached value.
     *
     * @param key - Cache key.
     * @returns The stored value, or `null` when the entry is missing, unreadable or expired.
     */
    get(key) {
        return __awaiter(this, void 0, void 0, function* () {
            yield this.ready;
            const entryPath = path.join(this.cacheDir, sanitizeKey(key));
            try {
                const serialized = yield fs.readFile(entryPath, 'utf-8');
                const entry = JSON.parse(serialized);
                if (entry.expires > Date.now()) {
                    return entry.value;
                }
                // Expired: remove the stale file.
                yield fs.unlink(entryPath);
            }
            catch (_error) {
                // Any read, parse or unlink failure is treated as a cache miss.
            }
            return null;
        });
    }
    /**
     * Store a value.
     *
     * @param key - Cache key.
     * @param value - Value to store; it is serialized with `JSON.stringify`.
     * @param ttl - Time-to-live in milliseconds; falls back to `defaultTTL` when null/undefined.
     */
    set(key, value, ttl) {
        return __awaiter(this, void 0, void 0, function* () {
            yield this.ready;
            const entryPath = path.join(this.cacheDir, sanitizeKey(key));
            const lifetime = ttl !== null && ttl !== undefined ? ttl : this.defaultTTL;
            const expires = Date.now() + lifetime;
            yield fs.writeFile(entryPath, JSON.stringify({ value, expires }), 'utf-8');
        });
    }
}
249
+
250
/**
 * Cache that keeps its entries in a `Map` held in memory.
 *
 * An entry is dropped when it is read after its expiry time.
 *
 * @example
 * ```typescript
 * import { fetchTranscript, InMemoryCache } from 'youtube-transcript-plus';
 * const transcript = await fetchTranscript('dQw4w9WgXcQ', {
 *   cache: new InMemoryCache(1800000), // 30 minutes TTL
 * });
 * ```
 */
class InMemoryCache {
    /** @param defaultTTL - Time-to-live in milliseconds used when `set` gets no explicit TTL. Defaults to 1 hour. */
    constructor(defaultTTL = DEFAULT_CACHE_TTL) {
        this.cache = new Map();
        this.defaultTTL = defaultTTL;
    }
    /**
     * Read a cached value.
     *
     * @param key - Cache key.
     * @returns The stored value, or `null` when the entry is missing or expired.
     */
    get(key) {
        return __awaiter(this, void 0, void 0, function* () {
            const entry = this.cache.get(key);
            const isFresh = entry && entry.expires > Date.now();
            if (isFresh) {
                return entry.value;
            }
            // Missing or expired: make sure no stale entry lingers.
            this.cache.delete(key);
            return null;
        });
    }
    /**
     * Store a value.
     *
     * @param key - Cache key.
     * @param value - Value to store.
     * @param ttl - Time-to-live in milliseconds; falls back to `defaultTTL` when null/undefined.
     */
    set(key, value, ttl) {
        return __awaiter(this, void 0, void 0, function* () {
            const lifetime = ttl !== null && ttl !== undefined ? ttl : this.defaultTTL;
            this.cache.set(key, { value, expires: Date.now() + lifetime });
        });
    }
}
286
+
287
/**
 * Format seconds as an SRT timestamp: `HH:MM:SS,mmm`
 * SRT uses comma as the decimal separator per specification.
 *
 * The value is rounded to whole milliseconds before it is split into fields,
 * so a fraction that rounds up to a full second carries into the seconds,
 * minutes and hours (1.9996 → `00:00:02,000`, never `00:00:01,1000`).
 *
 * @param seconds - Time offset in seconds (may be fractional).
 * @returns The formatted timestamp.
 */
function formatSrtTimestamp(seconds) {
    const totalMs = Math.round(seconds * 1000);
    const ms = totalMs % 1000;
    const totalSeconds = Math.floor(totalMs / 1000);
    const s = totalSeconds % 60;
    const m = Math.floor(totalSeconds / 60) % 60;
    const h = Math.floor(totalSeconds / 3600);
    const pad = (value, width) => String(value).padStart(width, '0');
    return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)},${pad(ms, 3)}`;
}
304
/**
 * Format seconds as a VTT timestamp: `HH:MM:SS.mmm`
 * VTT uses period as the decimal separator per specification.
 *
 * The value is rounded to whole milliseconds before it is split into fields,
 * so a fraction that rounds up to a full second carries into the seconds,
 * minutes and hours (1.9996 → `00:00:02.000`, never `00:00:01.1000`).
 *
 * @param seconds - Time offset in seconds (may be fractional).
 * @returns The formatted timestamp.
 */
function formatVttTimestamp(seconds) {
    const totalMs = Math.round(seconds * 1000);
    const ms = totalMs % 1000;
    const totalSeconds = Math.floor(totalMs / 1000);
    const s = totalSeconds % 60;
    const m = Math.floor(totalSeconds / 60) % 60;
    const h = Math.floor(totalSeconds / 3600);
    const pad = (value, width) => String(value).padStart(width, '0');
    return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)}.${pad(ms, 3)}`;
}
321
/**
 * Render transcript segments as a SubRip (SRT) document.
 *
 * Each cue consists of a 1-based sequence number, a
 * `HH:MM:SS,mmm --> HH:MM:SS,mmm` timing line and the segment text; cues are
 * separated by a blank line.
 *
 * @param segments - Array of transcript segments from {@link fetchTranscript}.
 * @returns The SRT document as a string (empty for an empty array).
 *
 * @example
 * ```typescript
 * import { fetchTranscript, toSRT } from 'youtube-transcript-plus';
 * const transcript = await fetchTranscript('dQw4w9WgXcQ');
 * const srt = toSRT(transcript);
 *
 * // With videoDetails enabled, use result.segments:
 * const result = await fetchTranscript('dQw4w9WgXcQ', { videoDetails: true });
 * const srt2 = toSRT(result.segments);
 * ```
 */
function toSRT(segments) {
    const renderCue = (segment, index) => {
        const sequence = index + 1;
        const startStamp = formatSrtTimestamp(segment.offset);
        const endStamp = formatSrtTimestamp(segment.offset + segment.duration);
        const timing = `${startStamp} --> ${endStamp}`;
        return `${sequence}\n${timing}\n${segment.text}`;
    };
    const cues = segments.map(renderCue);
    return cues.join('\n\n');
}
347
/**
 * Render transcript segments as a WebVTT (VTT) document.
 *
 * The output starts with the `WEBVTT` header followed by a blank line; each
 * cue is a `HH:MM:SS.mmm --> HH:MM:SS.mmm` timing line and the segment text,
 * with cues separated by a blank line.
 *
 * @param segments - Array of transcript segments from {@link fetchTranscript}.
 * @returns The VTT document as a string.
 *
 * @example
 * ```typescript
 * import { fetchTranscript, toVTT } from 'youtube-transcript-plus';
 * const transcript = await fetchTranscript('dQw4w9WgXcQ');
 * const vtt = toVTT(transcript);
 *
 * // With videoDetails enabled, use result.segments:
 * const result = await fetchTranscript('dQw4w9WgXcQ', { videoDetails: true });
 * const vtt2 = toVTT(result.segments);
 * ```
 */
function toVTT(segments) {
    const renderCue = (segment) => {
        const startStamp = formatVttTimestamp(segment.offset);
        const endStamp = formatVttTimestamp(segment.offset + segment.duration);
        const timing = `${startStamp} --> ${endStamp}`;
        return `${timing}\n${segment.text}`;
    };
    const body = segments.map(renderCue).join('\n\n');
    return `WEBVTT\n\n${body}`;
}
374
/**
 * Join the text of all transcript segments into one plain string.
 *
 * @param segments - Array of transcript segments from {@link fetchTranscript}.
 * @param separator - String placed between consecutive segments. Defaults to `'\n'`.
 * @returns The segment texts joined by the separator.
 *
 * @example
 * ```typescript
 * import { fetchTranscript, toPlainText } from 'youtube-transcript-plus';
 * const transcript = await fetchTranscript('dQw4w9WgXcQ');
 * const text = toPlainText(transcript);
 * const paragraph = toPlainText(transcript, ' ');
 *
 * // With videoDetails enabled, use result.segments:
 * const result = await fetchTranscript('dQw4w9WgXcQ', { videoDetails: true });
 * const text2 = toPlainText(result.segments);
 * ```
 */
function toPlainText(segments, separator = '\n') {
    const texts = segments.map(({ text }) => text);
    return texts.join(separator);
}
396
+
397
/**
 * Fetches YouTube video transcripts and caption metadata using the Innertube API.
 *
 * Can be used as an instance (with shared config) or via static/convenience methods.
 *
 * @example
 * ```typescript
 * // Instance usage with shared config
 * const yt = new YoutubeTranscript({ lang: 'en' });
 * const transcript = await yt.fetchTranscript('dQw4w9WgXcQ');
 * const languages = await yt.listLanguages('dQw4w9WgXcQ');
 *
 * // Static method
 * const transcript = await YoutubeTranscript.fetchTranscript('dQw4w9WgXcQ', { lang: 'en' });
 *
 * // Opt-in to video details
 * const { videoDetails, segments } = await YoutubeTranscript.fetchTranscript('dQw4w9WgXcQ', {
 *   videoDetails: true,
 * });
 *
 * // Convenience export
 * const transcript = await fetchTranscript('dQw4w9WgXcQ');
 * const languages = await listLanguages('dQw4w9WgXcQ');
 * ```
 */
class YoutubeTranscript {
    /**
     * @param config - Optional configuration shared by every call on this instance.
     *   Options read by this class: `lang`, `userAgent`, `disableHttps`, `retries`,
     *   `retryDelay`, `signal`, `cache`, `cacheTTL`, `videoDetails`, and the fetch
     *   hooks `videoFetch`, `playerFetch` and `transcriptFetch`.
     */
    constructor(config) {
        this.config = config;
    }
    /**
     * Fetch caption tracks and the player response from the Innertube player API.
     * Shared logic used by both fetchTranscript and listLanguages.
     *
     * @param identifier - The 11-character video ID.
     * @param lang - Optional language code, forwarded to the fetch hooks.
     * @returns `{ tracks, playerJson }`: the non-empty caption track list and the raw player response.
     * @throws YoutubeTranscriptTooManyRequestError on HTTP 429 or a reCAPTCHA page.
     * @throws YoutubeTranscriptVideoUnavailableError on any other failed HTTP response.
     * @throws YoutubeTranscriptNotAvailableError if no API key or caption data can be found.
     * @throws YoutubeTranscriptDisabledError if the video is playable but has no caption tracks.
     */
    _fetchCaptionTracks(identifier, lang) {
        return __awaiter(this, void 0, void 0, function* () {
            const config = this.config != null ? this.config : {};
            const userAgent = config.userAgent != null ? config.userAgent : DEFAULT_USER_AGENT;
            const protocol = config.disableHttps ? 'http' : 'https';
            const retries = config.retries != null ? config.retries : 0;
            const retryDelay = config.retryDelay != null ? config.retryDelay : 1000;
            const signal = config.signal;
            // 1) Fetch the watch page to extract an Innertube API key
            const watchUrl = `${protocol}://www.youtube.com/watch?v=${identifier}`;
            const watchFetchParams = { url: watchUrl, lang, userAgent, signal };
            const videoPageResponse = yield fetchWithRetry(() => (config.videoFetch
                ? config.videoFetch(watchFetchParams)
                : defaultFetch(watchFetchParams)), retries, retryDelay, signal);
            if (!videoPageResponse.ok) {
                // Rate limiting is not "video unavailable": report it the same way
                // the transcript download does.
                if (videoPageResponse.status === 429) {
                    throw new YoutubeTranscriptTooManyRequestError();
                }
                throw new YoutubeTranscriptVideoUnavailableError(identifier);
            }
            const videoPageBody = yield videoPageResponse.text();
            // Basic bot/recaptcha detection preserves old error behavior
            if (videoPageBody.includes('class="g-recaptcha"')) {
                throw new YoutubeTranscriptTooManyRequestError();
            }
            // 2) Extract Innertube API key from the page (plain or backslash-escaped JSON)
            const apiKeyMatch = videoPageBody.match(/"INNERTUBE_API_KEY":"([^"]+)"/) ||
                videoPageBody.match(/INNERTUBE_API_KEY\\":\\"([^\\"]+)\\"/);
            if (!apiKeyMatch) {
                throw new YoutubeTranscriptNotAvailableError(identifier);
            }
            const apiKey = apiKeyMatch[1];
            // 3) Call Innertube player as ANDROID client to retrieve captionTracks
            const playerEndpoint = `${protocol}://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
            const playerBody = {
                context: {
                    client: {
                        clientName: 'ANDROID',
                        clientVersion: '20.10.38',
                    },
                },
                videoId: identifier,
            };
            const playerFetchParams = {
                url: playerEndpoint,
                method: 'POST',
                lang,
                userAgent,
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify(playerBody),
                signal,
            };
            const playerRes = yield fetchWithRetry(() => (config.playerFetch
                ? config.playerFetch(playerFetchParams)
                : defaultFetch(playerFetchParams)), retries, retryDelay, signal);
            if (!playerRes.ok) {
                if (playerRes.status === 429) {
                    throw new YoutubeTranscriptTooManyRequestError();
                }
                throw new YoutubeTranscriptVideoUnavailableError(identifier);
            }
            const playerJson = (yield playerRes.json());
            const captions = playerJson.captions;
            // The renderer normally sits under `captions`; fall back to the top level.
            let tracklist = captions != null ? captions.playerCaptionsTracklistRenderer : undefined;
            if (tracklist == null) {
                tracklist = playerJson.playerCaptionsTracklistRenderer;
            }
            const tracks = tracklist != null ? tracklist.captionTracks : undefined;
            const playability = playerJson.playabilityStatus;
            const isPlayableOk = (playability != null ? playability.status : undefined) === 'OK';
            // If `captions` is entirely missing, treat as "not available"
            if (!captions || !tracklist) {
                // If video is playable but captions aren't provided, treat as "disabled"
                if (isPlayableOk) {
                    throw new YoutubeTranscriptDisabledError(identifier);
                }
                // Otherwise we can't assert they're disabled; treat as "not available"
                throw new YoutubeTranscriptNotAvailableError(identifier);
            }
            // If `captions` exists but there are zero tracks, treat as "disabled"
            if (!Array.isArray(tracks) || tracks.length === 0) {
                throw new YoutubeTranscriptDisabledError(identifier);
            }
            return { tracks, playerJson };
        });
    }
    /**
     * Extract VideoDetails from the Innertube player response.
     *
     * Missing fields fall back to neutral defaults (empty string/array, `false`,
     * `0`), and `videoId` falls back to `identifier`.
     *
     * @param playerJson - Raw player response.
     * @param identifier - Video ID used when the response carries none.
     * @returns The normalised video details object.
     */
    _extractVideoDetails(playerJson, identifier) {
        const raw = playerJson.videoDetails != null ? playerJson.videoDetails : {};
        const orDefault = (value, fallback) => (value != null ? value : fallback);
        const thumbnail = raw.thumbnail;
        return {
            videoId: orDefault(raw.videoId, identifier),
            title: orDefault(raw.title, ''),
            author: orDefault(raw.author, ''),
            channelId: orDefault(raw.channelId, ''),
            lengthSeconds: parseInt(orDefault(raw.lengthSeconds, '0'), 10),
            viewCount: parseInt(orDefault(raw.viewCount, '0'), 10),
            description: orDefault(raw.shortDescription, ''),
            keywords: orDefault(raw.keywords, []),
            thumbnails: orDefault(thumbnail != null ? thumbnail.thumbnails : undefined, []),
            isLiveContent: orDefault(raw.isLiveContent, false),
        };
    }
    /**
     * Fetch the transcript for a YouTube video.
     *
     * When `videoDetails` is set to `true` in the config, returns a {@link TranscriptResult}
     * containing both video metadata and transcript segments. Otherwise returns an array of
     * {@link TranscriptSegment} objects.
     *
     * **Note:** The instance method returns a union type because `videoDetails` is set at
     * construction time. For automatic type narrowing, use the static method or the
     * `fetchTranscript` convenience export instead.
     *
     * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
     * @returns An array of transcript segments, or a TranscriptResult if `videoDetails` is enabled.
     * @throws {@link YoutubeTranscriptInvalidVideoIdError} if the video ID/URL is invalid.
     * @throws {@link YoutubeTranscriptInvalidLangError} if the configured `lang` is malformed.
     * @throws {@link YoutubeTranscriptVideoUnavailableError} if the video is unavailable.
     * @throws {@link YoutubeTranscriptDisabledError} if transcripts are disabled.
     * @throws {@link YoutubeTranscriptNotAvailableError} if no transcript is available.
     * @throws {@link YoutubeTranscriptNotAvailableLanguageError} if the requested language is unavailable.
     * @throws {@link YoutubeTranscriptTooManyRequestError} if rate-limited by YouTube.
     */
    fetchTranscript(videoId) {
        return __awaiter(this, void 0, void 0, function* () {
            const identifier = retrieveVideoId(videoId);
            const config = this.config != null ? this.config : {};
            const lang = config.lang;
            if (lang) {
                validateLang(lang);
            }
            const userAgent = config.userAgent != null ? config.userAgent : DEFAULT_USER_AGENT;
            const includeDetails = config.videoDetails === true;
            // Cache lookup (if provided). Results with and without details use distinct keys.
            const cache = config.cache;
            const cacheTTL = config.cacheTTL;
            const langKey = lang != null ? lang : '';
            const cacheKey = includeDetails
                ? `yt:transcript+details:${identifier}:${langKey}`
                : `yt:transcript:${identifier}:${langKey}`;
            if (cache) {
                const cached = yield cache.get(cacheKey);
                if (cached) {
                    try {
                        return JSON.parse(cached);
                    }
                    catch (_parseError) {
                        // A corrupt cache entry is ignored; fall through and refetch.
                    }
                }
            }
            const { tracks, playerJson } = yield this._fetchCaptionTracks(identifier, lang);
            // Respect requested language or fallback to first track
            const selectedTrack = lang
                ? tracks.find((t) => t.languageCode === lang)
                : tracks[0];
            if (!selectedTrack) {
                const available = tracks.map((t) => t.languageCode).filter(Boolean);
                throw new YoutubeTranscriptNotAvailableLanguageError(lang, available, identifier);
            }
            // Build transcript URL; prefer XML by stripping fmt if present
            const transcriptBaseURL = selectedTrack.baseUrl != null ? selectedTrack.baseUrl : selectedTrack.url;
            if (!transcriptBaseURL) {
                throw new YoutubeTranscriptNotAvailableError(identifier);
            }
            let transcriptURL = transcriptBaseURL.replace(/&fmt=[^&]+/, '');
            if (config.disableHttps) {
                transcriptURL = transcriptURL.replace(/^https:\/\//, 'http://');
            }
            // Fetch transcript XML using the same hook surface as before
            const retries = config.retries != null ? config.retries : 0;
            const retryDelay = config.retryDelay != null ? config.retryDelay : 1000;
            const signal = config.signal;
            const transcriptFetchParams = { url: transcriptURL, lang, userAgent, signal };
            const transcriptResponse = yield fetchWithRetry(() => (config.transcriptFetch
                ? config.transcriptFetch(transcriptFetchParams)
                : defaultFetch(transcriptFetchParams)), retries, retryDelay, signal);
            if (!transcriptResponse.ok) {
                // Preserve legacy behavior
                if (transcriptResponse.status === 429) {
                    throw new YoutubeTranscriptTooManyRequestError();
                }
                throw new YoutubeTranscriptNotAvailableError(identifier);
            }
            const transcriptBody = yield transcriptResponse.text();
            // Parse XML into TranscriptSegment objects
            const segmentLang = lang != null ? lang : selectedTrack.languageCode;
            const segments = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)].map((m) => ({
                text: decodeXmlEntities(m[3]),
                duration: parseFloat(m[2]),
                offset: parseFloat(m[1]),
                lang: segmentLang,
            }));
            if (segments.length === 0) {
                throw new YoutubeTranscriptNotAvailableError(identifier);
            }
            // Build the result based on whether videoDetails was requested
            const result = includeDetails
                ? { videoDetails: this._extractVideoDetails(playerJson, identifier), segments }
                : segments;
            // Cache store
            if (cache) {
                try {
                    yield cache.set(cacheKey, JSON.stringify(result), cacheTTL);
                }
                catch (_storeError) {
                    // non-fatal: a failed cache write must not fail the fetch
                }
            }
            return result;
        });
    }
    /**
     * List available caption languages for a YouTube video.
     *
     * Queries the Innertube player API to discover what caption tracks exist,
     * without downloading any transcript data.
     *
     * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
     * @returns An array of available caption track info objects.
     * @throws {@link YoutubeTranscriptInvalidVideoIdError} if the video ID/URL is invalid.
     * @throws {@link YoutubeTranscriptVideoUnavailableError} if the video is unavailable.
     * @throws {@link YoutubeTranscriptDisabledError} if transcripts are disabled.
     * @throws {@link YoutubeTranscriptNotAvailableError} if no captions are available.
     * @throws {@link YoutubeTranscriptTooManyRequestError} if rate-limited by YouTube.
     *
     * @example
     * ```typescript
     * const yt = new YoutubeTranscript();
     * const languages = await yt.listLanguages('dQw4w9WgXcQ');
     * // [
     * //   { languageCode: 'en', languageName: 'English', isAutoGenerated: false },
     * //   { languageCode: 'es', languageName: 'Spanish (auto-generated)', isAutoGenerated: true },
     * // ]
     * ```
     */
    listLanguages(videoId) {
        return __awaiter(this, void 0, void 0, function* () {
            const identifier = retrieveVideoId(videoId);
            const { tracks } = yield this._fetchCaptionTracks(identifier);
            return tracks.map((track) => {
                const simpleName = track.name != null ? track.name.simpleText : undefined;
                return {
                    languageCode: track.languageCode,
                    languageName: simpleName != null ? simpleName : track.languageCode,
                    isAutoGenerated: track.kind === 'asr',
                };
            });
        });
    }
    /**
     * Static convenience method to fetch a transcript without creating an instance.
     *
     * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
     * @param config - Optional configuration options.
     * @returns An array of transcript segments, or a TranscriptResult if `videoDetails` is enabled.
     */
    static fetchTranscript(videoId, config) {
        return __awaiter(this, void 0, void 0, function* () {
            const instance = new YoutubeTranscript(config);
            return instance.fetchTranscript(videoId);
        });
    }
    /**
     * Static convenience method to list available caption languages without creating an instance.
     *
     * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
     * @param config - Optional configuration options.
     * @returns An array of available caption track info objects.
     */
    static listLanguages(videoId, config) {
        return __awaiter(this, void 0, void 0, function* () {
            const instance = new YoutubeTranscript(config);
            return instance.listLanguages(videoId);
        });
    }
}
700
/**
 * Convenience function to fetch the transcript of a YouTube video.
 *
 * Delegates to `YoutubeTranscript.fetchTranscript` with the same arguments.
 *
 * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
 * @param config - Optional configuration options.
 * @returns Whatever `YoutubeTranscript.fetchTranscript` returns for these arguments.
 */
function fetchTranscript(videoId, config) {
    const pendingTranscript = YoutubeTranscript.fetchTranscript(videoId, config);
    return pendingTranscript;
}
703
/**
 * Convenience function to list available caption languages for a YouTube video.
 *
 * Delegates to `YoutubeTranscript.listLanguages` with the same arguments. The
 * call is made on the class itself rather than through a detached method
 * reference, so it keeps working should the static method ever rely on `this`.
 *
 * @param videoId - A YouTube video ID (11 characters) or full YouTube URL.
 * @param config - Optional configuration options.
 * @returns An array of available caption track info objects.
 *
 * @example
 * ```typescript
 * import { listLanguages } from 'youtube-transcript-plus';
 * const languages = await listLanguages('dQw4w9WgXcQ');
 * ```
 */
function listLanguages(videoId, config) {
    return YoutubeTranscript.listLanguages(videoId, config);
}
717
+
718
// Public CommonJS API. The plain `exports.<name> = <name>` statement form and
// the assignment order are kept as-is.

// Cache implementations and the main class
exports.FsCache = FsCache;
exports.InMemoryCache = InMemoryCache;
exports.YoutubeTranscript = YoutubeTranscript;

// Error types
exports.YoutubeTranscriptDisabledError = YoutubeTranscriptDisabledError;
exports.YoutubeTranscriptInvalidLangError = YoutubeTranscriptInvalidLangError;
exports.YoutubeTranscriptInvalidVideoIdError = YoutubeTranscriptInvalidVideoIdError;
exports.YoutubeTranscriptNotAvailableError = YoutubeTranscriptNotAvailableError;
exports.YoutubeTranscriptNotAvailableLanguageError = YoutubeTranscriptNotAvailableLanguageError;
exports.YoutubeTranscriptTooManyRequestError = YoutubeTranscriptTooManyRequestError;
exports.YoutubeTranscriptVideoUnavailableError = YoutubeTranscriptVideoUnavailableError;

// Convenience functions and formatters
exports.fetchTranscript = fetchTranscript;
exports.listLanguages = listLanguages;
exports.toPlainText = toPlainText;
exports.toSRT = toSRT;
exports.toVTT = toVTT;