youtube-transcript-plus 1.1.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
- import fs from 'fs/promises';
2
- import path from 'path';
1
+ import fs from 'node:fs/promises';
2
+ import path from 'node:path';
3
3
 
4
4
  /******************************************************************************
5
5
  Copyright (c) Microsoft Corporation.
@@ -48,24 +48,30 @@ class YoutubeTranscriptVideoUnavailableError extends Error {
48
48
  constructor(videoId) {
49
49
  super(`The video with ID "${videoId}" is no longer available or has been removed. Please check the video URL or ID and try again.`);
50
50
  this.name = 'YoutubeTranscriptVideoUnavailableError';
51
+ this.videoId = videoId;
51
52
  }
52
53
  }
53
54
  class YoutubeTranscriptDisabledError extends Error {
54
55
  constructor(videoId) {
55
56
  super(`Transcripts are disabled for the video with ID "${videoId}". This may be due to the video owner disabling captions or the video not supporting transcripts.`);
56
57
  this.name = 'YoutubeTranscriptDisabledError';
58
+ this.videoId = videoId;
57
59
  }
58
60
  }
59
61
  class YoutubeTranscriptNotAvailableError extends Error {
60
62
  constructor(videoId) {
61
63
  super(`No transcripts are available for the video with ID "${videoId}". This may be because the video does not have captions or the captions are not accessible.`);
62
64
  this.name = 'YoutubeTranscriptNotAvailableError';
65
+ this.videoId = videoId;
63
66
  }
64
67
  }
65
68
  class YoutubeTranscriptNotAvailableLanguageError extends Error {
66
69
  constructor(lang, availableLangs, videoId) {
67
70
  super(`No transcripts are available in "${lang}" for the video with ID "${videoId}". Available languages: ${availableLangs.join(', ')}. Please try a different language.`);
68
71
  this.name = 'YoutubeTranscriptNotAvailableLanguageError';
72
+ this.videoId = videoId;
73
+ this.lang = lang;
74
+ this.availableLangs = availableLangs;
69
75
  }
70
76
  }
71
77
  class YoutubeTranscriptInvalidVideoIdError extends Error {
@@ -75,8 +81,21 @@ class YoutubeTranscriptInvalidVideoIdError extends Error {
75
81
  }
76
82
  }
77
83
 
84
+ const RE_VIDEO_ID = /^[a-zA-Z0-9_-]{11}$/;
85
+ const XML_ENTITIES = {
86
+ '&amp;': '&',
87
+ '&lt;': '<',
88
+ '&gt;': '>',
89
+ '&quot;': '"',
90
+ '&#39;': "'",
91
+ '&apos;': "'",
92
+ };
93
+ const RE_XML_ENTITY = /&(?:amp|lt|gt|quot|apos|#39);/g;
94
+ function decodeXmlEntities(text) {
95
+ return text.replace(RE_XML_ENTITY, (match) => { var _a; return (_a = XML_ENTITIES[match]) !== null && _a !== void 0 ? _a : match; });
96
+ }
78
97
  function retrieveVideoId(videoId) {
79
- if (videoId.length === 11) {
98
+ if (RE_VIDEO_ID.test(videoId)) {
80
99
  return videoId;
81
100
  }
82
101
  const matchId = videoId.match(RE_YOUTUBE);
@@ -100,15 +119,19 @@ function defaultFetch(params) {
100
119
  });
101
120
  }
102
121
 
122
+ function sanitizeKey(key) {
123
+ return key.replace(/[^a-zA-Z0-9_-]/g, '_');
124
+ }
103
125
  class FsCache {
104
126
  constructor(cacheDir = './cache', defaultTTL = DEFAULT_CACHE_TTL) {
105
127
  this.cacheDir = cacheDir;
106
128
  this.defaultTTL = defaultTTL;
107
- fs.mkdir(cacheDir, { recursive: true }).catch(() => { });
129
+ this.ready = fs.mkdir(cacheDir, { recursive: true }).then(() => { });
108
130
  }
109
131
  get(key) {
110
132
  return __awaiter(this, void 0, void 0, function* () {
111
- const filePath = path.join(this.cacheDir, key);
133
+ yield this.ready;
134
+ const filePath = path.join(this.cacheDir, sanitizeKey(key));
112
135
  try {
113
136
  const data = yield fs.readFile(filePath, 'utf-8');
114
137
  const { value, expires } = JSON.parse(data);
@@ -123,7 +146,8 @@ class FsCache {
123
146
  }
124
147
  set(key, value, ttl) {
125
148
  return __awaiter(this, void 0, void 0, function* () {
126
- const filePath = path.join(this.cacheDir, key);
149
+ yield this.ready;
150
+ const filePath = path.join(this.cacheDir, sanitizeKey(key));
127
151
  const expires = Date.now() + (ttl !== null && ttl !== void 0 ? ttl : this.defaultTTL);
128
152
  yield fs.writeFile(filePath, JSON.stringify({ value, expires }), 'utf-8');
129
153
  });
@@ -165,7 +189,7 @@ class YoutubeTranscript {
165
189
  }
166
190
  fetchTranscript(videoId) {
167
191
  return __awaiter(this, void 0, void 0, function* () {
168
- var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
192
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p;
169
193
  const identifier = retrieveVideoId(videoId);
170
194
  const lang = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang;
171
195
  const userAgent = (_c = (_b = this.config) === null || _b === void 0 ? void 0 : _b.userAgent) !== null && _c !== void 0 ? _c : DEFAULT_USER_AGENT;
@@ -179,7 +203,7 @@ class YoutubeTranscript {
179
203
  try {
180
204
  return JSON.parse(cached);
181
205
  }
182
- catch (_p) {
206
+ catch (_q) {
183
207
  // ignore parse errors and continue
184
208
  }
185
209
  }
@@ -209,7 +233,7 @@ class YoutubeTranscript {
209
233
  }
210
234
  const apiKey = apiKeyMatch[1];
211
235
  // 3) Call Innertube player as ANDROID client to retrieve captionTracks
212
- const playerEndpoint = `https://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
236
+ const playerEndpoint = `${protocol}://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
213
237
  const playerBody = {
214
238
  context: {
215
239
  client: {
@@ -234,17 +258,17 @@ class YoutubeTranscript {
234
258
  if (!playerRes.ok) {
235
259
  throw new YoutubeTranscriptVideoUnavailableError(identifier);
236
260
  }
237
- const playerJson = yield playerRes.json();
238
- const tracklist = (_k = (_j = playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) === null || _j === void 0 ? void 0 : _j.playerCaptionsTracklistRenderer) !== null && _k !== void 0 ? _k : playerJson === null || playerJson === void 0 ? void 0 : playerJson.playerCaptionsTracklistRenderer;
261
+ const playerJson = (yield playerRes.json());
262
+ const tracklist = (_k = (_j = playerJson.captions) === null || _j === void 0 ? void 0 : _j.playerCaptionsTracklistRenderer) !== null && _k !== void 0 ? _k : playerJson.playerCaptionsTracklistRenderer;
239
263
  const tracks = tracklist === null || tracklist === void 0 ? void 0 : tracklist.captionTracks;
240
- const isPlayableOk = ((_l = playerJson === null || playerJson === void 0 ? void 0 : playerJson.playabilityStatus) === null || _l === void 0 ? void 0 : _l.status) === 'OK';
264
+ const isPlayableOk = ((_l = playerJson.playabilityStatus) === null || _l === void 0 ? void 0 : _l.status) === 'OK';
241
265
  // If `captions` is entirely missing, treat as "not available"
242
- if (!(playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) || !tracklist) {
243
- // If video is playable but captions arent provided, treat as "disabled"
266
+ if (!playerJson.captions || !tracklist) {
267
+ // If video is playable but captions aren't provided, treat as "disabled"
244
268
  if (isPlayableOk) {
245
269
  throw new YoutubeTranscriptDisabledError(identifier);
246
270
  }
247
- // Otherwise we cant assert theyre disabled; treat as "not available"
271
+ // Otherwise we can't assert they're disabled; treat as "not available"
248
272
  throw new YoutubeTranscriptNotAvailableError(identifier);
249
273
  }
250
274
  // If `captions` exists but there are zero tracks, treat as "disabled"
@@ -252,22 +276,25 @@ class YoutubeTranscript {
252
276
  throw new YoutubeTranscriptDisabledError(identifier);
253
277
  }
254
278
  // Respect requested language or fallback to first track
255
- const selectedTrack = lang ? tracks.find((t) => t.languageCode === lang) : tracks[0];
279
+ const selectedTrack = lang
280
+ ? tracks.find((t) => t.languageCode === lang)
281
+ : tracks[0];
256
282
  if (!selectedTrack) {
257
283
  const available = tracks.map((t) => t.languageCode).filter(Boolean);
258
284
  throw new YoutubeTranscriptNotAvailableLanguageError(lang, available, identifier);
259
285
  }
260
286
  // 4) Build transcript URL; prefer XML by stripping fmt if present
261
- let transcriptURL = selectedTrack.baseUrl || selectedTrack.url;
262
- if (!transcriptURL) {
287
+ const transcriptBaseURL = (_m = selectedTrack.baseUrl) !== null && _m !== void 0 ? _m : selectedTrack.url;
288
+ if (!transcriptBaseURL) {
263
289
  throw new YoutubeTranscriptNotAvailableError(identifier);
264
290
  }
265
- transcriptURL = transcriptURL.replace(/&fmt=[^&]+$/, '');
266
- if ((_m = this.config) === null || _m === void 0 ? void 0 : _m.disableHttps) {
291
+ let transcriptURL = transcriptBaseURL;
292
+ transcriptURL = transcriptURL.replace(/&fmt=[^&]+/, '');
293
+ if ((_o = this.config) === null || _o === void 0 ? void 0 : _o.disableHttps) {
267
294
  transcriptURL = transcriptURL.replace(/^https:\/\//, 'http://');
268
295
  }
269
296
  // 5) Fetch transcript XML using the same hook surface as before
270
- const transcriptResponse = ((_o = this.config) === null || _o === void 0 ? void 0 : _o.transcriptFetch)
297
+ const transcriptResponse = ((_p = this.config) === null || _p === void 0 ? void 0 : _p.transcriptFetch)
271
298
  ? yield this.config.transcriptFetch({ url: transcriptURL, lang, userAgent })
272
299
  : yield defaultFetch({ url: transcriptURL, lang, userAgent });
273
300
  if (!transcriptResponse.ok) {
@@ -281,7 +308,7 @@ class YoutubeTranscript {
281
308
  // 6) Parse XML into the existing TranscriptResponse shape
282
309
  const results = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)];
283
310
  const transcript = results.map((m) => ({
284
- text: m[3],
311
+ text: decodeXmlEntities(m[3]),
285
312
  duration: parseFloat(m[2]),
286
313
  offset: parseFloat(m[1]),
287
314
  lang: lang !== null && lang !== void 0 ? lang : selectedTrack.languageCode,
@@ -294,7 +321,7 @@ class YoutubeTranscript {
294
321
  try {
295
322
  yield cache.set(cacheKey, JSON.stringify(transcript), cacheTTL);
296
323
  }
297
- catch (_q) {
324
+ catch (_r) {
298
325
  // non-fatal
299
326
  }
300
327
  }