youtube-transcript-plus 1.1.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +170 -15
- package/dist/__tests__/cache/fs-cache.test.d.ts +1 -0
- package/dist/__tests__/cache/in-memory-cache.test.d.ts +1 -0
- package/dist/__tests__/index.test.d.ts +1 -0
- package/dist/__tests__/integration.test.d.ts +1 -0
- package/dist/__tests__/utils.test.d.ts +1 -0
- package/dist/cache/fs-cache.d.ts +19 -0
- package/dist/cache/in-memory-cache.d.ts +14 -0
- package/dist/errors.d.ts +20 -0
- package/dist/formatters.d.ts +57 -0
- package/dist/index.d.ts +141 -10
- package/dist/types.d.ts +126 -2
- package/dist/utils.d.ts +21 -0
- package/dist/youtube-transcript-plus.cjs +732 -0
- package/dist/youtube-transcript-plus.js +50 -23
- package/dist/youtube-transcript-plus.mjs +716 -0
- package/package.json +26 -16
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import fs from 'fs/promises';
|
|
2
|
-
import path from 'path';
|
|
1
|
+
import fs from 'node:fs/promises';
|
|
2
|
+
import path from 'node:path';
|
|
3
3
|
|
|
4
4
|
/******************************************************************************
|
|
5
5
|
Copyright (c) Microsoft Corporation.
|
|
@@ -48,24 +48,30 @@ class YoutubeTranscriptVideoUnavailableError extends Error {
|
|
|
48
48
|
constructor(videoId) {
|
|
49
49
|
super(`The video with ID "${videoId}" is no longer available or has been removed. Please check the video URL or ID and try again.`);
|
|
50
50
|
this.name = 'YoutubeTranscriptVideoUnavailableError';
|
|
51
|
+
this.videoId = videoId;
|
|
51
52
|
}
|
|
52
53
|
}
|
|
53
54
|
class YoutubeTranscriptDisabledError extends Error {
|
|
54
55
|
constructor(videoId) {
|
|
55
56
|
super(`Transcripts are disabled for the video with ID "${videoId}". This may be due to the video owner disabling captions or the video not supporting transcripts.`);
|
|
56
57
|
this.name = 'YoutubeTranscriptDisabledError';
|
|
58
|
+
this.videoId = videoId;
|
|
57
59
|
}
|
|
58
60
|
}
|
|
59
61
|
class YoutubeTranscriptNotAvailableError extends Error {
|
|
60
62
|
constructor(videoId) {
|
|
61
63
|
super(`No transcripts are available for the video with ID "${videoId}". This may be because the video does not have captions or the captions are not accessible.`);
|
|
62
64
|
this.name = 'YoutubeTranscriptNotAvailableError';
|
|
65
|
+
this.videoId = videoId;
|
|
63
66
|
}
|
|
64
67
|
}
|
|
65
68
|
class YoutubeTranscriptNotAvailableLanguageError extends Error {
|
|
66
69
|
constructor(lang, availableLangs, videoId) {
|
|
67
70
|
super(`No transcripts are available in "${lang}" for the video with ID "${videoId}". Available languages: ${availableLangs.join(', ')}. Please try a different language.`);
|
|
68
71
|
this.name = 'YoutubeTranscriptNotAvailableLanguageError';
|
|
72
|
+
this.videoId = videoId;
|
|
73
|
+
this.lang = lang;
|
|
74
|
+
this.availableLangs = availableLangs;
|
|
69
75
|
}
|
|
70
76
|
}
|
|
71
77
|
class YoutubeTranscriptInvalidVideoIdError extends Error {
|
|
@@ -75,8 +81,21 @@ class YoutubeTranscriptInvalidVideoIdError extends Error {
|
|
|
75
81
|
}
|
|
76
82
|
}
|
|
77
83
|
|
|
84
|
+
const RE_VIDEO_ID = /^[a-zA-Z0-9_-]{11}$/;
|
|
85
|
+
const XML_ENTITIES = {
|
|
86
|
+
'&amp;': '&',
|
|
87
|
+
'&lt;': '<',
|
|
88
|
+
'&gt;': '>',
|
|
89
|
+
'&quot;': '"',
|
|
90
|
+
'&#39;': "'",
|
|
91
|
+
'&apos;': "'",
|
|
92
|
+
};
|
|
93
|
+
const RE_XML_ENTITY = /&(?:amp|lt|gt|quot|apos|#39);/g;
|
|
94
|
+
function decodeXmlEntities(text) {
|
|
95
|
+
return text.replace(RE_XML_ENTITY, (match) => { var _a; return (_a = XML_ENTITIES[match]) !== null && _a !== void 0 ? _a : match; });
|
|
96
|
+
}
|
|
78
97
|
function retrieveVideoId(videoId) {
|
|
79
|
-
if (videoId
|
|
98
|
+
if (RE_VIDEO_ID.test(videoId)) {
|
|
80
99
|
return videoId;
|
|
81
100
|
}
|
|
82
101
|
const matchId = videoId.match(RE_YOUTUBE);
|
|
@@ -100,15 +119,19 @@ function defaultFetch(params) {
|
|
|
100
119
|
});
|
|
101
120
|
}
|
|
102
121
|
|
|
122
|
+
function sanitizeKey(key) {
|
|
123
|
+
return key.replace(/[^a-zA-Z0-9_-]/g, '_');
|
|
124
|
+
}
|
|
103
125
|
class FsCache {
|
|
104
126
|
constructor(cacheDir = './cache', defaultTTL = DEFAULT_CACHE_TTL) {
|
|
105
127
|
this.cacheDir = cacheDir;
|
|
106
128
|
this.defaultTTL = defaultTTL;
|
|
107
|
-
fs.mkdir(cacheDir, { recursive: true }).
|
|
129
|
+
this.ready = fs.mkdir(cacheDir, { recursive: true }).then(() => { });
|
|
108
130
|
}
|
|
109
131
|
get(key) {
|
|
110
132
|
return __awaiter(this, void 0, void 0, function* () {
|
|
111
|
-
|
|
133
|
+
yield this.ready;
|
|
134
|
+
const filePath = path.join(this.cacheDir, sanitizeKey(key));
|
|
112
135
|
try {
|
|
113
136
|
const data = yield fs.readFile(filePath, 'utf-8');
|
|
114
137
|
const { value, expires } = JSON.parse(data);
|
|
@@ -123,7 +146,8 @@ class FsCache {
|
|
|
123
146
|
}
|
|
124
147
|
set(key, value, ttl) {
|
|
125
148
|
return __awaiter(this, void 0, void 0, function* () {
|
|
126
|
-
|
|
149
|
+
yield this.ready;
|
|
150
|
+
const filePath = path.join(this.cacheDir, sanitizeKey(key));
|
|
127
151
|
const expires = Date.now() + (ttl !== null && ttl !== void 0 ? ttl : this.defaultTTL);
|
|
128
152
|
yield fs.writeFile(filePath, JSON.stringify({ value, expires }), 'utf-8');
|
|
129
153
|
});
|
|
@@ -165,7 +189,7 @@ class YoutubeTranscript {
|
|
|
165
189
|
}
|
|
166
190
|
fetchTranscript(videoId) {
|
|
167
191
|
return __awaiter(this, void 0, void 0, function* () {
|
|
168
|
-
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
|
|
192
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p;
|
|
169
193
|
const identifier = retrieveVideoId(videoId);
|
|
170
194
|
const lang = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang;
|
|
171
195
|
const userAgent = (_c = (_b = this.config) === null || _b === void 0 ? void 0 : _b.userAgent) !== null && _c !== void 0 ? _c : DEFAULT_USER_AGENT;
|
|
@@ -179,7 +203,7 @@ class YoutubeTranscript {
|
|
|
179
203
|
try {
|
|
180
204
|
return JSON.parse(cached);
|
|
181
205
|
}
|
|
182
|
-
catch (
|
|
206
|
+
catch (_q) {
|
|
183
207
|
// ignore parse errors and continue
|
|
184
208
|
}
|
|
185
209
|
}
|
|
@@ -209,7 +233,7 @@ class YoutubeTranscript {
|
|
|
209
233
|
}
|
|
210
234
|
const apiKey = apiKeyMatch[1];
|
|
211
235
|
// 3) Call Innertube player as ANDROID client to retrieve captionTracks
|
|
212
|
-
const playerEndpoint =
|
|
236
|
+
const playerEndpoint = `${protocol}://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
|
|
213
237
|
const playerBody = {
|
|
214
238
|
context: {
|
|
215
239
|
client: {
|
|
@@ -234,17 +258,17 @@ class YoutubeTranscript {
|
|
|
234
258
|
if (!playerRes.ok) {
|
|
235
259
|
throw new YoutubeTranscriptVideoUnavailableError(identifier);
|
|
236
260
|
}
|
|
237
|
-
const playerJson = yield playerRes.json();
|
|
238
|
-
const tracklist = (_k = (_j = playerJson
|
|
261
|
+
const playerJson = (yield playerRes.json());
|
|
262
|
+
const tracklist = (_k = (_j = playerJson.captions) === null || _j === void 0 ? void 0 : _j.playerCaptionsTracklistRenderer) !== null && _k !== void 0 ? _k : playerJson.playerCaptionsTracklistRenderer;
|
|
239
263
|
const tracks = tracklist === null || tracklist === void 0 ? void 0 : tracklist.captionTracks;
|
|
240
|
-
const isPlayableOk = ((_l = playerJson
|
|
264
|
+
const isPlayableOk = ((_l = playerJson.playabilityStatus) === null || _l === void 0 ? void 0 : _l.status) === 'OK';
|
|
241
265
|
// If `captions` is entirely missing, treat as "not available"
|
|
242
|
-
if (!
|
|
243
|
-
// If video is playable but captions aren
|
|
266
|
+
if (!playerJson.captions || !tracklist) {
|
|
267
|
+
// If video is playable but captions aren't provided, treat as "disabled"
|
|
244
268
|
if (isPlayableOk) {
|
|
245
269
|
throw new YoutubeTranscriptDisabledError(identifier);
|
|
246
270
|
}
|
|
247
|
-
// Otherwise we can
|
|
271
|
+
// Otherwise we can't assert they're disabled; treat as "not available"
|
|
248
272
|
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
249
273
|
}
|
|
250
274
|
// If `captions` exists but there are zero tracks, treat as "disabled"
|
|
@@ -252,22 +276,25 @@ class YoutubeTranscript {
|
|
|
252
276
|
throw new YoutubeTranscriptDisabledError(identifier);
|
|
253
277
|
}
|
|
254
278
|
// Respect requested language or fallback to first track
|
|
255
|
-
const selectedTrack = lang
|
|
279
|
+
const selectedTrack = lang
|
|
280
|
+
? tracks.find((t) => t.languageCode === lang)
|
|
281
|
+
: tracks[0];
|
|
256
282
|
if (!selectedTrack) {
|
|
257
283
|
const available = tracks.map((t) => t.languageCode).filter(Boolean);
|
|
258
284
|
throw new YoutubeTranscriptNotAvailableLanguageError(lang, available, identifier);
|
|
259
285
|
}
|
|
260
286
|
// 4) Build transcript URL; prefer XML by stripping fmt if present
|
|
261
|
-
|
|
262
|
-
if (!
|
|
287
|
+
const transcriptBaseURL = (_m = selectedTrack.baseUrl) !== null && _m !== void 0 ? _m : selectedTrack.url;
|
|
288
|
+
if (!transcriptBaseURL) {
|
|
263
289
|
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
264
290
|
}
|
|
265
|
-
transcriptURL =
|
|
266
|
-
|
|
291
|
+
let transcriptURL = transcriptBaseURL;
|
|
292
|
+
transcriptURL = transcriptURL.replace(/&fmt=[^&]+/, '');
|
|
293
|
+
if ((_o = this.config) === null || _o === void 0 ? void 0 : _o.disableHttps) {
|
|
267
294
|
transcriptURL = transcriptURL.replace(/^https:\/\//, 'http://');
|
|
268
295
|
}
|
|
269
296
|
// 5) Fetch transcript XML using the same hook surface as before
|
|
270
|
-
const transcriptResponse = ((
|
|
297
|
+
const transcriptResponse = ((_p = this.config) === null || _p === void 0 ? void 0 : _p.transcriptFetch)
|
|
271
298
|
? yield this.config.transcriptFetch({ url: transcriptURL, lang, userAgent })
|
|
272
299
|
: yield defaultFetch({ url: transcriptURL, lang, userAgent });
|
|
273
300
|
if (!transcriptResponse.ok) {
|
|
@@ -281,7 +308,7 @@ class YoutubeTranscript {
|
|
|
281
308
|
// 6) Parse XML into the existing TranscriptResponse shape
|
|
282
309
|
const results = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)];
|
|
283
310
|
const transcript = results.map((m) => ({
|
|
284
|
-
text: m[3],
|
|
311
|
+
text: decodeXmlEntities(m[3]),
|
|
285
312
|
duration: parseFloat(m[2]),
|
|
286
313
|
offset: parseFloat(m[1]),
|
|
287
314
|
lang: lang !== null && lang !== void 0 ? lang : selectedTrack.languageCode,
|
|
@@ -294,7 +321,7 @@ class YoutubeTranscript {
|
|
|
294
321
|
try {
|
|
295
322
|
yield cache.set(cacheKey, JSON.stringify(transcript), cacheTTL);
|
|
296
323
|
}
|
|
297
|
-
catch (
|
|
324
|
+
catch (_r) {
|
|
298
325
|
// non-fatal
|
|
299
326
|
}
|
|
300
327
|
}
|