youtube-transcript-plus 1.0.4 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +6 -0
- package/dist/youtube-transcript-plus.js +112 -65
- package/package.json +12 -9
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
import { TranscriptConfig, TranscriptResponse } from './types';
|
|
2
|
+
/**
|
|
3
|
+
* Implementation notes:
|
|
4
|
+
* - Keeps the public surface identical.
|
|
5
|
+
* - Internals now use YouTube Innertube `player` to discover captionTracks instead of scraping the watch HTML.
|
|
6
|
+
* - Honors `lang`, custom fetch hooks (`videoFetch`, `transcriptFetch`), and optional cache strategy.
|
|
7
|
+
*/
|
|
2
8
|
export declare class YoutubeTranscript {
|
|
3
9
|
private config?;
|
|
4
10
|
constructor(config?: TranscriptConfig & {
|
|
@@ -146,101 +146,148 @@ class InMemoryCache {
|
|
|
146
146
|
}
|
|
147
147
|
}
|
|
148
148
|
|
|
149
|
+
/**
|
|
150
|
+
* Implementation notes:
|
|
151
|
+
* - Keeps the public surface identical.
|
|
152
|
+
* - Internals now use YouTube Innertube `player` to discover captionTracks instead of scraping the watch HTML.
|
|
153
|
+
* - Honors `lang`, custom fetch hooks (`videoFetch`, `transcriptFetch`), and optional cache strategy.
|
|
154
|
+
*/
|
|
149
155
|
class YoutubeTranscript {
|
|
150
156
|
constructor(config) {
|
|
151
157
|
this.config = config;
|
|
152
158
|
}
|
|
153
159
|
fetchTranscript(videoId) {
|
|
154
160
|
return __awaiter(this, void 0, void 0, function* () {
|
|
155
|
-
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m
|
|
161
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
|
|
156
162
|
const identifier = retrieveVideoId(videoId);
|
|
157
|
-
const
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
const
|
|
161
|
-
|
|
162
|
-
const cacheKey = `transcript:${identifier}:${
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
163
|
+
const lang = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang;
|
|
164
|
+
const userAgent = (_c = (_b = this.config) === null || _b === void 0 ? void 0 : _b.userAgent) !== null && _c !== void 0 ? _c : DEFAULT_USER_AGENT;
|
|
165
|
+
// Cache lookup (if provided)
|
|
166
|
+
const cache = (_d = this.config) === null || _d === void 0 ? void 0 : _d.cache;
|
|
167
|
+
const cacheTTL = (_e = this.config) === null || _e === void 0 ? void 0 : _e.cacheTTL;
|
|
168
|
+
const cacheKey = `yt:transcript:${identifier}:${lang !== null && lang !== void 0 ? lang : ''}`;
|
|
169
|
+
if (cache) {
|
|
170
|
+
const cached = yield cache.get(cacheKey);
|
|
171
|
+
if (cached) {
|
|
172
|
+
try {
|
|
173
|
+
return JSON.parse(cached);
|
|
174
|
+
}
|
|
175
|
+
catch (_o) {
|
|
176
|
+
// ignore parse errors and continue
|
|
177
|
+
}
|
|
168
178
|
}
|
|
169
179
|
}
|
|
180
|
+
// 1) Fetch the watch page to extract an Innertube API key (no interface change)
|
|
181
|
+
// Decide protocol once and reuse
|
|
170
182
|
const protocol = ((_f = this.config) === null || _f === void 0 ? void 0 : _f.disableHttps) ? 'http' : 'https';
|
|
171
|
-
|
|
172
|
-
const videoPageResponse =
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
userAgent,
|
|
176
|
-
});
|
|
183
|
+
const watchUrl = `${protocol}://www.youtube.com/watch?v=${identifier}`;
|
|
184
|
+
const videoPageResponse = ((_g = this.config) === null || _g === void 0 ? void 0 : _g.videoFetch)
|
|
185
|
+
? yield this.config.videoFetch({ url: watchUrl, lang, userAgent })
|
|
186
|
+
: yield defaultFetch({ url: watchUrl, lang, userAgent });
|
|
177
187
|
if (!videoPageResponse.ok) {
|
|
178
188
|
throw new YoutubeTranscriptVideoUnavailableError(identifier);
|
|
179
189
|
}
|
|
180
190
|
const videoPageBody = yield videoPageResponse.text();
|
|
181
|
-
//
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
if (videoPageBody.includes('class="g-recaptcha"')) {
|
|
185
|
-
throw new YoutubeTranscriptTooManyRequestError();
|
|
186
|
-
}
|
|
187
|
-
if (!videoPageBody.includes('"playabilityStatus":')) {
|
|
188
|
-
throw new YoutubeTranscriptVideoUnavailableError(identifier);
|
|
189
|
-
}
|
|
190
|
-
throw new YoutubeTranscriptDisabledError(identifier);
|
|
191
|
+
// Basic bot/recaptcha detection preserves old error behavior
|
|
192
|
+
if (videoPageBody.includes('class="g-recaptcha"')) {
|
|
193
|
+
throw new YoutubeTranscriptTooManyRequestError();
|
|
191
194
|
}
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
195
|
+
// 2) Extract Innertube API key from the page
|
|
196
|
+
const apiKeyMatch = videoPageBody.match(/"INNERTUBE_API_KEY":"([^"]+)"/) ||
|
|
197
|
+
videoPageBody.match(/INNERTUBE_API_KEY\\":\\"([^\\"]+)\\"/);
|
|
198
|
+
if (!apiKeyMatch) {
|
|
199
|
+
// If captions JSON wasn't present previously and we also can't find an API key,
|
|
200
|
+
// retain the disabled semantics for compatibility.
|
|
201
|
+
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
202
|
+
}
|
|
203
|
+
const apiKey = apiKeyMatch[1];
|
|
204
|
+
// 3) Call Innertube player as ANDROID client to retrieve captionTracks
|
|
205
|
+
const playerEndpoint = `https://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
|
|
206
|
+
const playerBody = {
|
|
207
|
+
context: {
|
|
208
|
+
client: {
|
|
209
|
+
clientName: 'ANDROID',
|
|
210
|
+
clientVersion: '20.10.38',
|
|
211
|
+
},
|
|
212
|
+
},
|
|
213
|
+
videoId: identifier,
|
|
214
|
+
};
|
|
215
|
+
// Use global fetch for the POST. No public interface change.
|
|
216
|
+
const playerRes = yield fetch(playerEndpoint, {
|
|
217
|
+
method: 'POST',
|
|
218
|
+
headers: Object.assign({ 'Content-Type': 'application/json', 'User-Agent': userAgent }, (lang ? { 'Accept-Language': lang } : {})),
|
|
219
|
+
body: JSON.stringify(playerBody),
|
|
220
|
+
});
|
|
221
|
+
if (!playerRes.ok) {
|
|
222
|
+
throw new YoutubeTranscriptVideoUnavailableError(identifier);
|
|
223
|
+
}
|
|
224
|
+
const playerJson = yield playerRes.json();
|
|
225
|
+
const tracklist = (_j = (_h = playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) === null || _h === void 0 ? void 0 : _h.playerCaptionsTracklistRenderer) !== null && _j !== void 0 ? _j : playerJson === null || playerJson === void 0 ? void 0 : playerJson.playerCaptionsTracklistRenderer;
|
|
226
|
+
const tracks = tracklist === null || tracklist === void 0 ? void 0 : tracklist.captionTracks;
|
|
227
|
+
const isPlayableOk = ((_k = playerJson === null || playerJson === void 0 ? void 0 : playerJson.playabilityStatus) === null || _k === void 0 ? void 0 : _k.status) === 'OK';
|
|
228
|
+
// If `captions` is entirely missing, treat as "not available"
|
|
229
|
+
if (!(playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) || !tracklist) {
|
|
230
|
+
// If video is playable but captions aren’t provided, treat as "disabled"
|
|
231
|
+
if (isPlayableOk) {
|
|
232
|
+
throw new YoutubeTranscriptDisabledError(identifier);
|
|
198
233
|
}
|
|
199
|
-
|
|
200
|
-
|
|
234
|
+
// Otherwise we can’t assert they’re disabled; treat as "not available"
|
|
235
|
+
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
236
|
+
}
|
|
237
|
+
// If `captions` exists but there are zero tracks, treat as "disabled"
|
|
238
|
+
if (!Array.isArray(tracks) || tracks.length === 0) {
|
|
201
239
|
throw new YoutubeTranscriptDisabledError(identifier);
|
|
202
240
|
}
|
|
203
|
-
|
|
241
|
+
// Respect requested language or fallback to first track
|
|
242
|
+
const selectedTrack = lang ? tracks.find((t) => t.languageCode === lang) : tracks[0];
|
|
243
|
+
if (!selectedTrack) {
|
|
244
|
+
const available = tracks.map((t) => t.languageCode).filter(Boolean);
|
|
245
|
+
throw new YoutubeTranscriptNotAvailableLanguageError(lang, available, identifier);
|
|
246
|
+
}
|
|
247
|
+
// 4) Build transcript URL; prefer XML by stripping fmt if present
|
|
248
|
+
let transcriptURL = selectedTrack.baseUrl || selectedTrack.url;
|
|
249
|
+
if (!transcriptURL) {
|
|
204
250
|
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
205
251
|
}
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
252
|
+
transcriptURL = transcriptURL.replace(/&fmt=[^&]+$/, '');
|
|
253
|
+
if ((_l = this.config) === null || _l === void 0 ? void 0 : _l.disableHttps) {
|
|
254
|
+
transcriptURL = transcriptURL.replace(/^https:\/\//, 'http://');
|
|
209
255
|
}
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
:
|
|
213
|
-
|
|
214
|
-
? captionURL.replace('https://', 'http://')
|
|
215
|
-
: captionURL;
|
|
216
|
-
// Fetch the transcript
|
|
217
|
-
const transcriptResponse = yield transcriptFetch({
|
|
218
|
-
url: transcriptURL,
|
|
219
|
-
lang: (_o = this.config) === null || _o === void 0 ? void 0 : _o.lang,
|
|
220
|
-
userAgent,
|
|
221
|
-
});
|
|
256
|
+
// 5) Fetch transcript XML using the same hook surface as before
|
|
257
|
+
const transcriptResponse = ((_m = this.config) === null || _m === void 0 ? void 0 : _m.transcriptFetch)
|
|
258
|
+
? yield this.config.transcriptFetch({ url: transcriptURL, lang, userAgent })
|
|
259
|
+
: yield defaultFetch({ url: transcriptURL, lang, userAgent });
|
|
222
260
|
if (!transcriptResponse.ok) {
|
|
261
|
+
// Preserve legacy behavior
|
|
262
|
+
if (transcriptResponse.status === 429) {
|
|
263
|
+
throw new YoutubeTranscriptTooManyRequestError();
|
|
264
|
+
}
|
|
223
265
|
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
224
266
|
}
|
|
225
267
|
const transcriptBody = yield transcriptResponse.text();
|
|
268
|
+
// 6) Parse XML into the existing TranscriptResponse shape
|
|
226
269
|
const results = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)];
|
|
227
|
-
const transcript = results.map((
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
}
|
|
236
|
-
//
|
|
237
|
-
if (
|
|
238
|
-
|
|
270
|
+
const transcript = results.map((m) => ({
|
|
271
|
+
text: m[3],
|
|
272
|
+
duration: parseFloat(m[2]),
|
|
273
|
+
offset: parseFloat(m[1]),
|
|
274
|
+
lang: lang !== null && lang !== void 0 ? lang : selectedTrack.languageCode,
|
|
275
|
+
}));
|
|
276
|
+
if (transcript.length === 0) {
|
|
277
|
+
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
278
|
+
}
|
|
279
|
+
// Cache store
|
|
280
|
+
if (cache) {
|
|
281
|
+
try {
|
|
282
|
+
yield cache.set(cacheKey, JSON.stringify(transcript), cacheTTL);
|
|
283
|
+
}
|
|
284
|
+
catch (_p) {
|
|
285
|
+
// non-fatal
|
|
286
|
+
}
|
|
239
287
|
}
|
|
240
288
|
return transcript;
|
|
241
289
|
});
|
|
242
290
|
}
|
|
243
|
-
// Add static method for new usage pattern
|
|
244
291
|
static fetchTranscript(videoId, config) {
|
|
245
292
|
return __awaiter(this, void 0, void 0, function* () {
|
|
246
293
|
const instance = new YoutubeTranscript(config);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "youtube-transcript-plus",
|
|
3
|
-
"version": "1.0
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "Fetch transcript from a YouTube video",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/youtube-transcript-plus.js",
|
|
@@ -29,23 +29,26 @@
|
|
|
29
29
|
]
|
|
30
30
|
},
|
|
31
31
|
"devDependencies": {
|
|
32
|
-
"@types/jest": "^
|
|
32
|
+
"@types/jest": "^30.0.0",
|
|
33
33
|
"https-proxy-agent": "^7.0.6",
|
|
34
34
|
"husky": "^9.1.7",
|
|
35
|
-
"jest": "^
|
|
36
|
-
"lint-staged": "^
|
|
37
|
-
"prettier": "^3.
|
|
38
|
-
"rollup": "^4.
|
|
35
|
+
"jest": "^30.0.5",
|
|
36
|
+
"lint-staged": "^16.1.5",
|
|
37
|
+
"prettier": "^3.6.2",
|
|
38
|
+
"rollup": "^4.46.4",
|
|
39
39
|
"rollup-plugin-typescript": "^1.0.1",
|
|
40
40
|
"rollup-plugin-typescript2": "^0.36.0",
|
|
41
|
-
"ts-jest": "^29.
|
|
41
|
+
"ts-jest": "^29.4.1",
|
|
42
42
|
"tslib": "^2.8.1",
|
|
43
|
-
"typescript": "^5.
|
|
43
|
+
"typescript": "^5.9.2"
|
|
44
44
|
},
|
|
45
45
|
"files": [
|
|
46
46
|
"dist/*"
|
|
47
47
|
],
|
|
48
|
-
"repository":
|
|
48
|
+
"repository": {
|
|
49
|
+
"type": "git",
|
|
50
|
+
"url": "git+https://github.com/ericmmartin/youtube-transcript-plus.git"
|
|
51
|
+
},
|
|
49
52
|
"publishConfig": {
|
|
50
53
|
"access": "public"
|
|
51
54
|
},
|