youtube-transcript-plus 1.0.3 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/dist/index.d.ts +6 -0
- package/dist/types.d.ts +1 -0
- package/dist/youtube-transcript-plus.js +112 -61
- package/package.json +12 -9
package/README.md
CHANGED
|
@@ -42,6 +42,20 @@ fetchTranscript('videoId_or_URL', {
|
|
|
42
42
|
.catch(console.error);
|
|
43
43
|
```
|
|
44
44
|
|
|
45
|
+
### HTTP Support
|
|
46
|
+
|
|
47
|
+
You can disable HTTPS and use HTTP instead for YouTube requests by setting the `disableHttps` option to `true`. This might be necessary in certain environments where HTTPS connections are restricted.
|
|
48
|
+
|
|
49
|
+
```javascript
|
|
50
|
+
fetchTranscript('videoId_or_URL', {
|
|
51
|
+
disableHttps: true, // Use HTTP instead of HTTPS
|
|
52
|
+
})
|
|
53
|
+
.then(console.log)
|
|
54
|
+
.catch(console.error);
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
**Security Warning:** Using HTTP instead of HTTPS removes transport layer security and is not recommended for production environments. Only use this option when absolutely necessary.
|
|
58
|
+
|
|
45
59
|
### Custom Fetch Functions
|
|
46
60
|
|
|
47
61
|
You can inject custom `videoFetch` and `transcriptFetch` functions to modify the fetch behavior, such as using a proxy or custom headers.
|
|
@@ -188,6 +202,7 @@ Fetches the transcript for a YouTube video.
|
|
|
188
202
|
- **`userAgent`**: Custom User-Agent string.
|
|
189
203
|
- **`cache`**: Custom caching strategy.
|
|
190
204
|
- **`cacheTTL`**: Time-to-live for cache entries in milliseconds.
|
|
205
|
+
- **`disableHttps`**: Set to `true` to use HTTP instead of HTTPS for YouTube requests.
|
|
191
206
|
- **`videoFetch`**: Custom fetch function for the video page request.
|
|
192
207
|
- **`transcriptFetch`**: Custom fetch function for the transcript request.
|
|
193
208
|
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,10 @@
|
|
|
1
1
|
import { TranscriptConfig, TranscriptResponse } from './types';
|
|
2
|
+
/**
|
|
3
|
+
* Implementation notes:
|
|
4
|
+
* - Keeps the public surface identical.
|
|
5
|
+
* - Internals now use YouTube Innertube `player` to discover captionTracks instead of scraping the watch HTML.
|
|
6
|
+
* - Honors `lang`, custom fetch hooks (`videoFetch`, `transcriptFetch`), and optional cache strategy.
|
|
7
|
+
*/
|
|
2
8
|
export declare class YoutubeTranscript {
|
|
3
9
|
private config?;
|
|
4
10
|
constructor(config?: TranscriptConfig & {
|
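package/dist/types.d.ts
CHANGED
[hunk for this file (+1 -0) is not present in this capture]
package/dist/youtube-transcript-plus.js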
CHANGED
|
@@ -146,6 +146,12 @@ class InMemoryCache {
|
|
|
146
146
|
}
|
|
147
147
|
}
|
|
148
148
|
|
|
149
|
+
/**
|
|
150
|
+
* Implementation notes:
|
|
151
|
+
* - Keeps the public surface identical.
|
|
152
|
+
* - Internals now use YouTube Innertube `player` to discover captionTracks instead of scraping the watch HTML.
|
|
153
|
+
* - Honors `lang`, custom fetch hooks (`videoFetch`, `transcriptFetch`), and optional cache strategy.
|
|
154
|
+
*/
|
|
149
155
|
class YoutubeTranscript {
|
|
150
156
|
constructor(config) {
|
|
151
157
|
this.config = config;
|
|
@@ -154,89 +160,134 @@ class YoutubeTranscript {
|
|
|
154
160
|
return __awaiter(this, void 0, void 0, function* () {
|
|
155
161
|
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
|
|
156
162
|
const identifier = retrieveVideoId(videoId);
|
|
157
|
-
const
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
const
|
|
161
|
-
|
|
162
|
-
const cacheKey = `transcript:${identifier}:${
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
163
|
+
const lang = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang;
|
|
164
|
+
const userAgent = (_c = (_b = this.config) === null || _b === void 0 ? void 0 : _b.userAgent) !== null && _c !== void 0 ? _c : DEFAULT_USER_AGENT;
|
|
165
|
+
// Cache lookup (if provided)
|
|
166
|
+
const cache = (_d = this.config) === null || _d === void 0 ? void 0 : _d.cache;
|
|
167
|
+
const cacheTTL = (_e = this.config) === null || _e === void 0 ? void 0 : _e.cacheTTL;
|
|
168
|
+
const cacheKey = `yt:transcript:${identifier}:${lang !== null && lang !== void 0 ? lang : ''}`;
|
|
169
|
+
if (cache) {
|
|
170
|
+
const cached = yield cache.get(cacheKey);
|
|
171
|
+
if (cached) {
|
|
172
|
+
try {
|
|
173
|
+
return JSON.parse(cached);
|
|
174
|
+
}
|
|
175
|
+
catch (_o) {
|
|
176
|
+
// ignore parse errors and continue
|
|
177
|
+
}
|
|
168
178
|
}
|
|
169
179
|
}
|
|
170
|
-
// Fetch the
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
180
|
+
// 1) Fetch the watch page to extract an Innertube API key (no interface change)
|
|
181
|
+
// Decide protocol once and reuse
|
|
182
|
+
const protocol = ((_f = this.config) === null || _f === void 0 ? void 0 : _f.disableHttps) ? 'http' : 'https';
|
|
183
|
+
const watchUrl = `${protocol}://www.youtube.com/watch?v=${identifier}`;
|
|
184
|
+
const videoPageResponse = ((_g = this.config) === null || _g === void 0 ? void 0 : _g.videoFetch)
|
|
185
|
+
? yield this.config.videoFetch({ url: watchUrl, lang, userAgent })
|
|
186
|
+
: yield defaultFetch({ url: watchUrl, lang, userAgent });
|
|
176
187
|
if (!videoPageResponse.ok) {
|
|
177
188
|
throw new YoutubeTranscriptVideoUnavailableError(identifier);
|
|
178
189
|
}
|
|
179
190
|
const videoPageBody = yield videoPageResponse.text();
|
|
180
|
-
//
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
if (videoPageBody.includes('class="g-recaptcha"')) {
|
|
184
|
-
throw new YoutubeTranscriptTooManyRequestError();
|
|
185
|
-
}
|
|
186
|
-
if (!videoPageBody.includes('"playabilityStatus":')) {
|
|
187
|
-
throw new YoutubeTranscriptVideoUnavailableError(identifier);
|
|
188
|
-
}
|
|
189
|
-
throw new YoutubeTranscriptDisabledError(identifier);
|
|
191
|
+
// Basic bot/recaptcha detection preserves old error behavior
|
|
192
|
+
if (videoPageBody.includes('class="g-recaptcha"')) {
|
|
193
|
+
throw new YoutubeTranscriptTooManyRequestError();
|
|
190
194
|
}
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
195
|
+
// 2) Extract Innertube API key from the page
|
|
196
|
+
const apiKeyMatch = videoPageBody.match(/"INNERTUBE_API_KEY":"([^"]+)"/) ||
|
|
197
|
+
videoPageBody.match(/INNERTUBE_API_KEY\\":\\"([^\\"]+)\\"/);
|
|
198
|
+
if (!apiKeyMatch) {
|
|
199
|
+
// If captions JSON wasn't present previously and we also can't find an API key,
|
|
200
|
+
// retain the disabled semantics for compatibility.
|
|
201
|
+
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
202
|
+
}
|
|
203
|
+
const apiKey = apiKeyMatch[1];
|
|
204
|
+
// 3) Call Innertube player as ANDROID client to retrieve captionTracks
|
|
205
|
+
const playerEndpoint = `https://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
|
|
206
|
+
const playerBody = {
|
|
207
|
+
context: {
|
|
208
|
+
client: {
|
|
209
|
+
clientName: 'ANDROID',
|
|
210
|
+
clientVersion: '20.10.38',
|
|
211
|
+
},
|
|
212
|
+
},
|
|
213
|
+
videoId: identifier,
|
|
214
|
+
};
|
|
215
|
+
// Use global fetch for the POST. No public interface change.
|
|
216
|
+
const playerRes = yield fetch(playerEndpoint, {
|
|
217
|
+
method: 'POST',
|
|
218
|
+
headers: Object.assign({ 'Content-Type': 'application/json', 'User-Agent': userAgent }, (lang ? { 'Accept-Language': lang } : {})),
|
|
219
|
+
body: JSON.stringify(playerBody),
|
|
220
|
+
});
|
|
221
|
+
if (!playerRes.ok) {
|
|
222
|
+
throw new YoutubeTranscriptVideoUnavailableError(identifier);
|
|
223
|
+
}
|
|
224
|
+
const playerJson = yield playerRes.json();
|
|
225
|
+
const tracklist = (_j = (_h = playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) === null || _h === void 0 ? void 0 : _h.playerCaptionsTracklistRenderer) !== null && _j !== void 0 ? _j : playerJson === null || playerJson === void 0 ? void 0 : playerJson.playerCaptionsTracklistRenderer;
|
|
226
|
+
const tracks = tracklist === null || tracklist === void 0 ? void 0 : tracklist.captionTracks;
|
|
227
|
+
const isPlayableOk = ((_k = playerJson === null || playerJson === void 0 ? void 0 : playerJson.playabilityStatus) === null || _k === void 0 ? void 0 : _k.status) === 'OK';
|
|
228
|
+
// If `captions` is entirely missing, treat as "not available"
|
|
229
|
+
if (!(playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) || !tracklist) {
|
|
230
|
+
// If video is playable but captions aren’t provided, treat as "disabled"
|
|
231
|
+
if (isPlayableOk) {
|
|
232
|
+
throw new YoutubeTranscriptDisabledError(identifier);
|
|
197
233
|
}
|
|
198
|
-
|
|
199
|
-
|
|
234
|
+
// Otherwise we can’t assert they’re disabled; treat as "not available"
|
|
235
|
+
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
236
|
+
}
|
|
237
|
+
// If `captions` exists but there are zero tracks, treat as "disabled"
|
|
238
|
+
if (!Array.isArray(tracks) || tracks.length === 0) {
|
|
200
239
|
throw new YoutubeTranscriptDisabledError(identifier);
|
|
201
240
|
}
|
|
202
|
-
|
|
241
|
+
// Respect requested language or fallback to first track
|
|
242
|
+
const selectedTrack = lang ? tracks.find((t) => t.languageCode === lang) : tracks[0];
|
|
243
|
+
if (!selectedTrack) {
|
|
244
|
+
const available = tracks.map((t) => t.languageCode).filter(Boolean);
|
|
245
|
+
throw new YoutubeTranscriptNotAvailableLanguageError(lang, available, identifier);
|
|
246
|
+
}
|
|
247
|
+
// 4) Build transcript URL; prefer XML by stripping fmt if present
|
|
248
|
+
let transcriptURL = selectedTrack.baseUrl || selectedTrack.url;
|
|
249
|
+
if (!transcriptURL) {
|
|
203
250
|
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
204
251
|
}
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
252
|
+
transcriptURL = transcriptURL.replace(/&fmt=[^&]+$/, '');
|
|
253
|
+
if ((_l = this.config) === null || _l === void 0 ? void 0 : _l.disableHttps) {
|
|
254
|
+
transcriptURL = transcriptURL.replace(/^https:\/\//, 'http://');
|
|
208
255
|
}
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
:
|
|
212
|
-
|
|
213
|
-
const transcriptResponse = yield transcriptFetch({
|
|
214
|
-
url: transcriptURL,
|
|
215
|
-
lang: (_l = this.config) === null || _l === void 0 ? void 0 : _l.lang,
|
|
216
|
-
userAgent,
|
|
217
|
-
});
|
|
256
|
+
// 5) Fetch transcript XML using the same hook surface as before
|
|
257
|
+
const transcriptResponse = ((_m = this.config) === null || _m === void 0 ? void 0 : _m.transcriptFetch)
|
|
258
|
+
? yield this.config.transcriptFetch({ url: transcriptURL, lang, userAgent })
|
|
259
|
+
: yield defaultFetch({ url: transcriptURL, lang, userAgent });
|
|
218
260
|
if (!transcriptResponse.ok) {
|
|
261
|
+
// Preserve legacy behavior
|
|
262
|
+
if (transcriptResponse.status === 429) {
|
|
263
|
+
throw new YoutubeTranscriptTooManyRequestError();
|
|
264
|
+
}
|
|
219
265
|
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
220
266
|
}
|
|
221
267
|
const transcriptBody = yield transcriptResponse.text();
|
|
268
|
+
// 6) Parse XML into the existing TranscriptResponse shape
|
|
222
269
|
const results = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)];
|
|
223
|
-
const transcript = results.map((
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
}
|
|
232
|
-
//
|
|
233
|
-
if (
|
|
234
|
-
|
|
270
|
+
const transcript = results.map((m) => ({
|
|
271
|
+
text: m[3],
|
|
272
|
+
duration: parseFloat(m[2]),
|
|
273
|
+
offset: parseFloat(m[1]),
|
|
274
|
+
lang: lang !== null && lang !== void 0 ? lang : selectedTrack.languageCode,
|
|
275
|
+
}));
|
|
276
|
+
if (transcript.length === 0) {
|
|
277
|
+
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
278
|
+
}
|
|
279
|
+
// Cache store
|
|
280
|
+
if (cache) {
|
|
281
|
+
try {
|
|
282
|
+
yield cache.set(cacheKey, JSON.stringify(transcript), cacheTTL);
|
|
283
|
+
}
|
|
284
|
+
catch (_p) {
|
|
285
|
+
// non-fatal
|
|
286
|
+
}
|
|
235
287
|
}
|
|
236
288
|
return transcript;
|
|
237
289
|
});
|
|
238
290
|
}
|
|
239
|
-
// Add static method for new usage pattern
|
|
240
291
|
static fetchTranscript(videoId, config) {
|
|
241
292
|
return __awaiter(this, void 0, void 0, function* () {
|
|
242
293
|
const instance = new YoutubeTranscript(config);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "youtube-transcript-plus",
|
|
3
|
-
"version": "1.0.3",
|
|
3
|
+
"version": "1.1.0",
|
|
4
4
|
"description": "Fetch transcript from a YouTube video",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/youtube-transcript-plus.js",
|
|
@@ -29,23 +29,26 @@
|
|
|
29
29
|
]
|
|
30
30
|
},
|
|
31
31
|
"devDependencies": {
|
|
32
|
-
"@types/jest": "^
|
|
32
|
+
"@types/jest": "^30.0.0",
|
|
33
33
|
"https-proxy-agent": "^7.0.6",
|
|
34
34
|
"husky": "^9.1.7",
|
|
35
|
-
"jest": "^
|
|
36
|
-
"lint-staged": "^
|
|
37
|
-
"prettier": "^3.
|
|
38
|
-
"rollup": "^4.
|
|
35
|
+
"jest": "^30.0.5",
|
|
36
|
+
"lint-staged": "^16.1.5",
|
|
37
|
+
"prettier": "^3.6.2",
|
|
38
|
+
"rollup": "^4.46.4",
|
|
39
39
|
"rollup-plugin-typescript": "^1.0.1",
|
|
40
40
|
"rollup-plugin-typescript2": "^0.36.0",
|
|
41
|
-
"ts-jest": "^29.
|
|
41
|
+
"ts-jest": "^29.4.1",
|
|
42
42
|
"tslib": "^2.8.1",
|
|
43
|
-
"typescript": "^5.
|
|
43
|
+
"typescript": "^5.9.2"
|
|
44
44
|
},
|
|
45
45
|
"files": [
|
|
46
46
|
"dist/*"
|
|
47
47
|
],
|
|
48
|
-
"repository":
|
|
48
|
+
"repository": {
|
|
49
|
+
"type": "git",
|
|
50
|
+
"url": "git+https://github.com/ericmmartin/youtube-transcript-plus.git"
|
|
51
|
+
},
|
|
49
52
|
"publishConfig": {
|
|
50
53
|
"access": "public"
|
|
51
54
|
},
|