youtube-transcript-plus 1.0.4 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,4 +1,10 @@
1
1
  import { TranscriptConfig, TranscriptResponse } from './types';
2
+ /**
3
+ * Implementation notes:
4
+ * - Keeps the public surface identical.
5
+ * - Internals now use YouTube Innertube `player` to discover captionTracks instead of scraping the watch HTML.
6
+ * - Honors `lang`, the custom fetch hooks (`videoFetch` for the watch page, `transcriptFetch` for the transcript; the Innertube `player` POST uses global `fetch`), and optional cache strategy.
7
+ */
2
8
  export declare class YoutubeTranscript {
3
9
  private config?;
4
10
  constructor(config?: TranscriptConfig & {
@@ -146,101 +146,148 @@ class InMemoryCache {
146
146
  }
147
147
  }
148
148
 
149
+ /**
150
+ * Implementation notes:
151
+ * - Keeps the public surface identical.
152
+ * - Internals now use YouTube Innertube `player` to discover captionTracks instead of scraping the watch HTML.
153
+ * - Honors `lang`, the custom fetch hooks (`videoFetch` for the watch page, `transcriptFetch` for the transcript; the Innertube `player` POST uses global `fetch`), and optional cache strategy.
154
+ */
149
155
  class YoutubeTranscript {
150
156
  constructor(config) {
151
157
  this.config = config;
152
158
  }
153
159
  fetchTranscript(videoId) {
154
160
  return __awaiter(this, void 0, void 0, function* () {
155
- var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p;
161
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
156
162
  const identifier = retrieveVideoId(videoId);
157
- const userAgent = ((_a = this.config) === null || _a === void 0 ? void 0 : _a.userAgent) || DEFAULT_USER_AGENT;
158
- // Use custom fetch functions if provided, otherwise use defaultFetch
159
- const videoFetch = ((_b = this.config) === null || _b === void 0 ? void 0 : _b.videoFetch) || defaultFetch;
160
- const transcriptFetch = ((_c = this.config) === null || _c === void 0 ? void 0 : _c.transcriptFetch) || defaultFetch;
161
- // Cache key based on video ID and language
162
- const cacheKey = `transcript:${identifier}:${((_d = this.config) === null || _d === void 0 ? void 0 : _d.lang) || 'default'}`;
163
- // Check cache first
164
- if ((_e = this.config) === null || _e === void 0 ? void 0 : _e.cache) {
165
- const cachedTranscript = yield this.config.cache.get(cacheKey);
166
- if (cachedTranscript) {
167
- return JSON.parse(cachedTranscript);
163
+ const lang = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang;
164
+ const userAgent = (_c = (_b = this.config) === null || _b === void 0 ? void 0 : _b.userAgent) !== null && _c !== void 0 ? _c : DEFAULT_USER_AGENT;
165
+ // Cache lookup (if provided)
166
+ const cache = (_d = this.config) === null || _d === void 0 ? void 0 : _d.cache;
167
+ const cacheTTL = (_e = this.config) === null || _e === void 0 ? void 0 : _e.cacheTTL;
168
+ const cacheKey = `yt:transcript:${identifier}:${lang !== null && lang !== void 0 ? lang : ''}`;
169
+ if (cache) {
170
+ const cached = yield cache.get(cacheKey);
171
+ if (cached) {
172
+ try {
173
+ return JSON.parse(cached);
174
+ }
175
+ catch (_o) {
176
+ // ignore parse errors and continue
177
+ }
168
178
  }
169
179
  }
180
+ // 1) Fetch the watch page to extract an Innertube API key (no interface change)
181
+ // Decide protocol once and reuse
170
182
  const protocol = ((_f = this.config) === null || _f === void 0 ? void 0 : _f.disableHttps) ? 'http' : 'https';
171
- // Fetch the video page
172
- const videoPageResponse = yield videoFetch({
173
- url: `${protocol}://www.youtube.com/watch?v=${identifier}`,
174
- lang: (_g = this.config) === null || _g === void 0 ? void 0 : _g.lang,
175
- userAgent,
176
- });
183
+ const watchUrl = `${protocol}://www.youtube.com/watch?v=${identifier}`;
184
+ const videoPageResponse = ((_g = this.config) === null || _g === void 0 ? void 0 : _g.videoFetch)
185
+ ? yield this.config.videoFetch({ url: watchUrl, lang, userAgent })
186
+ : yield defaultFetch({ url: watchUrl, lang, userAgent });
177
187
  if (!videoPageResponse.ok) {
178
188
  throw new YoutubeTranscriptVideoUnavailableError(identifier);
179
189
  }
180
190
  const videoPageBody = yield videoPageResponse.text();
181
- // Parse the video page to extract captions
182
- const splittedHTML = videoPageBody.split('"captions":');
183
- if (splittedHTML.length <= 1) {
184
- if (videoPageBody.includes('class="g-recaptcha"')) {
185
- throw new YoutubeTranscriptTooManyRequestError();
186
- }
187
- if (!videoPageBody.includes('"playabilityStatus":')) {
188
- throw new YoutubeTranscriptVideoUnavailableError(identifier);
189
- }
190
- throw new YoutubeTranscriptDisabledError(identifier);
191
+ // Basic bot/recaptcha detection preserves old error behavior
192
+ if (videoPageBody.includes('class="g-recaptcha"')) {
193
+ throw new YoutubeTranscriptTooManyRequestError();
191
194
  }
192
- const captions = (_h = (() => {
193
- try {
194
- return JSON.parse(splittedHTML[1].split(',"videoDetails')[0].replace('\n', ''));
195
- }
196
- catch (e) {
197
- return undefined;
195
+ // 2) Extract Innertube API key from the page
196
+ const apiKeyMatch = videoPageBody.match(/"INNERTUBE_API_KEY":"([^"]+)"/) ||
197
+ videoPageBody.match(/INNERTUBE_API_KEY\\":\\"([^\\"]+)\\"/);
198
+ if (!apiKeyMatch) {
199
+ // Without an Innertube API key the player endpoint cannot be queried,
200
+ // so report the transcript as "not available" (not "disabled").
201
+ throw new YoutubeTranscriptNotAvailableError(identifier);
202
+ }
203
+ const apiKey = apiKeyMatch[1];
204
+ // 3) Call Innertube player as ANDROID client to retrieve captionTracks
205
+ const playerEndpoint = `https://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
206
+ const playerBody = {
207
+ context: {
208
+ client: {
209
+ clientName: 'ANDROID',
210
+ clientVersion: '20.10.38',
211
+ },
212
+ },
213
+ videoId: identifier,
214
+ };
215
+ // Use global fetch for the POST (the `videoFetch`/`transcriptFetch` hooks are not applied here, and the endpoint is always HTTPS). No public interface change.
216
+ const playerRes = yield fetch(playerEndpoint, {
217
+ method: 'POST',
218
+ headers: Object.assign({ 'Content-Type': 'application/json', 'User-Agent': userAgent }, (lang ? { 'Accept-Language': lang } : {})),
219
+ body: JSON.stringify(playerBody),
220
+ });
221
+ if (!playerRes.ok) {
222
+ throw new YoutubeTranscriptVideoUnavailableError(identifier);
223
+ }
224
+ const playerJson = yield playerRes.json();
225
+ const tracklist = (_j = (_h = playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) === null || _h === void 0 ? void 0 : _h.playerCaptionsTracklistRenderer) !== null && _j !== void 0 ? _j : playerJson === null || playerJson === void 0 ? void 0 : playerJson.playerCaptionsTracklistRenderer;
226
+ const tracks = tracklist === null || tracklist === void 0 ? void 0 : tracklist.captionTracks;
227
+ const isPlayableOk = ((_k = playerJson === null || playerJson === void 0 ? void 0 : playerJson.playabilityStatus) === null || _k === void 0 ? void 0 : _k.status) === 'OK';
228
+ // If `captions` (or its tracklist renderer) is missing, decide between "disabled" and "not available"
229
+ if (!(playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) || !tracklist) {
230
+ // If video is playable but captions aren’t provided, treat as "disabled"
231
+ if (isPlayableOk) {
232
+ throw new YoutubeTranscriptDisabledError(identifier);
198
233
  }
199
- })()) === null || _h === void 0 ? void 0 : _h['playerCaptionsTracklistRenderer'];
200
- if (!captions) {
234
+ // Otherwise we can’t assert they’re disabled; treat as "not available"
235
+ throw new YoutubeTranscriptNotAvailableError(identifier);
236
+ }
237
+ // If `captions` exists but there are zero tracks, treat as "disabled"
238
+ if (!Array.isArray(tracks) || tracks.length === 0) {
201
239
  throw new YoutubeTranscriptDisabledError(identifier);
202
240
  }
203
- if (!('captionTracks' in captions)) {
241
+ // Respect requested language or fallback to first track
242
+ const selectedTrack = lang ? tracks.find((t) => t.languageCode === lang) : tracks[0];
243
+ if (!selectedTrack) {
244
+ const available = tracks.map((t) => t.languageCode).filter(Boolean);
245
+ throw new YoutubeTranscriptNotAvailableLanguageError(lang, available, identifier);
246
+ }
247
+ // 4) Build transcript URL; prefer XML by stripping a trailing `&fmt=...` parameter if present
248
+ let transcriptURL = selectedTrack.baseUrl || selectedTrack.url;
249
+ if (!transcriptURL) {
204
250
  throw new YoutubeTranscriptNotAvailableError(identifier);
205
251
  }
206
- if (((_j = this.config) === null || _j === void 0 ? void 0 : _j.lang) &&
207
- !captions.captionTracks.some((track) => { var _a; return track.languageCode === ((_a = this.config) === null || _a === void 0 ? void 0 : _a.lang); })) {
208
- throw new YoutubeTranscriptNotAvailableLanguageError((_k = this.config) === null || _k === void 0 ? void 0 : _k.lang, captions.captionTracks.map((track) => track.languageCode), identifier);
252
+ transcriptURL = transcriptURL.replace(/&fmt=[^&]+$/, '');
253
+ if ((_l = this.config) === null || _l === void 0 ? void 0 : _l.disableHttps) {
254
+ transcriptURL = transcriptURL.replace(/^https:\/\//, 'http://');
209
255
  }
210
- const captionURL = (((_l = this.config) === null || _l === void 0 ? void 0 : _l.lang)
211
- ? captions.captionTracks.find((track) => { var _a; return track.languageCode === ((_a = this.config) === null || _a === void 0 ? void 0 : _a.lang); })
212
- : captions.captionTracks[0]).baseUrl;
213
- const transcriptURL = ((_m = this.config) === null || _m === void 0 ? void 0 : _m.disableHttps)
214
- ? captionURL.replace('https://', 'http://')
215
- : captionURL;
216
- // Fetch the transcript
217
- const transcriptResponse = yield transcriptFetch({
218
- url: transcriptURL,
219
- lang: (_o = this.config) === null || _o === void 0 ? void 0 : _o.lang,
220
- userAgent,
221
- });
256
+ // 5) Fetch transcript XML using the same hook surface as before
257
+ const transcriptResponse = ((_m = this.config) === null || _m === void 0 ? void 0 : _m.transcriptFetch)
258
+ ? yield this.config.transcriptFetch({ url: transcriptURL, lang, userAgent })
259
+ : yield defaultFetch({ url: transcriptURL, lang, userAgent });
222
260
  if (!transcriptResponse.ok) {
261
+ // Surface rate limiting (HTTP 429) as its own error; any other failure is "not available"
262
+ if (transcriptResponse.status === 429) {
263
+ throw new YoutubeTranscriptTooManyRequestError();
264
+ }
223
265
  throw new YoutubeTranscriptNotAvailableError(identifier);
224
266
  }
225
267
  const transcriptBody = yield transcriptResponse.text();
268
+ // 6) Parse XML into the existing TranscriptResponse shape
226
269
  const results = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)];
227
- const transcript = results.map((result) => {
228
- var _a, _b;
229
- return ({
230
- text: result[3],
231
- duration: parseFloat(result[2]),
232
- offset: parseFloat(result[1]),
233
- lang: (_b = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang) !== null && _b !== void 0 ? _b : captions.captionTracks[0].languageCode,
234
- });
235
- });
236
- // Store in cache if a strategy is provided
237
- if ((_p = this.config) === null || _p === void 0 ? void 0 : _p.cache) {
238
- yield this.config.cache.set(cacheKey, JSON.stringify(transcript), this.config.cacheTTL);
270
+ const transcript = results.map((m) => ({
271
+ text: m[3],
272
+ duration: parseFloat(m[2]),
273
+ offset: parseFloat(m[1]),
274
+ lang: lang !== null && lang !== void 0 ? lang : selectedTrack.languageCode,
275
+ }));
276
+ if (transcript.length === 0) {
277
+ throw new YoutubeTranscriptNotAvailableError(identifier);
278
+ }
279
+ // Cache store
280
+ if (cache) {
281
+ try {
282
+ yield cache.set(cacheKey, JSON.stringify(transcript), cacheTTL);
283
+ }
284
+ catch (_p) {
285
+ // non-fatal
286
+ }
239
287
  }
240
288
  return transcript;
241
289
  });
242
290
  }
243
- // Add static method for new usage pattern
244
291
  static fetchTranscript(videoId, config) {
245
292
  return __awaiter(this, void 0, void 0, function* () {
246
293
  const instance = new YoutubeTranscript(config);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "youtube-transcript-plus",
3
- "version": "1.0.4",
3
+ "version": "1.1.0",
4
4
  "description": "Fetch transcript from a YouTube video",
5
5
  "type": "module",
6
6
  "main": "dist/youtube-transcript-plus.js",
@@ -29,23 +29,26 @@
29
29
  ]
30
30
  },
31
31
  "devDependencies": {
32
- "@types/jest": "^29.5.14",
32
+ "@types/jest": "^30.0.0",
33
33
  "https-proxy-agent": "^7.0.6",
34
34
  "husky": "^9.1.7",
35
- "jest": "^29.7.0",
36
- "lint-staged": "^15.5.0",
37
- "prettier": "^3.5.3",
38
- "rollup": "^4.37.0",
35
+ "jest": "^30.0.5",
36
+ "lint-staged": "^16.1.5",
37
+ "prettier": "^3.6.2",
38
+ "rollup": "^4.46.4",
39
39
  "rollup-plugin-typescript": "^1.0.1",
40
40
  "rollup-plugin-typescript2": "^0.36.0",
41
- "ts-jest": "^29.3.0",
41
+ "ts-jest": "^29.4.1",
42
42
  "tslib": "^2.8.1",
43
- "typescript": "^5.8.2"
43
+ "typescript": "^5.9.2"
44
44
  },
45
45
  "files": [
46
46
  "dist/*"
47
47
  ],
48
- "repository": "https://github.com/ericmmartin/youtube-transcript-plus.git",
48
+ "repository": {
49
+ "type": "git",
50
+ "url": "git+https://github.com/ericmmartin/youtube-transcript-plus.git"
51
+ },
49
52
  "publishConfig": {
50
53
  "access": "public"
51
54
  },