youtube-transcript-plus 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -42,6 +42,20 @@ fetchTranscript('videoId_or_URL', {
42
42
  .catch(console.error);
43
43
  ```
44
44
 
45
+ ### HTTP Support
46
+
47
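+ You can disable HTTPS and use HTTP instead for the YouTube watch-page and transcript requests by setting the `disableHttps` option to `true`. The internal Innertube `player` request is always sent over HTTPS regardless of this option. This might be necessary in certain environments where HTTPS connections are restricted.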
48
+
49
+ ```javascript
50
+ fetchTranscript('videoId_or_URL', {
51
+ disableHttps: true, // Use HTTP instead of HTTPS
52
+ })
53
+ .then(console.log)
54
+ .catch(console.error);
55
+ ```
56
+
57
+ **Security Warning:** Using HTTP instead of HTTPS removes transport layer security and is not recommended for production environments. Only use this option when absolutely necessary.
58
+
45
59
  ### Custom Fetch Functions
46
60
 
47
61
  You can inject custom `videoFetch` and `transcriptFetch` functions to modify the fetch behavior, such as using a proxy or custom headers.
@@ -188,6 +202,7 @@ Fetches the transcript for a YouTube video.
188
202
  - **`userAgent`**: Custom User-Agent string.
189
203
  - **`cache`**: Custom caching strategy.
190
204
  - **`cacheTTL`**: Time-to-live for cache entries in milliseconds.
205
+ - **`disableHttps`**: Set to `true` to use HTTP instead of HTTPS for the watch-page and transcript requests (the Innertube `player` request always uses HTTPS).
191
206
  - **`videoFetch`**: Custom fetch function for the video page request.
192
207
  - **`transcriptFetch`**: Custom fetch function for the transcript request.
193
208
 
package/dist/index.d.ts CHANGED
@@ -1,4 +1,10 @@
1
1
  import { TranscriptConfig, TranscriptResponse } from './types';
2
+ /**
3
+ * Implementation notes:
4
+ * - Keeps the public surface identical.
5
+ * - Internals now use the YouTube Innertube `player` endpoint to discover captionTracks instead of scraping them from the watch HTML; the watch page is still fetched, but only to extract the Innertube API key.
6
+ * - Honors `lang`, custom fetch hooks (`videoFetch`, `transcriptFetch`), and optional cache strategy; the Innertube `player` POST is sent with the global `fetch`, not through the hooks.
7
+ */
2
8
  export declare class YoutubeTranscript {
3
9
  private config?;
4
10
  constructor(config?: TranscriptConfig & {
package/dist/types.d.ts CHANGED
@@ -7,6 +7,7 @@ export interface TranscriptConfig {
7
7
  userAgent?: string;
8
8
  cache?: CacheStrategy;
9
9
  cacheTTL?: number;
10
+ disableHttps?: boolean;
10
11
  videoFetch?: (params: {
11
12
  url: string;
12
13
  lang?: string;
@@ -146,6 +146,12 @@ class InMemoryCache {
146
146
  }
147
147
  }
148
148
 
149
+ /**
150
+ * Implementation notes:
151
+ * - Keeps the public surface identical.
152
+ * - Internals now use the YouTube Innertube `player` endpoint to discover captionTracks instead of scraping them from the watch HTML; the watch page is still fetched, but only to extract the Innertube API key.
153
+ * - Honors `lang`, custom fetch hooks (`videoFetch`, `transcriptFetch`), and optional cache strategy; the Innertube `player` POST is sent with the global `fetch`, not through the hooks.
154
+ */
149
155
  class YoutubeTranscript {
150
156
  constructor(config) {
151
157
  this.config = config;
@@ -154,89 +160,134 @@ class YoutubeTranscript {
154
160
  return __awaiter(this, void 0, void 0, function* () {
155
161
  var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
156
162
  const identifier = retrieveVideoId(videoId);
157
- const userAgent = ((_a = this.config) === null || _a === void 0 ? void 0 : _a.userAgent) || DEFAULT_USER_AGENT;
158
- // Use custom fetch functions if provided, otherwise use defaultFetch
159
- const videoFetch = ((_b = this.config) === null || _b === void 0 ? void 0 : _b.videoFetch) || defaultFetch;
160
- const transcriptFetch = ((_c = this.config) === null || _c === void 0 ? void 0 : _c.transcriptFetch) || defaultFetch;
161
- // Cache key based on video ID and language
162
- const cacheKey = `transcript:${identifier}:${((_d = this.config) === null || _d === void 0 ? void 0 : _d.lang) || 'default'}`;
163
- // Check cache first
164
- if ((_e = this.config) === null || _e === void 0 ? void 0 : _e.cache) {
165
- const cachedTranscript = yield this.config.cache.get(cacheKey);
166
- if (cachedTranscript) {
167
- return JSON.parse(cachedTranscript);
163
+ const lang = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang;
164
+ const userAgent = (_c = (_b = this.config) === null || _b === void 0 ? void 0 : _b.userAgent) !== null && _c !== void 0 ? _c : DEFAULT_USER_AGENT;
165
+ // Cache lookup (if provided)
166
+ const cache = (_d = this.config) === null || _d === void 0 ? void 0 : _d.cache;
167
+ const cacheTTL = (_e = this.config) === null || _e === void 0 ? void 0 : _e.cacheTTL;
168
+ const cacheKey = `yt:transcript:${identifier}:${lang !== null && lang !== void 0 ? lang : ''}`;
169
+ if (cache) {
170
+ const cached = yield cache.get(cacheKey);
171
+ if (cached) {
172
+ try {
173
+ return JSON.parse(cached);
174
+ }
175
+ catch (_o) {
176
+ // ignore parse errors and continue
177
+ }
168
178
  }
169
179
  }
170
- // Fetch the video page
171
- const videoPageResponse = yield videoFetch({
172
- url: `https://www.youtube.com/watch?v=${identifier}`,
173
- lang: (_f = this.config) === null || _f === void 0 ? void 0 : _f.lang,
174
- userAgent,
175
- });
180
+ // 1) Fetch the watch page to extract an Innertube API key (no interface change)
181
+ // Decide protocol once and reuse
182
+ const protocol = ((_f = this.config) === null || _f === void 0 ? void 0 : _f.disableHttps) ? 'http' : 'https';
183
+ const watchUrl = `${protocol}://www.youtube.com/watch?v=${identifier}`;
184
+ const videoPageResponse = ((_g = this.config) === null || _g === void 0 ? void 0 : _g.videoFetch)
185
+ ? yield this.config.videoFetch({ url: watchUrl, lang, userAgent })
186
+ : yield defaultFetch({ url: watchUrl, lang, userAgent });
176
187
  if (!videoPageResponse.ok) {
177
188
  throw new YoutubeTranscriptVideoUnavailableError(identifier);
178
189
  }
179
190
  const videoPageBody = yield videoPageResponse.text();
180
- // Parse the video page to extract captions
181
- const splittedHTML = videoPageBody.split('"captions":');
182
- if (splittedHTML.length <= 1) {
183
- if (videoPageBody.includes('class="g-recaptcha"')) {
184
- throw new YoutubeTranscriptTooManyRequestError();
185
- }
186
- if (!videoPageBody.includes('"playabilityStatus":')) {
187
- throw new YoutubeTranscriptVideoUnavailableError(identifier);
188
- }
189
- throw new YoutubeTranscriptDisabledError(identifier);
191
+ // Basic bot/recaptcha detection preserves old error behavior
192
+ if (videoPageBody.includes('class="g-recaptcha"')) {
193
+ throw new YoutubeTranscriptTooManyRequestError();
190
194
  }
191
- const captions = (_g = (() => {
192
- try {
193
- return JSON.parse(splittedHTML[1].split(',"videoDetails')[0].replace('\n', ''));
194
- }
195
- catch (e) {
196
- return undefined;
195
+ // 2) Extract Innertube API key from the page
196
+ const apiKeyMatch = videoPageBody.match(/"INNERTUBE_API_KEY":"([^"]+)"/) ||
197
+ videoPageBody.match(/INNERTUBE_API_KEY\\":\\"([^\\"]+)\\"/);
198
+ if (!apiKeyMatch) {
199
+ // No API key means the Innertube player request below cannot be made, so we cannot
200
+ // tell whether captions are disabled; report the transcript as "not available".
201
+ throw new YoutubeTranscriptNotAvailableError(identifier);
202
+ }
203
+ const apiKey = apiKeyMatch[1];
204
+ // 3) Call Innertube player as ANDROID client to retrieve captionTracks
205
+ const playerEndpoint = `https://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
206
+ const playerBody = {
207
+ context: {
208
+ client: {
209
+ clientName: 'ANDROID',
210
+ clientVersion: '20.10.38',
211
+ },
212
+ },
213
+ videoId: identifier,
214
+ };
215
+ // Use global fetch for the POST. No public interface change.
216
+ const playerRes = yield fetch(playerEndpoint, {
217
+ method: 'POST',
218
+ headers: Object.assign({ 'Content-Type': 'application/json', 'User-Agent': userAgent }, (lang ? { 'Accept-Language': lang } : {})),
219
+ body: JSON.stringify(playerBody),
220
+ });
221
+ if (!playerRes.ok) {
222
+ throw new YoutubeTranscriptVideoUnavailableError(identifier);
223
+ }
224
+ const playerJson = yield playerRes.json();
225
+ const tracklist = (_j = (_h = playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) === null || _h === void 0 ? void 0 : _h.playerCaptionsTracklistRenderer) !== null && _j !== void 0 ? _j : playerJson === null || playerJson === void 0 ? void 0 : playerJson.playerCaptionsTracklistRenderer;
226
+ const tracks = tracklist === null || tracklist === void 0 ? void 0 : tracklist.captionTracks;
227
+ const isPlayableOk = ((_k = playerJson === null || playerJson === void 0 ? void 0 : playerJson.playabilityStatus) === null || _k === void 0 ? void 0 : _k.status) === 'OK';
228
+ // If `captions` (or its tracklist renderer) is missing, decide between "disabled" and "not available"
229
+ if (!(playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) || !tracklist) {
230
+ // If video is playable but captions aren’t provided, treat as "disabled"
231
+ if (isPlayableOk) {
232
+ throw new YoutubeTranscriptDisabledError(identifier);
197
233
  }
198
- })()) === null || _g === void 0 ? void 0 : _g['playerCaptionsTracklistRenderer'];
199
- if (!captions) {
234
+ // Otherwise we can’t assert they’re disabled; treat as "not available"
235
+ throw new YoutubeTranscriptNotAvailableError(identifier);
236
+ }
237
+ // If `captions` exists but there are zero tracks, treat as "disabled"
238
+ if (!Array.isArray(tracks) || tracks.length === 0) {
200
239
  throw new YoutubeTranscriptDisabledError(identifier);
201
240
  }
202
- if (!('captionTracks' in captions)) {
241
+ // Respect requested language or fallback to first track
242
+ const selectedTrack = lang ? tracks.find((t) => t.languageCode === lang) : tracks[0];
243
+ if (!selectedTrack) {
244
+ const available = tracks.map((t) => t.languageCode).filter(Boolean);
245
+ throw new YoutubeTranscriptNotAvailableLanguageError(lang, available, identifier);
246
+ }
247
+ // 4) Build transcript URL; prefer XML by stripping a trailing `&fmt=...` parameter if present
248
+ let transcriptURL = selectedTrack.baseUrl || selectedTrack.url;
249
+ if (!transcriptURL) {
203
250
  throw new YoutubeTranscriptNotAvailableError(identifier);
204
251
  }
205
- if (((_h = this.config) === null || _h === void 0 ? void 0 : _h.lang) &&
206
- !captions.captionTracks.some((track) => { var _a; return track.languageCode === ((_a = this.config) === null || _a === void 0 ? void 0 : _a.lang); })) {
207
- throw new YoutubeTranscriptNotAvailableLanguageError((_j = this.config) === null || _j === void 0 ? void 0 : _j.lang, captions.captionTracks.map((track) => track.languageCode), identifier);
252
+ transcriptURL = transcriptURL.replace(/&fmt=[^&]+$/, '');
253
+ if ((_l = this.config) === null || _l === void 0 ? void 0 : _l.disableHttps) {
254
+ transcriptURL = transcriptURL.replace(/^https:\/\//, 'http://');
208
255
  }
209
- const transcriptURL = (((_k = this.config) === null || _k === void 0 ? void 0 : _k.lang)
210
- ? captions.captionTracks.find((track) => { var _a; return track.languageCode === ((_a = this.config) === null || _a === void 0 ? void 0 : _a.lang); })
211
- : captions.captionTracks[0]).baseUrl;
212
- // Fetch the transcript
213
- const transcriptResponse = yield transcriptFetch({
214
- url: transcriptURL,
215
- lang: (_l = this.config) === null || _l === void 0 ? void 0 : _l.lang,
216
- userAgent,
217
- });
256
+ // 5) Fetch transcript XML using the same hook surface as before
257
+ const transcriptResponse = ((_m = this.config) === null || _m === void 0 ? void 0 : _m.transcriptFetch)
258
+ ? yield this.config.transcriptFetch({ url: transcriptURL, lang, userAgent })
259
+ : yield defaultFetch({ url: transcriptURL, lang, userAgent });
218
260
  if (!transcriptResponse.ok) {
261
+ // Surface HTTP 429 as the rate-limit error; any other failure is "not available"
262
+ if (transcriptResponse.status === 429) {
263
+ throw new YoutubeTranscriptTooManyRequestError();
264
+ }
219
265
  throw new YoutubeTranscriptNotAvailableError(identifier);
220
266
  }
221
267
  const transcriptBody = yield transcriptResponse.text();
268
+ // 6) Parse XML into the existing TranscriptResponse shape
222
269
  const results = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)];
223
- const transcript = results.map((result) => {
224
- var _a, _b;
225
- return ({
226
- text: result[3],
227
- duration: parseFloat(result[2]),
228
- offset: parseFloat(result[1]),
229
- lang: (_b = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang) !== null && _b !== void 0 ? _b : captions.captionTracks[0].languageCode,
230
- });
231
- });
232
- // Store in cache if a strategy is provided
233
- if ((_m = this.config) === null || _m === void 0 ? void 0 : _m.cache) {
234
- yield this.config.cache.set(cacheKey, JSON.stringify(transcript), this.config.cacheTTL);
270
+ const transcript = results.map((m) => ({
271
+ text: m[3],
272
+ duration: parseFloat(m[2]),
273
+ offset: parseFloat(m[1]),
274
+ lang: lang !== null && lang !== void 0 ? lang : selectedTrack.languageCode,
275
+ }));
276
+ if (transcript.length === 0) {
277
+ throw new YoutubeTranscriptNotAvailableError(identifier);
278
+ }
279
+ // Cache store
280
+ if (cache) {
281
+ try {
282
+ yield cache.set(cacheKey, JSON.stringify(transcript), cacheTTL);
283
+ }
284
+ catch (_p) {
285
+ // non-fatal
286
+ }
235
287
  }
236
288
  return transcript;
237
289
  });
238
290
  }
239
- // Add static method for new usage pattern
240
291
  static fetchTranscript(videoId, config) {
241
292
  return __awaiter(this, void 0, void 0, function* () {
242
293
  const instance = new YoutubeTranscript(config);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "youtube-transcript-plus",
3
- "version": "1.0.3",
3
+ "version": "1.1.0",
4
4
  "description": "Fetch transcript from a YouTube video",
5
5
  "type": "module",
6
6
  "main": "dist/youtube-transcript-plus.js",
@@ -29,23 +29,26 @@
29
29
  ]
30
30
  },
31
31
  "devDependencies": {
32
- "@types/jest": "^29.5.14",
32
+ "@types/jest": "^30.0.0",
33
33
  "https-proxy-agent": "^7.0.6",
34
34
  "husky": "^9.1.7",
35
- "jest": "^29.7.0",
36
- "lint-staged": "^15.5.0",
37
- "prettier": "^3.5.3",
38
- "rollup": "^4.37.0",
35
+ "jest": "^30.0.5",
36
+ "lint-staged": "^16.1.5",
37
+ "prettier": "^3.6.2",
38
+ "rollup": "^4.46.4",
39
39
  "rollup-plugin-typescript": "^1.0.1",
40
40
  "rollup-plugin-typescript2": "^0.36.0",
41
- "ts-jest": "^29.3.0",
41
+ "ts-jest": "^29.4.1",
42
42
  "tslib": "^2.8.1",
43
- "typescript": "^5.8.2"
43
+ "typescript": "^5.9.2"
44
44
  },
45
45
  "files": [
46
46
  "dist/*"
47
47
  ],
48
- "repository": "https://github.com/ericmmartin/youtube-transcript-plus.git",
48
+ "repository": {
49
+ "type": "git",
50
+ "url": "git+https://github.com/ericmmartin/youtube-transcript-plus.git"
51
+ },
49
52
  "publishConfig": {
50
53
  "access": "public"
51
54
  },