npm - youtube-transcript-plus - Versions diffs - 1.0.3 → 1.1.0 - Mend

youtube-transcript-plus 1.0.3 → 1.1.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md +15 -0
package/dist/index.d.ts +6 -0
package/dist/types.d.ts +1 -0
package/dist/youtube-transcript-plus.js +112 -61
package/package.json +12 -9

package/README.md CHANGED Viewed

@@ -42,6 +42,20 @@ fetchTranscript('videoId_or_URL', {
   .catch(console.error);
 ```
+### HTTP Support
+You can disable HTTPS and use HTTP instead for YouTube requests by setting the `disableHttps` option to `true`. This might be necessary in certain environments where HTTPS connections are restricted.
+```javascript
+fetchTranscript('videoId_or_URL', {
+  disableHttps: true, // Use HTTP instead of HTTPS
+})
+  .then(console.log)
+  .catch(console.error);
+```
+**Security Warning:** Using HTTP instead of HTTPS removes transport layer security and is not recommended for production environments. Only use this option when absolutely necessary.
 ### Custom Fetch Functions
 You can inject custom `videoFetch` and `transcriptFetch` functions to modify the fetch behavior, such as using a proxy or custom headers.
@@ -188,6 +202,7 @@ Fetches the transcript for a YouTube video.
   - **`userAgent`**: Custom User-Agent string.
   - **`cache`**: Custom caching strategy.
   - **`cacheTTL`**: Time-to-live for cache entries in milliseconds.
+  - **`disableHttps`**: Set to `true` to use HTTP instead of HTTPS for YouTube requests.
   - **`videoFetch`**: Custom fetch function for the video page request.
   - **`transcriptFetch`**: Custom fetch function for the transcript request.

package/dist/index.d.ts CHANGED Viewed

@@ -1,4 +1,10 @@
 import { TranscriptConfig, TranscriptResponse } from './types';
+/**
+ * Implementation notes:
+ * - Keeps the public surface identical.
+ * - Internals now use YouTube Innertube `player` to discover captionTracks instead of scraping the watch HTML.
+ * - Honors `lang`, custom fetch hooks (`videoFetch`, `transcriptFetch`), and optional cache strategy.
+ */
 export declare class YoutubeTranscript {
     private config?;
     constructor(config?: TranscriptConfig & {

package/dist/types.d.ts CHANGED Viewed

@@ -7,6 +7,7 @@ export interface TranscriptConfig {
     userAgent?: string;
     cache?: CacheStrategy;
     cacheTTL?: number;
+    disableHttps?: boolean;
     videoFetch?: (params: {
         url: string;
         lang?: string;

package/dist/youtube-transcript-plus.js CHANGED Viewed

@@ -146,6 +146,12 @@ class InMemoryCache {
     }
 }
+/**
+ * Implementation notes:
+ * - Keeps the public surface identical.
+ * - Internals now use YouTube Innertube `player` to discover captionTracks instead of scraping the watch HTML.
+ * - Honors `lang`, custom fetch hooks (`videoFetch`, `transcriptFetch`), and optional cache strategy.
+ */
 class YoutubeTranscript {
     constructor(config) {
         this.config = config;
@@ -154,89 +160,134 @@ class YoutubeTranscript {
         return __awaiter(this, void 0, void 0, function* () {
             var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
             const identifier = retrieveVideoId(videoId);
-            const userAgent = ((_a = this.config) === null || _a === void 0 ? void 0 : _a.userAgent) || DEFAULT_USER_AGENT;
-            // Use custom fetch functions if provided, otherwise use defaultFetch
-            const videoFetch = ((_b = this.config) === null || _b === void 0 ? void 0 : _b.videoFetch) || defaultFetch;
-            const transcriptFetch = ((_c = this.config) === null || _c === void 0 ? void 0 : _c.transcriptFetch) || defaultFetch;
-            // Cache key based on video ID and language
-            const cacheKey = `transcript:${identifier}:${((_d = this.config) === null || _d === void 0 ? void 0 : _d.lang) || 'default'}`;
-            // Check cache first
-            if ((_e = this.config) === null || _e === void 0 ? void 0 : _e.cache) {
-                const cachedTranscript = yield this.config.cache.get(cacheKey);
-                if (cachedTranscript) {
-                    return JSON.parse(cachedTranscript);
+            const lang = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang;
+            const userAgent = (_c = (_b = this.config) === null || _b === void 0 ? void 0 : _b.userAgent) !== null && _c !== void 0 ? _c : DEFAULT_USER_AGENT;
+            // Cache lookup (if provided)
+            const cache = (_d = this.config) === null || _d === void 0 ? void 0 : _d.cache;
+            const cacheTTL = (_e = this.config) === null || _e === void 0 ? void 0 : _e.cacheTTL;
+            const cacheKey = `yt:transcript:${identifier}:${lang !== null && lang !== void 0 ? lang : ''}`;
+            if (cache) {
+                const cached = yield cache.get(cacheKey);
+                if (cached) {
+                    try {
+                        return JSON.parse(cached);
+                    }
+                    catch (_o) {
+                        // ignore parse errors and continue
+                    }
                 }
             }
-            // Fetch the video page
-            const videoPageResponse = yield videoFetch({
-                url: `https://www.youtube.com/watch?v=${identifier}`,
-                lang: (_f = this.config) === null || _f === void 0 ? void 0 : _f.lang,
-                userAgent,
-            });
+            // 1) Fetch the watch page to extract an Innertube API key (no interface change)
+            // Decide protocol once and reuse
+            const protocol = ((_f = this.config) === null || _f === void 0 ? void 0 : _f.disableHttps) ? 'http' : 'https';
+            const watchUrl = `${protocol}://www.youtube.com/watch?v=${identifier}`;
+            const videoPageResponse = ((_g = this.config) === null || _g === void 0 ? void 0 : _g.videoFetch)
+                ? yield this.config.videoFetch({ url: watchUrl, lang, userAgent })
+                : yield defaultFetch({ url: watchUrl, lang, userAgent });
             if (!videoPageResponse.ok) {
                 throw new YoutubeTranscriptVideoUnavailableError(identifier);
             }
             const videoPageBody = yield videoPageResponse.text();
-            // Parse the video page to extract captions
-            const splittedHTML = videoPageBody.split('"captions":');
-            if (splittedHTML.length <= 1) {
-                if (videoPageBody.includes('class="g-recaptcha"')) {
-                    throw new YoutubeTranscriptTooManyRequestError();
-                }
-                if (!videoPageBody.includes('"playabilityStatus":')) {
-                    throw new YoutubeTranscriptVideoUnavailableError(identifier);
-                }
-                throw new YoutubeTranscriptDisabledError(identifier);
+            // Basic bot/recaptcha detection preserves old error behavior
+            if (videoPageBody.includes('class="g-recaptcha"')) {
+                throw new YoutubeTranscriptTooManyRequestError();
             }
-            const captions = (_g = (() => {
-                try {
-                    return JSON.parse(splittedHTML[1].split(',"videoDetails')[0].replace('\n', ''));
-                }
-                catch (e) {
-                    return undefined;
+            // 2) Extract Innertube API key from the page
+            const apiKeyMatch = videoPageBody.match(/"INNERTUBE_API_KEY":"([^"]+)"/) ||
+                videoPageBody.match(/INNERTUBE_API_KEY\\":\\"([^\\"]+)\\"/);
+            if (!apiKeyMatch) {
+                // If captions JSON wasn't present previously and we also can't find an API key,
+                // retain the disabled semantics for compatibility.
+                throw new YoutubeTranscriptNotAvailableError(identifier);
+            }
+            const apiKey = apiKeyMatch[1];
+            // 3) Call Innertube player as ANDROID client to retrieve captionTracks
+            const playerEndpoint = `https://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
+            const playerBody = {
+                context: {
+                    client: {
+                        clientName: 'ANDROID',
+                        clientVersion: '20.10.38',
+                    },
+                },
+                videoId: identifier,
+            };
+            // Use global fetch for the POST. No public interface change.
+            const playerRes = yield fetch(playerEndpoint, {
+                method: 'POST',
+                headers: Object.assign({ 'Content-Type': 'application/json', 'User-Agent': userAgent }, (lang ? { 'Accept-Language': lang } : {})),
+                body: JSON.stringify(playerBody),
+            });
+            if (!playerRes.ok) {
+                throw new YoutubeTranscriptVideoUnavailableError(identifier);
+            }
+            const playerJson = yield playerRes.json();
+            const tracklist = (_j = (_h = playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) === null || _h === void 0 ? void 0 : _h.playerCaptionsTracklistRenderer) !== null && _j !== void 0 ? _j : playerJson === null || playerJson === void 0 ? void 0 : playerJson.playerCaptionsTracklistRenderer;
+            const tracks = tracklist === null || tracklist === void 0 ? void 0 : tracklist.captionTracks;
+            const isPlayableOk = ((_k = playerJson === null || playerJson === void 0 ? void 0 : playerJson.playabilityStatus) === null || _k === void 0 ? void 0 : _k.status) === 'OK';
+            // If `captions` is entirely missing, treat as "not available"
+            if (!(playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) || !tracklist) {
+                // If video is playable but captions aren’t provided, treat as "disabled"
+                if (isPlayableOk) {
+                    throw new YoutubeTranscriptDisabledError(identifier);
                 }
-            })()) === null || _g === void 0 ? void 0 : _g['playerCaptionsTracklistRenderer'];
-            if (!captions) {
+                // Otherwise we can’t assert they’re disabled; treat as "not available"
+                throw new YoutubeTranscriptNotAvailableError(identifier);
+            }
+            // If `captions` exists but there are zero tracks, treat as "disabled"
+            if (!Array.isArray(tracks) || tracks.length === 0) {
                 throw new YoutubeTranscriptDisabledError(identifier);
             }
-            if (!('captionTracks' in captions)) {
+            // Respect requested language or fallback to first track
+            const selectedTrack = lang ? tracks.find((t) => t.languageCode === lang) : tracks[0];
+            if (!selectedTrack) {
+                const available = tracks.map((t) => t.languageCode).filter(Boolean);
+                throw new YoutubeTranscriptNotAvailableLanguageError(lang, available, identifier);
+            }
+            // 4) Build transcript URL; prefer XML by stripping fmt if present
+            let transcriptURL = selectedTrack.baseUrl || selectedTrack.url;
+            if (!transcriptURL) {
                 throw new YoutubeTranscriptNotAvailableError(identifier);
             }
-            if (((_h = this.config) === null || _h === void 0 ? void 0 : _h.lang) &&
-                !captions.captionTracks.some((track) => { var _a; return track.languageCode === ((_a = this.config) === null || _a === void 0 ? void 0 : _a.lang); })) {
-                throw new YoutubeTranscriptNotAvailableLanguageError((_j = this.config) === null || _j === void 0 ? void 0 : _j.lang, captions.captionTracks.map((track) => track.languageCode), identifier);
+            transcriptURL = transcriptURL.replace(/&fmt=[^&]+$/, '');
+            if ((_l = this.config) === null || _l === void 0 ? void 0 : _l.disableHttps) {
+                transcriptURL = transcriptURL.replace(/^https:\/\//, 'http://');
             }
-            const transcriptURL = (((_k = this.config) === null || _k === void 0 ? void 0 : _k.lang)
-                ? captions.captionTracks.find((track) => { var _a; return track.languageCode === ((_a = this.config) === null || _a === void 0 ? void 0 : _a.lang); })
-                : captions.captionTracks[0]).baseUrl;
-            // Fetch the transcript
-            const transcriptResponse = yield transcriptFetch({
-                url: transcriptURL,
-                lang: (_l = this.config) === null || _l === void 0 ? void 0 : _l.lang,
-                userAgent,
-            });
+            // 5) Fetch transcript XML using the same hook surface as before
+            const transcriptResponse = ((_m = this.config) === null || _m === void 0 ? void 0 : _m.transcriptFetch)
+                ? yield this.config.transcriptFetch({ url: transcriptURL, lang, userAgent })
+                : yield defaultFetch({ url: transcriptURL, lang, userAgent });
             if (!transcriptResponse.ok) {
+                // Preserve legacy behavior
+                if (transcriptResponse.status === 429) {
+                    throw new YoutubeTranscriptTooManyRequestError();
+                }
                 throw new YoutubeTranscriptNotAvailableError(identifier);
             }
             const transcriptBody = yield transcriptResponse.text();
+            // 6) Parse XML into the existing TranscriptResponse shape
             const results = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)];
-            const transcript = results.map((result) => {
-                var _a, _b;
-                return ({
-                    text: result[3],
-                    duration: parseFloat(result[2]),
-                    offset: parseFloat(result[1]),
-                    lang: (_b = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang) !== null && _b !== void 0 ? _b : captions.captionTracks[0].languageCode,
-                });
-            });
-            // Store in cache if a strategy is provided
-            if ((_m = this.config) === null || _m === void 0 ? void 0 : _m.cache) {
-                yield this.config.cache.set(cacheKey, JSON.stringify(transcript), this.config.cacheTTL);
+            const transcript = results.map((m) => ({
+                text: m[3],
+                duration: parseFloat(m[2]),
+                offset: parseFloat(m[1]),
+                lang: lang !== null && lang !== void 0 ? lang : selectedTrack.languageCode,
+            }));
+            if (transcript.length === 0) {
+                throw new YoutubeTranscriptNotAvailableError(identifier);
+            }
+            // Cache store
+            if (cache) {
+                try {
+                    yield cache.set(cacheKey, JSON.stringify(transcript), cacheTTL);
+                }
+                catch (_p) {
+                    // non-fatal
+                }
             }
             return transcript;
         });
     }
-    // Add static method for new usage pattern
     static fetchTranscript(videoId, config) {
         return __awaiter(this, void 0, void 0, function* () {
             const instance = new YoutubeTranscript(config);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "youtube-transcript-plus",
-  "version": "1.0.3",
+  "version": "1.1.0",
   "description": "Fetch transcript from a YouTube video",
   "type": "module",
   "main": "dist/youtube-transcript-plus.js",
@@ -29,23 +29,26 @@
     ]
   },
   "devDependencies": {
-    "@types/jest": "^29.5.14",
+    "@types/jest": "^30.0.0",
     "https-proxy-agent": "^7.0.6",
     "husky": "^9.1.7",
-    "jest": "^29.7.0",
-    "lint-staged": "^15.5.0",
-    "prettier": "^3.5.3",
-    "rollup": "^4.37.0",
+    "jest": "^30.0.5",
+    "lint-staged": "^16.1.5",
+    "prettier": "^3.6.2",
+    "rollup": "^4.46.4",
     "rollup-plugin-typescript": "^1.0.1",
     "rollup-plugin-typescript2": "^0.36.0",
-    "ts-jest": "^29.3.0",
+    "ts-jest": "^29.4.1",
     "tslib": "^2.8.1",
-    "typescript": "^5.8.2"
+    "typescript": "^5.9.2"
   },
   "files": [
     "dist/*"
   ],
-  "repository": "https://github.com/ericmmartin/youtube-transcript-plus.git",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/ericmmartin/youtube-transcript-plus.git"
+  },
   "publishConfig": {
     "access": "public"
   },