npm - youtube-transcript-plus - Versions diffs - 1.0.4 → 1.1.0 - Mend

youtube-transcript-plus 1.0.4 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/dist/index.d.ts +6 -0
package/dist/youtube-transcript-plus.js +112 -65
package/package.json +12 -9

package/dist/index.d.ts CHANGED

@@ -1,4 +1,10 @@
 import { TranscriptConfig, TranscriptResponse } from './types';
+/**
+ * Implementation notes:
+ * - Keeps the public surface identical.
+ * - Internals now use YouTube Innertube `player` to discover captionTracks instead of scraping the watch HTML.
+ * - Honors `lang`, custom fetch hooks (`videoFetch`, `transcriptFetch`), and optional cache strategy.
+ */
 export declare class YoutubeTranscript {
     private config?;
     constructor(config?: TranscriptConfig & {

package/dist/youtube-transcript-plus.js CHANGED

@@ -146,101 +146,148 @@ class InMemoryCache {
     }
 }
+/**
+ * Implementation notes:
+ * - Keeps the public surface identical.
+ * - Internals now use YouTube Innertube `player` to discover captionTracks instead of scraping the watch HTML.
+ * - Honors `lang`, custom fetch hooks (`videoFetch`, `transcriptFetch`), and optional cache strategy.
+ */
 class YoutubeTranscript {
     constructor(config) {
         this.config = config;
     }
     fetchTranscript(videoId) {
         return __awaiter(this, void 0, void 0, function* () {
-            var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o, _p;
+            var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
             const identifier = retrieveVideoId(videoId);
-            const userAgent = ((_a = this.config) === null || _a === void 0 ? void 0 : _a.userAgent) || DEFAULT_USER_AGENT;
-            // Use custom fetch functions if provided, otherwise use defaultFetch
-            const videoFetch = ((_b = this.config) === null || _b === void 0 ? void 0 : _b.videoFetch) || defaultFetch;
-            const transcriptFetch = ((_c = this.config) === null || _c === void 0 ? void 0 : _c.transcriptFetch) || defaultFetch;
-            // Cache key based on video ID and language
-            const cacheKey = `transcript:${identifier}:${((_d = this.config) === null || _d === void 0 ? void 0 : _d.lang) || 'default'}`;
-            // Check cache first
-            if ((_e = this.config) === null || _e === void 0 ? void 0 : _e.cache) {
-                const cachedTranscript = yield this.config.cache.get(cacheKey);
-                if (cachedTranscript) {
-                    return JSON.parse(cachedTranscript);
+            const lang = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang;
+            const userAgent = (_c = (_b = this.config) === null || _b === void 0 ? void 0 : _b.userAgent) !== null && _c !== void 0 ? _c : DEFAULT_USER_AGENT;
+            // Cache lookup (if provided)
+            const cache = (_d = this.config) === null || _d === void 0 ? void 0 : _d.cache;
+            const cacheTTL = (_e = this.config) === null || _e === void 0 ? void 0 : _e.cacheTTL;
+            const cacheKey = `yt:transcript:${identifier}:${lang !== null && lang !== void 0 ? lang : ''}`;
+            if (cache) {
+                const cached = yield cache.get(cacheKey);
+                if (cached) {
+                    try {
+                        return JSON.parse(cached);
+                    }
+                    catch (_o) {
+                        // ignore parse errors and continue
+                    }
                 }
             }
+            // 1) Fetch the watch page to extract an Innertube API key (no interface change)
+            // Decide protocol once and reuse
             const protocol = ((_f = this.config) === null || _f === void 0 ? void 0 : _f.disableHttps) ? 'http' : 'https';
-            // Fetch the video page
-            const videoPageResponse = yield videoFetch({
-                url: `${protocol}://www.youtube.com/watch?v=${identifier}`,
-                lang: (_g = this.config) === null || _g === void 0 ? void 0 : _g.lang,
-                userAgent,
-            });
+            const watchUrl = `${protocol}://www.youtube.com/watch?v=${identifier}`;
+            const videoPageResponse = ((_g = this.config) === null || _g === void 0 ? void 0 : _g.videoFetch)
+                ? yield this.config.videoFetch({ url: watchUrl, lang, userAgent })
+                : yield defaultFetch({ url: watchUrl, lang, userAgent });
             if (!videoPageResponse.ok) {
                 throw new YoutubeTranscriptVideoUnavailableError(identifier);
             }
             const videoPageBody = yield videoPageResponse.text();
-            // Parse the video page to extract captions
-            const splittedHTML = videoPageBody.split('"captions":');
-            if (splittedHTML.length <= 1) {
-                if (videoPageBody.includes('class="g-recaptcha"')) {
-                    throw new YoutubeTranscriptTooManyRequestError();
-                }
-                if (!videoPageBody.includes('"playabilityStatus":')) {
-                    throw new YoutubeTranscriptVideoUnavailableError(identifier);
-                }
-                throw new YoutubeTranscriptDisabledError(identifier);
+            // Basic bot/recaptcha detection preserves old error behavior
+            if (videoPageBody.includes('class="g-recaptcha"')) {
+                throw new YoutubeTranscriptTooManyRequestError();
             }
-            const captions = (_h = (() => {
-                try {
-                    return JSON.parse(splittedHTML[1].split(',"videoDetails')[0].replace('\n', ''));
-                }
-                catch (e) {
-                    return undefined;
+            // 2) Extract Innertube API key from the page
+            const apiKeyMatch = videoPageBody.match(/"INNERTUBE_API_KEY":"([^"]+)"/) ||
+                videoPageBody.match(/INNERTUBE_API_KEY\\":\\"([^\\"]+)\\"/);
+            if (!apiKeyMatch) {
+                // If captions JSON wasn't present previously and we also can't find an API key,
+                // retain the disabled semantics for compatibility.
+                throw new YoutubeTranscriptNotAvailableError(identifier);
+            }
+            const apiKey = apiKeyMatch[1];
+            // 3) Call Innertube player as ANDROID client to retrieve captionTracks
+            const playerEndpoint = `https://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
+            const playerBody = {
+                context: {
+                    client: {
+                        clientName: 'ANDROID',
+                        clientVersion: '20.10.38',
+                    },
+                },
+                videoId: identifier,
+            };
+            // Use global fetch for the POST. No public interface change.
+            const playerRes = yield fetch(playerEndpoint, {
+                method: 'POST',
+                headers: Object.assign({ 'Content-Type': 'application/json', 'User-Agent': userAgent }, (lang ? { 'Accept-Language': lang } : {})),
+                body: JSON.stringify(playerBody),
+            });
+            if (!playerRes.ok) {
+                throw new YoutubeTranscriptVideoUnavailableError(identifier);
+            }
+            const playerJson = yield playerRes.json();
+            const tracklist = (_j = (_h = playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) === null || _h === void 0 ? void 0 : _h.playerCaptionsTracklistRenderer) !== null && _j !== void 0 ? _j : playerJson === null || playerJson === void 0 ? void 0 : playerJson.playerCaptionsTracklistRenderer;
+            const tracks = tracklist === null || tracklist === void 0 ? void 0 : tracklist.captionTracks;
+            const isPlayableOk = ((_k = playerJson === null || playerJson === void 0 ? void 0 : playerJson.playabilityStatus) === null || _k === void 0 ? void 0 : _k.status) === 'OK';
+            // If `captions` is entirely missing, treat as "not available"
+            if (!(playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) || !tracklist) {
+                // If video is playable but captions aren’t provided, treat as "disabled"
+                if (isPlayableOk) {
+                    throw new YoutubeTranscriptDisabledError(identifier);
                 }
-            })()) === null || _h === void 0 ? void 0 : _h['playerCaptionsTracklistRenderer'];
-            if (!captions) {
+                // Otherwise we can’t assert they’re disabled; treat as "not available"
+                throw new YoutubeTranscriptNotAvailableError(identifier);
+            }
+            // If `captions` exists but there are zero tracks, treat as "disabled"
+            if (!Array.isArray(tracks) || tracks.length === 0) {
                 throw new YoutubeTranscriptDisabledError(identifier);
             }
-            if (!('captionTracks' in captions)) {
+            // Respect requested language or fallback to first track
+            const selectedTrack = lang ? tracks.find((t) => t.languageCode === lang) : tracks[0];
+            if (!selectedTrack) {
+                const available = tracks.map((t) => t.languageCode).filter(Boolean);
+                throw new YoutubeTranscriptNotAvailableLanguageError(lang, available, identifier);
+            }
+            // 4) Build transcript URL; prefer XML by stripping fmt if present
+            let transcriptURL = selectedTrack.baseUrl || selectedTrack.url;
+            if (!transcriptURL) {
                 throw new YoutubeTranscriptNotAvailableError(identifier);
             }
-            if (((_j = this.config) === null || _j === void 0 ? void 0 : _j.lang) &&
-                !captions.captionTracks.some((track) => { var _a; return track.languageCode === ((_a = this.config) === null || _a === void 0 ? void 0 : _a.lang); })) {
-                throw new YoutubeTranscriptNotAvailableLanguageError((_k = this.config) === null || _k === void 0 ? void 0 : _k.lang, captions.captionTracks.map((track) => track.languageCode), identifier);
+            transcriptURL = transcriptURL.replace(/&fmt=[^&]+$/, '');
+            if ((_l = this.config) === null || _l === void 0 ? void 0 : _l.disableHttps) {
+                transcriptURL = transcriptURL.replace(/^https:\/\//, 'http://');
             }
-            const captionURL = (((_l = this.config) === null || _l === void 0 ? void 0 : _l.lang)
-                ? captions.captionTracks.find((track) => { var _a; return track.languageCode === ((_a = this.config) === null || _a === void 0 ? void 0 : _a.lang); })
-                : captions.captionTracks[0]).baseUrl;
-            const transcriptURL = ((_m = this.config) === null || _m === void 0 ? void 0 : _m.disableHttps)
-                ? captionURL.replace('https://', 'http://')
-                : captionURL;
-            // Fetch the transcript
-            const transcriptResponse = yield transcriptFetch({
-                url: transcriptURL,
-                lang: (_o = this.config) === null || _o === void 0 ? void 0 : _o.lang,
-                userAgent,
-            });
+            // 5) Fetch transcript XML using the same hook surface as before
+            const transcriptResponse = ((_m = this.config) === null || _m === void 0 ? void 0 : _m.transcriptFetch)
+                ? yield this.config.transcriptFetch({ url: transcriptURL, lang, userAgent })
+                : yield defaultFetch({ url: transcriptURL, lang, userAgent });
             if (!transcriptResponse.ok) {
+                // Preserve legacy behavior
+                if (transcriptResponse.status === 429) {
+                    throw new YoutubeTranscriptTooManyRequestError();
+                }
                 throw new YoutubeTranscriptNotAvailableError(identifier);
             }
             const transcriptBody = yield transcriptResponse.text();
+            // 6) Parse XML into the existing TranscriptResponse shape
             const results = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)];
-            const transcript = results.map((result) => {
-                var _a, _b;
-                return ({
-                    text: result[3],
-                    duration: parseFloat(result[2]),
-                    offset: parseFloat(result[1]),
-                    lang: (_b = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang) !== null && _b !== void 0 ? _b : captions.captionTracks[0].languageCode,
-                });
-            });
-            // Store in cache if a strategy is provided
-            if ((_p = this.config) === null || _p === void 0 ? void 0 : _p.cache) {
-                yield this.config.cache.set(cacheKey, JSON.stringify(transcript), this.config.cacheTTL);
+            const transcript = results.map((m) => ({
+                text: m[3],
+                duration: parseFloat(m[2]),
+                offset: parseFloat(m[1]),
+                lang: lang !== null && lang !== void 0 ? lang : selectedTrack.languageCode,
+            }));
+            if (transcript.length === 0) {
+                throw new YoutubeTranscriptNotAvailableError(identifier);
+            }
+            // Cache store
+            if (cache) {
+                try {
+                    yield cache.set(cacheKey, JSON.stringify(transcript), cacheTTL);
+                }
+                catch (_p) {
+                    // non-fatal
+                }
             }
             return transcript;
         });
     }
-    // Add static method for new usage pattern
     static fetchTranscript(videoId, config) {
         return __awaiter(this, void 0, void 0, function* () {
             const instance = new YoutubeTranscript(config);

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "youtube-transcript-plus",
-  "version": "1.0.4",
+  "version": "1.1.0",
   "description": "Fetch transcript from a YouTube video",
   "type": "module",
   "main": "dist/youtube-transcript-plus.js",
@@ -29,23 +29,26 @@
     ]
   },
   "devDependencies": {
-    "@types/jest": "^29.5.14",
+    "@types/jest": "^30.0.0",
     "https-proxy-agent": "^7.0.6",
     "husky": "^9.1.7",
-    "jest": "^29.7.0",
-    "lint-staged": "^15.5.0",
-    "prettier": "^3.5.3",
-    "rollup": "^4.37.0",
+    "jest": "^30.0.5",
+    "lint-staged": "^16.1.5",
+    "prettier": "^3.6.2",
+    "rollup": "^4.46.4",
     "rollup-plugin-typescript": "^1.0.1",
     "rollup-plugin-typescript2": "^0.36.0",
-    "ts-jest": "^29.3.0",
+    "ts-jest": "^29.4.1",
     "tslib": "^2.8.1",
-    "typescript": "^5.8.2"
+    "typescript": "^5.9.2"
   },
   "files": [
     "dist/*"
   ],
-  "repository": "https://github.com/ericmmartin/youtube-transcript-plus.git",
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/ericmmartin/youtube-transcript-plus.git"
+  },
   "publishConfig": {
     "access": "public"
   },