youtube-transcript-plus 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +23 -3
- package/dist/constants.d.ts +1 -0
- package/dist/index.d.ts +1 -3
- package/dist/types.d.ts +11 -10
- package/dist/utils.d.ts +2 -5
- package/dist/youtube-transcript-plus.js +33 -20
- package/package.json +10 -2
package/README.md
CHANGED
|
@@ -58,19 +58,37 @@ fetchTranscript('videoId_or_URL', {
|
|
|
58
58
|
|
|
59
59
|
### Custom Fetch Functions
|
|
60
60
|
|
|
61
|
-
You can inject custom `videoFetch` and `transcriptFetch` functions to modify the fetch behavior, such as using a proxy or custom headers.
|
|
61
|
+
You can inject custom `videoFetch`, `playerFetch`, and `transcriptFetch` functions to modify the fetch behavior, such as using a proxy or custom headers. The library makes three types of HTTP requests:
|
|
62
|
+
|
|
63
|
+
1. **`videoFetch`**: Fetches the YouTube video page (GET request)
|
|
64
|
+
2. **`playerFetch`**: Calls YouTube's Innertube API to get caption tracks (POST request)
|
|
65
|
+
3. **`transcriptFetch`**: Downloads the actual transcript data (GET request)
|
|
62
66
|
|
|
63
67
|
```javascript
|
|
64
68
|
fetchTranscript('videoId_or_URL', {
|
|
65
69
|
videoFetch: async ({ url, lang, userAgent }) => {
|
|
70
|
+
// Custom logic for video page fetch (GET)
|
|
71
|
+
return fetch(`https://my-proxy-server.com/?url=${encodeURIComponent(url)}`, {
|
|
72
|
+
headers: {
|
|
73
|
+
...(lang && { 'Accept-Language': lang }),
|
|
74
|
+
'User-Agent': userAgent,
|
|
75
|
+
},
|
|
76
|
+
});
|
|
77
|
+
},
|
|
78
|
+
playerFetch: async ({ url, method, body, headers, lang, userAgent }) => {
|
|
79
|
+
// Custom logic for Innertube API call (POST)
|
|
66
80
|
return fetch(`https://my-proxy-server.com/?url=${encodeURIComponent(url)}`, {
|
|
81
|
+
method,
|
|
67
82
|
headers: {
|
|
68
83
|
...(lang && { 'Accept-Language': lang }),
|
|
69
84
|
'User-Agent': userAgent,
|
|
85
|
+
...headers,
|
|
70
86
|
},
|
|
87
|
+
body,
|
|
71
88
|
});
|
|
72
89
|
},
|
|
73
90
|
transcriptFetch: async ({ url, lang, userAgent }) => {
|
|
91
|
+
// Custom logic for transcript data fetch (GET)
|
|
74
92
|
return fetch(`https://my-proxy-server.com/?url=${encodeURIComponent(url)}`, {
|
|
75
93
|
headers: {
|
|
76
94
|
...(lang && { 'Accept-Language': lang }),
|
|
@@ -187,6 +205,7 @@ The repository includes several example files in the `example/` directory to dem
|
|
|
187
205
|
3. **`fs-caching-usage.js`**: Demonstrates how to use the `FsCache` to cache transcripts on the file system with a 1-day TTL.
|
|
188
206
|
4. **`language-usage.js`**: Shows how to fetch a transcript in a specific language (e.g., French).
|
|
189
207
|
5. **`proxy-usage.js`**: Demonstrates how to use a proxy server to fetch transcripts, which can be useful for bypassing rate limits or accessing restricted content.
|
|
208
|
+
6. **`custom-fetch-usage.js`**: Shows how to use all three custom fetch functions (`videoFetch`, `playerFetch`, `transcriptFetch`) with logging and custom headers.
|
|
190
209
|
|
|
191
210
|
These examples can be found in the `example/` directory of the repository.
|
|
192
211
|
|
|
@@ -203,8 +222,9 @@ Fetches the transcript for a YouTube video.
|
|
|
203
222
|
- **`cache`**: Custom caching strategy.
|
|
204
223
|
- **`cacheTTL`**: Time-to-live for cache entries in milliseconds.
|
|
205
224
|
- **`disableHttps`**: Set to `true` to use HTTP instead of HTTPS for YouTube requests.
|
|
206
|
-
- **`videoFetch`**: Custom fetch function for the video page request.
|
|
207
|
-
- **`transcriptFetch`**: Custom fetch function for the transcript data request.
|
|
225
|
+
- **`videoFetch`**: Custom fetch function for the video page request (GET).
|
|
226
|
+
- **`playerFetch`**: Custom fetch function for the YouTube Innertube API request (POST).
|
|
227
|
+
- **`transcriptFetch`**: Custom fetch function for the transcript data request (GET).
|
|
208
228
|
|
|
209
229
|
Returns a `Promise<TranscriptResponse[]>` where each item in the array represents a transcript segment with the following properties:
|
|
210
230
|
|
package/dist/constants.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
1
|
export declare const DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
|
|
2
2
|
export declare const RE_YOUTUBE: RegExp;
|
|
3
3
|
export declare const RE_XML_TRANSCRIPT: RegExp;
|
|
4
|
+
export declare const DEFAULT_CACHE_TTL = 3600000;
|
package/dist/index.d.ts
CHANGED
|
@@ -7,9 +7,7 @@ import { TranscriptConfig, TranscriptResponse } from './types';
|
|
|
7
7
|
*/
|
|
8
8
|
export declare class YoutubeTranscript {
|
|
9
9
|
private config?;
|
|
10
|
-
constructor(config?: TranscriptConfig & {
|
|
11
|
-
cacheTTL?: number;
|
|
12
|
-
});
|
|
10
|
+
constructor(config?: TranscriptConfig);
|
|
13
11
|
fetchTranscript(videoId: string): Promise<TranscriptResponse[]>;
|
|
14
12
|
static fetchTranscript(videoId: string, config?: TranscriptConfig): Promise<TranscriptResponse[]>;
|
|
15
13
|
}
|
package/dist/types.d.ts
CHANGED
|
@@ -2,22 +2,23 @@ export interface CacheStrategy {
|
|
|
2
2
|
get(key: string): Promise<string | null>;
|
|
3
3
|
set(key: string, value: string, ttl?: number): Promise<void>;
|
|
4
4
|
}
|
|
5
|
+
export interface FetchParams {
|
|
6
|
+
url: string;
|
|
7
|
+
lang?: string;
|
|
8
|
+
userAgent?: string;
|
|
9
|
+
method?: 'GET' | 'POST';
|
|
10
|
+
body?: string;
|
|
11
|
+
headers?: Record<string, string>;
|
|
12
|
+
}
|
|
5
13
|
export interface TranscriptConfig {
|
|
6
14
|
lang?: string;
|
|
7
15
|
userAgent?: string;
|
|
8
16
|
cache?: CacheStrategy;
|
|
9
17
|
cacheTTL?: number;
|
|
10
18
|
disableHttps?: boolean;
|
|
11
|
-
videoFetch?: (params: {
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
userAgent?: string;
|
|
15
|
-
}) => Promise<Response>;
|
|
16
|
-
transcriptFetch?: (params: {
|
|
17
|
-
url: string;
|
|
18
|
-
lang?: string;
|
|
19
|
-
userAgent?: string;
|
|
20
|
-
}) => Promise<Response>;
|
|
19
|
+
videoFetch?: (params: FetchParams) => Promise<Response>;
|
|
20
|
+
transcriptFetch?: (params: FetchParams) => Promise<Response>;
|
|
21
|
+
playerFetch?: (params: FetchParams) => Promise<Response>;
|
|
21
22
|
}
|
|
22
23
|
export interface TranscriptResponse {
|
|
23
24
|
text: string;
|
package/dist/utils.d.ts
CHANGED
|
@@ -1,6 +1,3 @@
|
|
|
1
|
+
import { FetchParams } from './types';
|
|
1
2
|
export declare function retrieveVideoId(videoId: string): string;
|
|
2
|
-
export declare function defaultFetch(
|
|
3
|
-
url: string;
|
|
4
|
-
lang?: string;
|
|
5
|
-
userAgent?: string;
|
|
6
|
-
}): Promise<Response>;
|
|
3
|
+
export declare function defaultFetch(params: FetchParams): Promise<Response>;
|
|
@@ -36,6 +36,7 @@ typeof SuppressedError === "function" ? SuppressedError : function (error, suppr
|
|
|
36
36
|
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';
|
|
37
37
|
const RE_YOUTUBE = /(?:v=|\/|v\/|embed\/|watch\?.*v=|youtu\.be\/|\/v\/|e\/|watch\?.*vi?=|\/embed\/|\/v\/|vi?\/|watch\?.*vi?=|youtu\.be\/|\/vi?\/|\/e\/)([a-zA-Z0-9_-]{11})/i;
|
|
38
38
|
const RE_XML_TRANSCRIPT = /<text start="([^"]*)" dur="([^"]*)">([^<]*)<\/text>/g;
|
|
39
|
+
const DEFAULT_CACHE_TTL = 3600000; // 1 hour in milliseconds
|
|
39
40
|
|
|
40
41
|
class YoutubeTranscriptTooManyRequestError extends Error {
|
|
41
42
|
constructor() {
|
|
@@ -84,16 +85,23 @@ function retrieveVideoId(videoId) {
|
|
|
84
85
|
}
|
|
85
86
|
throw new YoutubeTranscriptInvalidVideoIdError();
|
|
86
87
|
}
|
|
87
|
-
function defaultFetch(
|
|
88
|
-
return __awaiter(this,
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
88
|
+
function defaultFetch(params) {
|
|
89
|
+
return __awaiter(this, void 0, void 0, function* () {
|
|
90
|
+
const { url, lang, userAgent, method = 'GET', body, headers = {} } = params;
|
|
91
|
+
const fetchHeaders = Object.assign(Object.assign({ 'User-Agent': userAgent || DEFAULT_USER_AGENT }, (lang && { 'Accept-Language': lang })), headers);
|
|
92
|
+
const fetchOptions = {
|
|
93
|
+
method,
|
|
94
|
+
headers: fetchHeaders,
|
|
95
|
+
};
|
|
96
|
+
if (body && method === 'POST') {
|
|
97
|
+
fetchOptions.body = body;
|
|
98
|
+
}
|
|
99
|
+
return fetch(url, fetchOptions);
|
|
92
100
|
});
|
|
93
101
|
}
|
|
94
102
|
|
|
95
103
|
class FsCache {
|
|
96
|
-
constructor(cacheDir = './cache', defaultTTL =
|
|
104
|
+
constructor(cacheDir = './cache', defaultTTL = DEFAULT_CACHE_TTL) {
|
|
97
105
|
this.cacheDir = cacheDir;
|
|
98
106
|
this.defaultTTL = defaultTTL;
|
|
99
107
|
fs.mkdir(cacheDir, { recursive: true }).catch(() => { });
|
|
@@ -109,7 +117,7 @@ class FsCache {
|
|
|
109
117
|
}
|
|
110
118
|
yield fs.unlink(filePath);
|
|
111
119
|
}
|
|
112
|
-
catch (
|
|
120
|
+
catch (_error) { }
|
|
113
121
|
return null;
|
|
114
122
|
});
|
|
115
123
|
}
|
|
@@ -123,9 +131,8 @@ class FsCache {
|
|
|
123
131
|
}
|
|
124
132
|
|
|
125
133
|
class InMemoryCache {
|
|
126
|
-
constructor(defaultTTL =
|
|
134
|
+
constructor(defaultTTL = DEFAULT_CACHE_TTL) {
|
|
127
135
|
this.cache = new Map();
|
|
128
|
-
// 1 hour default TTL
|
|
129
136
|
this.defaultTTL = defaultTTL;
|
|
130
137
|
}
|
|
131
138
|
get(key) {
|
|
@@ -158,7 +165,7 @@ class YoutubeTranscript {
|
|
|
158
165
|
}
|
|
159
166
|
fetchTranscript(videoId) {
|
|
160
167
|
return __awaiter(this, void 0, void 0, function* () {
|
|
161
|
-
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m;
|
|
168
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _j, _k, _l, _m, _o;
|
|
162
169
|
const identifier = retrieveVideoId(videoId);
|
|
163
170
|
const lang = (_a = this.config) === null || _a === void 0 ? void 0 : _a.lang;
|
|
164
171
|
const userAgent = (_c = (_b = this.config) === null || _b === void 0 ? void 0 : _b.userAgent) !== null && _c !== void 0 ? _c : DEFAULT_USER_AGENT;
|
|
@@ -172,7 +179,7 @@ class YoutubeTranscript {
|
|
|
172
179
|
try {
|
|
173
180
|
return JSON.parse(cached);
|
|
174
181
|
}
|
|
175
|
-
catch (
|
|
182
|
+
catch (_p) {
|
|
176
183
|
// ignore parse errors and continue
|
|
177
184
|
}
|
|
178
185
|
}
|
|
@@ -212,19 +219,25 @@ class YoutubeTranscript {
|
|
|
212
219
|
},
|
|
213
220
|
videoId: identifier,
|
|
214
221
|
};
|
|
215
|
-
// Use
|
|
216
|
-
const
|
|
222
|
+
// Use configurable playerFetch for the POST to allow custom fetch logic.
|
|
223
|
+
const playerFetchParams = {
|
|
224
|
+
url: playerEndpoint,
|
|
217
225
|
method: 'POST',
|
|
218
|
-
|
|
226
|
+
lang,
|
|
227
|
+
userAgent,
|
|
228
|
+
headers: { 'Content-Type': 'application/json' },
|
|
219
229
|
body: JSON.stringify(playerBody),
|
|
220
|
-
}
|
|
230
|
+
};
|
|
231
|
+
const playerRes = ((_h = this.config) === null || _h === void 0 ? void 0 : _h.playerFetch)
|
|
232
|
+
? yield this.config.playerFetch(playerFetchParams)
|
|
233
|
+
: yield defaultFetch(playerFetchParams);
|
|
221
234
|
if (!playerRes.ok) {
|
|
222
235
|
throw new YoutubeTranscriptVideoUnavailableError(identifier);
|
|
223
236
|
}
|
|
224
237
|
const playerJson = yield playerRes.json();
|
|
225
|
-
const tracklist = (
|
|
238
|
+
const tracklist = (_k = (_j = playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) === null || _j === void 0 ? void 0 : _j.playerCaptionsTracklistRenderer) !== null && _k !== void 0 ? _k : playerJson === null || playerJson === void 0 ? void 0 : playerJson.playerCaptionsTracklistRenderer;
|
|
226
239
|
const tracks = tracklist === null || tracklist === void 0 ? void 0 : tracklist.captionTracks;
|
|
227
|
-
const isPlayableOk = ((
|
|
240
|
+
const isPlayableOk = ((_l = playerJson === null || playerJson === void 0 ? void 0 : playerJson.playabilityStatus) === null || _l === void 0 ? void 0 : _l.status) === 'OK';
|
|
228
241
|
// If `captions` is entirely missing, treat as "not available"
|
|
229
242
|
if (!(playerJson === null || playerJson === void 0 ? void 0 : playerJson.captions) || !tracklist) {
|
|
230
243
|
// If video is playable but captions aren’t provided, treat as "disabled"
|
|
@@ -250,11 +263,11 @@ class YoutubeTranscript {
|
|
|
250
263
|
throw new YoutubeTranscriptNotAvailableError(identifier);
|
|
251
264
|
}
|
|
252
265
|
transcriptURL = transcriptURL.replace(/&fmt=[^&]+$/, '');
|
|
253
|
-
if ((
|
|
266
|
+
if ((_m = this.config) === null || _m === void 0 ? void 0 : _m.disableHttps) {
|
|
254
267
|
transcriptURL = transcriptURL.replace(/^https:\/\//, 'http://');
|
|
255
268
|
}
|
|
256
269
|
// 5) Fetch transcript XML using the same hook surface as before
|
|
257
|
-
const transcriptResponse = ((
|
|
270
|
+
const transcriptResponse = ((_o = this.config) === null || _o === void 0 ? void 0 : _o.transcriptFetch)
|
|
258
271
|
? yield this.config.transcriptFetch({ url: transcriptURL, lang, userAgent })
|
|
259
272
|
: yield defaultFetch({ url: transcriptURL, lang, userAgent });
|
|
260
273
|
if (!transcriptResponse.ok) {
|
|
@@ -281,7 +294,7 @@ class YoutubeTranscript {
|
|
|
281
294
|
try {
|
|
282
295
|
yield cache.set(cacheKey, JSON.stringify(transcript), cacheTTL);
|
|
283
296
|
}
|
|
284
|
-
catch (
|
|
297
|
+
catch (_q) {
|
|
285
298
|
// non-fatal
|
|
286
299
|
}
|
|
287
300
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "youtube-transcript-plus",
|
|
3
|
-
"version": "1.1.0",
|
|
3
|
+
"version": "1.1.2",
|
|
4
4
|
"description": "Fetch transcript from a YouTube video",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/youtube-transcript-plus.js",
|
|
@@ -9,8 +9,10 @@
|
|
|
9
9
|
"scripts": {
|
|
10
10
|
"build": "rollup -c",
|
|
11
11
|
"format": "prettier --write 'src/**/*.ts'",
|
|
12
|
+
"lint": "eslint .",
|
|
12
13
|
"test": "jest",
|
|
13
14
|
"test:watch": "jest --watch",
|
|
15
|
+
"typecheck": "tsc --noEmit",
|
|
14
16
|
"prepare": "husky"
|
|
15
17
|
},
|
|
16
18
|
"author": "ericmmartin",
|
|
@@ -29,18 +31,24 @@
|
|
|
29
31
|
]
|
|
30
32
|
},
|
|
31
33
|
"devDependencies": {
|
|
34
|
+
"@eslint/js": "^9.39.2",
|
|
32
35
|
"@types/jest": "^30.0.0",
|
|
36
|
+
"eslint": "^9.39.2",
|
|
37
|
+
"eslint-config-prettier": "^10.1.8",
|
|
38
|
+
"globals": "^16.5.0",
|
|
33
39
|
"https-proxy-agent": "^7.0.6",
|
|
34
40
|
"husky": "^9.1.7",
|
|
35
41
|
"jest": "^30.0.5",
|
|
36
42
|
"lint-staged": "^16.1.5",
|
|
43
|
+
"nock": "^14.0.10",
|
|
37
44
|
"prettier": "^3.6.2",
|
|
38
45
|
"rollup": "^4.46.4",
|
|
39
46
|
"rollup-plugin-typescript": "^1.0.1",
|
|
40
47
|
"rollup-plugin-typescript2": "^0.36.0",
|
|
41
48
|
"ts-jest": "^29.4.1",
|
|
42
49
|
"tslib": "^2.8.1",
|
|
43
|
-
"typescript": "^5.9.2"
|
|
50
|
+
"typescript": "^5.9.2",
|
|
51
|
+
"typescript-eslint": "^8.50.0"
|
|
44
52
|
},
|
|
45
53
|
"files": [
|
|
46
54
|
"dist/*"
|