youtube-transcript-plus 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +213 -0
- package/dist/cache/fs-cache.d.ts +8 -0
- package/dist/cache/in-memory-cache.d.ts +8 -0
- package/dist/cache/index.d.ts +2 -0
- package/dist/constants.d.ts +3 -0
- package/dist/errors.d.ts +18 -0
- package/dist/index.d.ts +12 -0
- package/dist/types.d.ts +26 -0
- package/dist/utils.d.ts +6 -0
- package/dist/youtube-transcript-plus.js +250 -0
- package/package.json +56 -0
package/README.md
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
# youtube-transcript-plus
|
|
2
|
+
|
|
3
|
+
[![npm version](https://badge.fury.io/js/youtube-transcript-plus.svg)](https://badge.fury.io/js/youtube-transcript-plus)
|
|
4
|
+
|
|
5
|
+
A Node.js library to fetch transcripts from YouTube videos. This package uses YouTube's unofficial API, so it may break if YouTube changes its internal structure.
|
|
6
|
+
|
|
7
|
+
**Note:** This project was originally forked from [https://github.com/Kakulukian/youtube-transcript](https://github.com/Kakulukian/youtube-transcript).
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
$ npm install youtube-transcript-plus
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
or
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
$ yarn add youtube-transcript-plus
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
### Basic Usage
|
|
24
|
+
|
|
25
|
+
```javascript
|
|
26
|
+
import { fetchTranscript } from 'youtube-transcript-plus';
|
|
27
|
+
|
|
28
|
+
// Fetch transcript using default settings
|
|
29
|
+
fetchTranscript('videoId_or_URL').then(console.log).catch(console.error);
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
### Custom User-Agent
|
|
33
|
+
|
|
34
|
+
You can pass a custom `userAgent` string to mimic different browsers or devices.
|
|
35
|
+
|
|
36
|
+
```javascript
|
|
37
|
+
fetchTranscript('videoId_or_URL', {
|
|
38
|
+
userAgent:
|
|
39
|
+
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
|
40
|
+
})
|
|
41
|
+
.then(console.log)
|
|
42
|
+
.catch(console.error);
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Custom Fetch Functions
|
|
46
|
+
|
|
47
|
+
You can inject custom `videoFetch` and `transcriptFetch` functions to modify the fetch behavior, such as using a proxy or custom headers.
|
|
48
|
+
|
|
49
|
+
```javascript
|
|
50
|
+
fetchTranscript('videoId_or_URL', {
|
|
51
|
+
videoFetch: async ({ url, lang, userAgent }) => {
|
|
52
|
+
return fetch(`https://my-proxy-server.com/?url=${encodeURIComponent(url)}`, {
|
|
53
|
+
headers: {
|
|
54
|
+
...(lang && { 'Accept-Language': lang }),
|
|
55
|
+
'User-Agent': userAgent,
|
|
56
|
+
},
|
|
57
|
+
});
|
|
58
|
+
},
|
|
59
|
+
transcriptFetch: async ({ url, lang, userAgent }) => {
|
|
60
|
+
return fetch(`https://my-proxy-server.com/?url=${encodeURIComponent(url)}`, {
|
|
61
|
+
headers: {
|
|
62
|
+
...(lang && { 'Accept-Language': lang }),
|
|
63
|
+
'User-Agent': userAgent,
|
|
64
|
+
},
|
|
65
|
+
});
|
|
66
|
+
},
|
|
67
|
+
})
|
|
68
|
+
.then(console.log)
|
|
69
|
+
.catch(console.error);
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Language Support
|
|
73
|
+
|
|
74
|
+
You can specify the language for the transcript using the `lang` option.
|
|
75
|
+
|
|
76
|
+
```javascript
|
|
77
|
+
fetchTranscript('videoId_or_URL', {
|
|
78
|
+
lang: 'fr', // Fetch transcript in French
|
|
79
|
+
})
|
|
80
|
+
.then(console.log)
|
|
81
|
+
.catch(console.error);
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Caching
|
|
85
|
+
|
|
86
|
+
You can provide a custom caching strategy by implementing the `CacheStrategy` interface. The library also provides default implementations for in-memory and file system caching.
|
|
87
|
+
|
|
88
|
+
#### In-Memory Cache
|
|
89
|
+
|
|
90
|
+
```typescript
|
|
91
|
+
import { fetchTranscript, InMemoryCache } from 'youtube-transcript-plus';
|
|
92
|
+
|
|
93
|
+
fetchTranscript('videoId_or_URL', {
|
|
94
|
+
lang: 'en',
|
|
95
|
+
userAgent: 'FOO',
|
|
96
|
+
cache: new InMemoryCache(1800000), // 30 minutes TTL
|
|
97
|
+
})
|
|
98
|
+
.then(console.log)
|
|
99
|
+
.catch(console.error);
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
#### File System Cache
|
|
103
|
+
|
|
104
|
+
```typescript
|
|
105
|
+
import { fetchTranscript, FsCache } from 'youtube-transcript-plus';
|
|
106
|
+
|
|
107
|
+
fetchTranscript('videoId_or_URL', {
|
|
108
|
+
cache: new FsCache('./my-cache-dir', 86400000), // 1 day TTL
|
|
109
|
+
})
|
|
110
|
+
.then(console.log)
|
|
111
|
+
.catch(console.error);
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
#### Custom Caching
|
|
115
|
+
|
|
116
|
+
If the default implementations don’t meet your needs, you can implement your own caching strategy:
|
|
117
|
+
|
|
118
|
+
```typescript
|
|
119
|
+
import { fetchTranscript, CacheStrategy } from 'youtube-transcript-plus';
|
|
120
|
+
|
|
121
|
+
class CustomCache implements CacheStrategy {
|
|
122
|
+
async get(key: string): Promise<string | null> {
|
|
123
|
+
// Custom logic
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
async set(key: string, value: string, ttl?: number): Promise<void> {
|
|
127
|
+
// Custom logic
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
fetchTranscript('videoId_or_URL', {
|
|
132
|
+
cache: new CustomCache(),
|
|
133
|
+
})
|
|
134
|
+
.then(console.log)
|
|
135
|
+
.catch(console.error);
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Error Handling
|
|
139
|
+
|
|
140
|
+
The library throws specific errors for different failure scenarios. Make sure to handle them appropriately.
|
|
141
|
+
|
|
142
|
+
```javascript
|
|
143
|
+
import {
  fetchTranscript,
|
|
144
|
+
YoutubeTranscriptVideoUnavailableError,
|
|
145
|
+
YoutubeTranscriptDisabledError,
|
|
146
|
+
YoutubeTranscriptNotAvailableError,
|
|
147
|
+
YoutubeTranscriptNotAvailableLanguageError,
|
|
148
|
+
} from 'youtube-transcript-plus';
|
|
149
|
+
|
|
150
|
+
fetchTranscript('videoId_or_URL')
|
|
151
|
+
.then(console.log)
|
|
152
|
+
.catch((error) => {
|
|
153
|
+
if (error instanceof YoutubeTranscriptVideoUnavailableError) {
|
|
154
|
+
console.error('Video is unavailable:', error.message);
|
|
155
|
+
} else if (error instanceof YoutubeTranscriptDisabledError) {
|
|
156
|
+
console.error('Transcripts are disabled for this video:', error.message);
|
|
157
|
+
} else if (error instanceof YoutubeTranscriptNotAvailableError) {
|
|
158
|
+
console.error('No transcript available:', error.message);
|
|
159
|
+
} else if (error instanceof YoutubeTranscriptNotAvailableLanguageError) {
|
|
160
|
+
console.error('Transcript not available in the specified language:', error.message);
|
|
161
|
+
} else {
|
|
162
|
+
console.error('An unexpected error occurred:', error.message);
|
|
163
|
+
}
|
|
164
|
+
});
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Example Usage Files
|
|
168
|
+
|
|
169
|
+
The repository includes several example files in the `example/` directory to demonstrate different use cases of the library:
|
|
170
|
+
|
|
171
|
+
1. **`basic-usage.js`**: Demonstrates the simplest way to fetch a transcript using the default settings.
|
|
172
|
+
2. **`caching-usage.js`**: Shows how to use the `InMemoryCache` to cache transcripts with a 30-minute TTL.
|
|
173
|
+
3. **`fs-caching-usage.js`**: Demonstrates how to use the `FsCache` to cache transcripts on the file system with a 1-day TTL.
|
|
174
|
+
4. **`language-usage.js`**: Shows how to fetch a transcript in a specific language (e.g., French).
|
|
175
|
+
5. **`proxy-usage.js`**: Demonstrates how to use a proxy server to fetch transcripts, which can be useful for bypassing rate limits or accessing restricted content.
|
|
176
|
+
|
|
177
|
+
These examples can be found in the `example/` directory of the repository.
|
|
178
|
+
|
|
179
|
+
## API
|
|
180
|
+
|
|
181
|
+
### `fetchTranscript(videoId: string, config?: TranscriptConfig)`
|
|
182
|
+
|
|
183
|
+
Fetches the transcript for a YouTube video.
|
|
184
|
+
|
|
185
|
+
- **`videoId`**: The YouTube video ID or URL.
|
|
186
|
+
- **`config`**: Optional configuration object with the following properties:
|
|
187
|
+
- **`lang`**: Language code (e.g., `'en'`, `'fr'`) for the transcript.
|
|
188
|
+
- **`userAgent`**: Custom User-Agent string.
|
|
189
|
+
- **`cache`**: Custom caching strategy.
|
|
190
|
+
- **`cacheTTL`**: Time-to-live for cache entries in milliseconds.
|
|
191
|
+
- **`videoFetch`**: Custom fetch function for the video page request.
|
|
192
|
+
- **`transcriptFetch`**: Custom fetch function for the transcript request.
|
|
193
|
+
|
|
194
|
+
Returns a `Promise<TranscriptResponse[]>` where each item in the array represents a transcript segment with the following properties:
|
|
195
|
+
|
|
196
|
+
- **`text`**: The text of the transcript segment.
|
|
197
|
+
- **`duration`**: The duration of the segment in seconds.
|
|
198
|
+
- **`offset`**: The start time of the segment in seconds.
|
|
199
|
+
- **`lang`**: The language of the transcript.
|
|
200
|
+
|
|
201
|
+
## Errors
|
|
202
|
+
|
|
203
|
+
The library throws the following errors:
|
|
204
|
+
|
|
205
|
+
- **`YoutubeTranscriptVideoUnavailableError`**: The video is unavailable or has been removed.
|
|
206
|
+
- **`YoutubeTranscriptDisabledError`**: Transcripts are disabled for the video.
|
|
207
|
+
- **`YoutubeTranscriptNotAvailableError`**: No transcript is available for the video.
|
|
208
|
+
- **`YoutubeTranscriptNotAvailableLanguageError`**: The transcript is not available in the specified language.
|
|
209
|
+
- **`YoutubeTranscriptInvalidVideoIdError`**: The provided video ID or URL is invalid.
|
|
210
|
+
|
|
211
|
+
## License
|
|
212
|
+
|
|
213
|
+
**[MIT](LICENSE)** Licensed
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { CacheStrategy } from '../types';
|
|
2
|
+
/**
 * File-system backed {@link CacheStrategy}. The implementation stores each
 * entry as a JSON file (`{ value, expires }`) inside `cacheDir`.
 */
export declare class FsCache implements CacheStrategy {
|
|
3
|
+
/** Directory that holds the cache files. */
private cacheDir;
|
|
4
|
+
/** TTL in milliseconds applied when `set` is called without a `ttl`. */
private defaultTTL;
|
|
5
|
+
/**
 * @param cacheDir - Cache directory; the implementation defaults to `'./cache'` and attempts to create it.
 * @param defaultTTL - Default time-to-live in milliseconds; the implementation defaults to 3600000 (1 hour).
 */
constructor(cacheDir?: string, defaultTTL?: number);
|
|
6
|
+
/**
 * Resolves to the cached value for `key`, or `null` when the entry is
 * missing, unreadable or expired (an expired file is deleted).
 */
get(key: string): Promise<string | null>;
|
|
7
|
+
/**
 * Stores `value` under `key`; it expires `ttl` milliseconds from now, or
 * after the default TTL when `ttl` is omitted.
 */
set(key: string, value: string, ttl?: number): Promise<void>;
|
|
8
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { CacheStrategy } from '../types';
|
|
2
|
+
/**
 * {@link CacheStrategy} that keeps entries in a `Map` held by the instance;
 * nothing is persisted.
 */
export declare class InMemoryCache implements CacheStrategy {
|
|
3
|
+
/** Backing map from cache key to `{ value, expires }`. */
private cache;
|
|
4
|
+
/** TTL in milliseconds applied when `set` is called without a `ttl`. */
private defaultTTL;
|
|
5
|
+
/**
 * @param defaultTTL - Default time-to-live in milliseconds; the implementation defaults to 3600000 (1 hour).
 */
constructor(defaultTTL?: number);
|
|
6
|
+
/**
 * Resolves to the cached value for `key`, or `null` when the entry is
 * missing or expired (the entry is then removed from the map).
 */
get(key: string): Promise<string | null>;
|
|
7
|
+
/**
 * Stores `value` under `key`; it expires `ttl` milliseconds from now, or
 * after the default TTL when `ttl` is omitted.
 */
set(key: string, value: string, ttl?: number): Promise<void>;
|
|
8
|
+
}
|
package/dist/errors.d.ts
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
 * Raised when the video page contains no captions data and shows a
 * reCAPTCHA challenge instead, i.e. YouTube is throttling the caller.
 */
export declare class YoutubeTranscriptTooManyRequestError extends Error {
|
|
2
|
+
constructor();
|
|
3
|
+
}
|
|
4
|
+
/**
 * Raised when the video page request fails, or the page carries neither
 * captions data nor a playability status.
 */
export declare class YoutubeTranscriptVideoUnavailableError extends Error {
|
|
5
|
+
/** @param videoId - ID of the video, embedded in the error message. */
constructor(videoId: string);
|
|
6
|
+
}
|
|
7
|
+
/**
 * Raised when the video page is playable but exposes no usable captions
 * data (missing or unparsable caption tracklist).
 */
export declare class YoutubeTranscriptDisabledError extends Error {
|
|
8
|
+
/** @param videoId - ID of the video, embedded in the error message. */
constructor(videoId: string);
|
|
9
|
+
}
|
|
10
|
+
/**
 * Raised when the captions data lists no caption tracks, or the transcript
 * request itself fails.
 */
export declare class YoutubeTranscriptNotAvailableError extends Error {
|
|
11
|
+
/** @param videoId - ID of the video, embedded in the error message. */
constructor(videoId: string);
|
|
12
|
+
}
|
|
13
|
+
/**
 * Raised when a language was requested but none of the video's caption
 * tracks has that language code.
 */
export declare class YoutubeTranscriptNotAvailableLanguageError extends Error {
|
|
14
|
+
/**
 * @param lang - The language code that was requested.
 * @param availableLangs - Language codes that do exist for the video; listed in the message.
 * @param videoId - ID of the video, embedded in the error message.
 */
constructor(lang: string, availableLangs: string[], videoId: string);
|
|
15
|
+
}
|
|
16
|
+
/**
 * Raised when the input is neither an 11-character string nor a URL from
 * which a YouTube video ID can be extracted.
 */
export declare class YoutubeTranscriptInvalidVideoIdError extends Error {
|
|
17
|
+
constructor();
|
|
18
|
+
}
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { TranscriptConfig, TranscriptResponse } from './types';
|
|
2
|
+
/**
 * Fetches YouTube transcripts. Configure an instance once and reuse it, or
 * call the static {@link YoutubeTranscript.fetchTranscript} for one-off use.
 */
export declare class YoutubeTranscript {
|
|
3
|
+
/** Options supplied to the constructor, if any. */
private config?;
|
|
4
|
+
/**
 * @param config - Optional settings (language, user agent, cache, custom fetch functions).
 */
constructor(config?: TranscriptConfig & {
|
|
5
|
+
cacheTTL?: number;
|
|
6
|
+
});
|
|
7
|
+
/**
 * Fetches the transcript of one video using this instance's configuration.
 *
 * @param videoId - An 11-character video ID or a YouTube URL.
 * @returns The transcript segments, in the order they appear in the caption track.
 */
fetchTranscript(videoId: string): Promise<TranscriptResponse[]>;
|
|
8
|
+
/**
 * Convenience form: creates a temporary instance with `config` and
 * fetches the transcript for `videoId`.
 */
static fetchTranscript(videoId: string, config?: TranscriptConfig): Promise<TranscriptResponse[]>;
|
|
9
|
+
}
|
|
10
|
+
// Public re-exports: the cache contract and the two bundled cache implementations.
export type { CacheStrategy } from './types';
|
|
11
|
+
export { InMemoryCache, FsCache } from './cache';
|
|
12
|
+
/** Stand-alone alias of the static `YoutubeTranscript.fetchTranscript`. */
export declare const fetchTranscript: typeof YoutubeTranscript.fetchTranscript;
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/**
 * Contract for pluggable transcript caches. Values are strings; the library
 * stores the JSON-serialized transcript.
 */
export interface CacheStrategy {
|
|
2
|
+
/** Resolves to the cached string for `key`, or `null` when there is no usable entry. */
get(key: string): Promise<string | null>;
|
|
3
|
+
/**
 * Stores `value` under `key`.
 * @param ttl - Optional time-to-live; the bundled caches interpret it as milliseconds.
 */
set(key: string, value: string, ttl?: number): Promise<void>;
|
|
4
|
+
}
|
|
5
|
+
/** Options accepted by `fetchTranscript` and the `YoutubeTranscript` constructor. */
export interface TranscriptConfig {
|
|
6
|
+
/** Language code of the caption track to fetch (e.g. `'en'`); the first listed track is used when omitted. */
lang?: string;
|
|
7
|
+
/** User-Agent passed to the fetch functions; a built-in desktop Chrome string is used when omitted. */
userAgent?: string;
|
|
8
|
+
/** Cache consulted before any request is made and filled after a successful fetch. */
cache?: CacheStrategy;
|
|
9
|
+
/** TTL handed to `cache.set`; the README documents it as milliseconds. */
cacheTTL?: number;
|
|
10
|
+
/** Replaces the default request for the video watch page (e.g. to route through a proxy). */
videoFetch?: (params: {
|
|
11
|
+
url: string;
|
|
12
|
+
lang?: string;
|
|
13
|
+
userAgent?: string;
|
|
14
|
+
}) => Promise<Response>;
|
|
15
|
+
/** Replaces the default request for the transcript XML. */
transcriptFetch?: (params: {
|
|
16
|
+
url: string;
|
|
17
|
+
lang?: string;
|
|
18
|
+
userAgent?: string;
|
|
19
|
+
}) => Promise<Response>;
|
|
20
|
+
}
|
|
21
|
+
/** One transcript segment, produced from a `<text>` element of the transcript XML. */
export interface TranscriptResponse {
|
|
22
|
+
/** Raw text content of the segment. */
text: string;
|
|
23
|
+
/** Parsed from the element's `dur` attribute; the README documents it as seconds. */
duration: number;
|
|
24
|
+
/** Parsed from the element's `start` attribute; the README documents it as seconds. */
offset: number;
|
|
25
|
+
/** Language code reported for the transcript. */
lang?: string;
|
|
26
|
+
}
|
package/dist/youtube-transcript-plus.js
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
import fs from 'fs/promises';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
|
|
4
|
+
/******************************************************************************
|
|
5
|
+
Copyright (c) Microsoft Corporation.
|
|
6
|
+
|
|
7
|
+
Permission to use, copy, modify, and/or distribute this software for any
|
|
8
|
+
purpose with or without fee is hereby granted.
|
|
9
|
+
|
|
10
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
|
|
11
|
+
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
|
12
|
+
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
|
13
|
+
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
|
14
|
+
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
|
15
|
+
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
|
16
|
+
PERFORMANCE OF THIS SOFTWARE.
|
|
17
|
+
***************************************************************************** */
|
|
18
|
+
/* global Reflect, Promise, SuppressedError, Symbol, Iterator */
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
/**
 * Downlevel async/await helper emitted by the TypeScript compiler.
 *
 * Runs `generator` as a coroutine: every yielded value is awaited, its
 * settled result is fed back into the generator, and the returned promise
 * settles with the generator's return value or its first uncaught error.
 *
 * @param thisArg - `this` value used when invoking `generator`.
 * @param _arguments - Arguments passed to `generator` (none when falsy).
 * @param P - Promise constructor to use; falls back to the global `Promise`.
 * @param generator - Generator function holding the asynchronous body.
 * @returns A promise (instance of `P`) for the generator's return value.
 */
function __awaiter(thisArg, _arguments, P, generator) {
    if (!P) {
        P = Promise;
    }
    // Wrap non-promise values so `.then` can be called on anything yielded.
    const adopt = (value) => (value instanceof P ? value : new P((settle) => { settle(value); }));
    return new P((resolve, reject) => {
        // Created inside the executor so a throwing generator call rejects the promise.
        const iterator = generator.apply(thisArg, _arguments || []);
        const advance = (result) => {
            if (result.done) {
                resolve(result.value);
            }
            else {
                adopt(result.value).then(onFulfilled, onRejected);
            }
        };
        const onFulfilled = (value) => {
            try {
                advance(iterator.next(value));
            }
            catch (e) {
                reject(e);
            }
        };
        const onRejected = (reason) => {
            try {
                advance(iterator["throw"](reason));
            }
            catch (e) {
                reject(e);
            }
        };
        advance(iterator.next());
    });
}
|
|
30
|
+
|
|
31
|
+
// Residue of the compiler's helper code: the expression evaluates to the
// native SuppressedError when one exists, otherwise to a fallback factory
// that builds a plain Error carrying `error` and `suppressed`. The result
// is not assigned to anything in this bundle.
typeof SuppressedError === "function" ? SuppressedError : function (error, suppressed, message) {
|
|
32
|
+
var e = new Error(message);
|
|
33
|
+
return e.name = "SuppressedError", e.error = error, e.suppressed = suppressed, e;
|
|
34
|
+
};
|
|
35
|
+
|
|
36
|
+
// User-Agent sent with requests when the caller does not configure one.
const DEFAULT_USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36';
|
|
37
|
+
// Matches youtube.com / youtu.be URLs; capture group 1 is the 11-character video ID.
const RE_YOUTUBE = /(?:youtube\.com\/(?:[^\/]+\/.+\/|(?:v|e(?:mbed)?)\/|.*[?&]v=)|youtu\.be\/)([^"&?\/\s]{11})/i;
|
|
38
|
+
// Matches one <text> element of the transcript XML; groups: 1 = start, 2 = dur, 3 = text content.
const RE_XML_TRANSCRIPT = /<text start="([^"]*)" dur="([^"]*)">([^<]*)<\/text>/g;
|
|
39
|
+
|
|
40
|
+
/**
 * Signals that YouTube answered with a reCAPTCHA challenge instead of the
 * video data, i.e. the caller is being rate limited.
 */
class YoutubeTranscriptTooManyRequestError extends Error {
    constructor() {
        const message = 'YouTube is receiving too many requests from your IP address. Please try again later or use a proxy. If the issue persists, consider reducing the frequency of requests.';
        super(message);
        // Explicit name so logs show the specific error type rather than "Error".
        this.name = 'YoutubeTranscriptTooManyRequestError';
    }
}
|
|
46
|
+
/**
 * Signals that the video page could not be loaded or carries no player data.
 */
class YoutubeTranscriptVideoUnavailableError extends Error {
    /**
     * @param videoId - ID of the video, embedded in the message.
     */
    constructor(videoId) {
        const message = `The video with ID "${videoId}" is no longer available or has been removed. Please check the video URL or ID and try again.`;
        super(message);
        this.name = 'YoutubeTranscriptVideoUnavailableError';
    }
}
|
|
52
|
+
/**
 * Signals that the video exists but exposes no captions data.
 */
class YoutubeTranscriptDisabledError extends Error {
    /**
     * @param videoId - ID of the video, embedded in the message.
     */
    constructor(videoId) {
        const message = `Transcripts are disabled for the video with ID "${videoId}". This may be due to the video owner disabling captions or the video not supporting transcripts.`;
        super(message);
        this.name = 'YoutubeTranscriptDisabledError';
    }
}
|
|
58
|
+
/**
 * Signals that no caption track is listed for the video, or that the
 * transcript itself could not be downloaded.
 */
class YoutubeTranscriptNotAvailableError extends Error {
    /**
     * @param videoId - ID of the video, embedded in the message.
     */
    constructor(videoId) {
        const message = `No transcripts are available for the video with ID "${videoId}". This may be because the video does not have captions or the captions are not accessible.`;
        super(message);
        this.name = 'YoutubeTranscriptNotAvailableError';
    }
}
|
|
64
|
+
/**
 * Signals that the requested language has no caption track for the video.
 */
class YoutubeTranscriptNotAvailableLanguageError extends Error {
    /**
     * @param lang - Language code that was requested.
     * @param availableLangs - Language codes that do exist; listed in the message.
     * @param videoId - ID of the video, embedded in the message.
     */
    constructor(lang, availableLangs, videoId) {
        const message = `No transcripts are available in "${lang}" for the video with ID "${videoId}". Available languages: ${availableLangs.join(', ')}. Please try a different language.`;
        super(message);
        this.name = 'YoutubeTranscriptNotAvailableLanguageError';
    }
}
|
|
70
|
+
/**
 * Signals that the input is neither an 11-character ID nor a recognizable
 * YouTube URL.
 */
class YoutubeTranscriptInvalidVideoIdError extends Error {
    constructor() {
        const message = 'Invalid YouTube video ID or URL. Please provide a valid video ID or URL. Example: "dQw4w9WgXcQ" or "https://www.youtube.com/watch?v=dQw4w9WgXcQ".';
        super(message);
        this.name = 'YoutubeTranscriptInvalidVideoIdError';
    }
}
|
|
76
|
+
|
|
77
|
+
/**
 * Normalizes user input to a bare YouTube video ID.
 *
 * An 11-character input is taken to be an ID already and is returned
 * unchanged (no further validation); anything else must be a URL that
 * RE_YOUTUBE can extract an ID from.
 *
 * @param videoId - Video ID or YouTube URL.
 * @returns The 11-character video ID.
 * @throws YoutubeTranscriptInvalidVideoIdError when no ID can be extracted.
 */
function retrieveVideoId(videoId) {
    const looksLikeBareId = videoId.length === 11;
    if (looksLikeBareId) {
        return videoId;
    }
    const urlMatch = videoId.match(RE_YOUTUBE);
    if (!urlMatch || !urlMatch.length) {
        throw new YoutubeTranscriptInvalidVideoIdError();
    }
    // Capture group 1 holds the ID portion of the URL.
    return urlMatch[1];
}
|
|
87
|
+
/**
 * Default request function for both the video page and the transcript.
 *
 * Performs a plain `fetch` of `url`, sending `Accept-Language` only when a
 * language is given and always sending a `User-Agent` (the supplied one, or
 * DEFAULT_USER_AGENT).
 *
 * @param _a - `{ url, lang, userAgent }` request parameters.
 * @returns A promise for the fetch `Response`.
 */
function defaultFetch(_a) {
    return __awaiter(this, arguments, undefined, function* (params) {
        const { url, lang, userAgent } = params;
        const headers = {};
        if (lang) {
            headers['Accept-Language'] = lang;
        }
        headers['User-Agent'] = userAgent || DEFAULT_USER_AGENT;
        return fetch(url, { headers });
    });
}
|
|
94
|
+
|
|
95
|
+
/**
 * Maps an arbitrary cache key to a single, portable file name.
 *
 * Keys such as `transcript:<id>:<lang>` contain `:` (not allowed in Windows
 * file names) and may contain path separators or `..` coming from
 * caller-supplied values. Every character outside `A-Z a-z 0-9 - _` is
 * percent-encoded, so the result is always one path segment that stays
 * inside the cache directory.
 *
 * @param key - Cache key.
 * @returns A file name made only of `A-Z a-z 0-9 - _ %`.
 */
function fsCacheFileName(key) {
    // encodeURIComponent leaves . ! ' ( ) * ~ untouched; encode those as well.
    return encodeURIComponent(key).replace(/[.!'()*~]/g, (ch) => `%${ch.charCodeAt(0).toString(16).toUpperCase()}`);
}
/**
 * File-system backed cache. Each entry is a JSON file `{ value, expires }`
 * stored in `cacheDir` under an encoded form of its key (files written under
 * raw key names by earlier versions are simply not found, i.e. a cache miss).
 */
class FsCache {
    /**
     * @param cacheDir - Directory for the cache files (default `./cache`).
     * @param defaultTTL - Default time-to-live in milliseconds (default 1 hour).
     */
    constructor(cacheDir = './cache', defaultTTL = 3600000) {
        this.cacheDir = cacheDir;
        this.defaultTTL = defaultTTL;
        // Best-effort eager creation; set() creates the directory again before
        // writing, so a failure or a slow mkdir here is not fatal.
        fs.mkdir(cacheDir, { recursive: true }).catch(() => { });
    }
    /**
     * Reads an entry.
     *
     * @param key - Cache key.
     * @returns The stored value, or `null` when the entry is missing,
     * unreadable, malformed or expired (an expired file is deleted).
     */
    get(key) {
        return __awaiter(this, undefined, undefined, function* () {
            try {
                const filePath = path.join(this.cacheDir, fsCacheFileName(key));
                const data = yield fs.readFile(filePath, 'utf-8');
                const { value, expires } = JSON.parse(data);
                if (expires > Date.now()) {
                    return value;
                }
                yield fs.unlink(filePath);
            }
            catch (error) {
                // Deliberate: any read/parse/delete problem is treated as a cache miss.
            }
            return null;
        });
    }
    /**
     * Writes an entry.
     *
     * @param key - Cache key.
     * @param value - String to store.
     * @param ttl - Time-to-live in milliseconds; `defaultTTL` when omitted.
     */
    set(key, value, ttl) {
        return __awaiter(this, undefined, undefined, function* () {
            const filePath = path.join(this.cacheDir, fsCacheFileName(key));
            const expires = Date.now() + (ttl !== null && ttl !== undefined ? ttl : this.defaultTTL);
            // The constructor's mkdir is not awaited, so make sure the directory
            // exists before writing (a no-op when it is already there).
            yield fs.mkdir(this.cacheDir, { recursive: true });
            yield fs.writeFile(filePath, JSON.stringify({ value, expires }), 'utf-8');
        });
    }
}
|
|
124
|
+
|
|
125
|
+
/**
 * Cache that keeps entries in a Map owned by the instance.
 * Entries are `{ value, expires }`; expired ones are dropped when read.
 */
class InMemoryCache {
    /**
     * @param defaultTTL - Default time-to-live in milliseconds (default 1 hour).
     */
    constructor(defaultTTL = 3600000) {
        this.cache = new Map();
        this.defaultTTL = defaultTTL;
    }
    /**
     * @param key - Cache key.
     * @returns The stored value, or `null` when the entry is missing or expired.
     */
    get(key) {
        return __awaiter(this, undefined, undefined, function* () {
            const stored = this.cache.get(key);
            if (!stored || !(stored.expires > Date.now())) {
                // Missing or expired: make sure nothing stale is left behind.
                this.cache.delete(key);
                return null;
            }
            return stored.value;
        });
    }
    /**
     * @param key - Cache key.
     * @param value - String to store.
     * @param ttl - Time-to-live in milliseconds; `defaultTTL` when omitted.
     */
    set(key, value, ttl) {
        return __awaiter(this, undefined, undefined, function* () {
            const lifetime = ttl == null ? this.defaultTTL : ttl;
            const expires = Date.now() + lifetime;
            this.cache.set(key, { value, expires });
        });
    }
}
|
|
148
|
+
|
|
149
|
+
/**
 * Fetches transcripts by scraping the captions metadata embedded in a
 * video's watch page and downloading the referenced transcript XML.
 */
class YoutubeTranscript {
    /**
     * @param config - Optional settings: `lang`, `userAgent`, `cache`,
     * `cacheTTL`, `videoFetch`, `transcriptFetch`.
     */
    constructor(config) {
        this.config = config;
    }
    /**
     * Fetches the transcript of one video.
     *
     * @param videoId - 11-character video ID or YouTube URL.
     * @returns Promise for the list of `{ text, duration, offset, lang }` segments.
     * @throws YoutubeTranscriptInvalidVideoIdError when no ID can be extracted.
     * @throws YoutubeTranscriptVideoUnavailableError when the page request fails or carries no player data.
     * @throws YoutubeTranscriptTooManyRequestError when the page is a reCAPTCHA challenge.
     * @throws YoutubeTranscriptDisabledError when the page has no usable captions data.
     * @throws YoutubeTranscriptNotAvailableError when no caption track can be used or the transcript request fails.
     * @throws YoutubeTranscriptNotAvailableLanguageError when `lang` is set but has no matching track.
     */
    fetchTranscript(videoId) {
        return __awaiter(this, undefined, undefined, function* () {
            const config = this.config || {};
            const requestedLang = config.lang;
            const identifier = retrieveVideoId(videoId);
            const userAgent = config.userAgent || DEFAULT_USER_AGENT;
            // Use custom fetch functions if provided, otherwise use defaultFetch
            const videoFetch = config.videoFetch || defaultFetch;
            const transcriptFetch = config.transcriptFetch || defaultFetch;
            // Cache key based on video ID and language
            const cacheKey = `transcript:${identifier}:${requestedLang || 'default'}`;
            // Check cache first
            if (config.cache) {
                const cachedTranscript = yield config.cache.get(cacheKey);
                if (cachedTranscript) {
                    return JSON.parse(cachedTranscript);
                }
            }
            // Fetch the video page
            const videoPageResponse = yield videoFetch({
                url: `https://www.youtube.com/watch?v=${identifier}`,
                lang: requestedLang,
                userAgent,
            });
            if (!videoPageResponse.ok) {
                throw new YoutubeTranscriptVideoUnavailableError(identifier);
            }
            const videoPageBody = yield videoPageResponse.text();
            // The captions metadata is a JSON object embedded after the "captions": key.
            const splittedHTML = videoPageBody.split('"captions":');
            if (splittedHTML.length <= 1) {
                if (videoPageBody.includes('class="g-recaptcha"')) {
                    throw new YoutubeTranscriptTooManyRequestError();
                }
                if (!videoPageBody.includes('"playabilityStatus":')) {
                    throw new YoutubeTranscriptVideoUnavailableError(identifier);
                }
                throw new YoutubeTranscriptDisabledError(identifier);
            }
            let playerCaptions;
            try {
                // Strip every newline (a string pattern would only remove the first one).
                playerCaptions = JSON.parse(splittedHTML[1].split(',"videoDetails')[0].replace(/\n/g, ''));
            }
            catch (e) {
                playerCaptions = undefined;
            }
            const captions = playerCaptions === null || playerCaptions === undefined
                ? undefined
                : playerCaptions['playerCaptionsTracklistRenderer'];
            if (!captions || typeof captions !== 'object') {
                throw new YoutubeTranscriptDisabledError(identifier);
            }
            if (!('captionTracks' in captions)) {
                throw new YoutubeTranscriptNotAvailableError(identifier);
            }
            const tracks = Array.isArray(captions.captionTracks) ? captions.captionTracks : [];
            // Pick the track once: the requested language, or the first listed track.
            const track = requestedLang
                ? tracks.find((candidate) => candidate.languageCode === requestedLang)
                : tracks[0];
            if (requestedLang && !track) {
                throw new YoutubeTranscriptNotAvailableLanguageError(requestedLang, tracks.map((candidate) => candidate.languageCode), identifier);
            }
            // An empty track list (or a track without URL) would otherwise surface
            // as a raw TypeError instead of a library error.
            if (!track || !track.baseUrl) {
                throw new YoutubeTranscriptNotAvailableError(identifier);
            }
            // Fetch the transcript
            const transcriptResponse = yield transcriptFetch({
                url: track.baseUrl,
                lang: requestedLang,
                userAgent,
            });
            if (!transcriptResponse.ok) {
                throw new YoutubeTranscriptNotAvailableError(identifier);
            }
            const transcriptBody = yield transcriptResponse.text();
            const transcript = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)].map((result) => ({
                text: result[3],
                duration: parseFloat(result[2]),
                offset: parseFloat(result[1]),
                // Language of the track that was actually fetched.
                lang: track.languageCode,
            }));
            // Store in cache if a strategy is provided
            if (config.cache) {
                yield config.cache.set(cacheKey, JSON.stringify(transcript), config.cacheTTL);
            }
            return transcript;
        });
    }
    /**
     * Convenience form: builds a temporary instance from `config` and fetches
     * the transcript for `videoId`. Does not use `this`, so it can be called
     * detached from the class.
     */
    static fetchTranscript(videoId, config) {
        return __awaiter(this, undefined, undefined, function* () {
            const instance = new YoutubeTranscript(config);
            return instance.fetchTranscript(videoId);
        });
    }
}
|
|
247
|
+
// Export the static method directly for convenience
|
|
248
|
+
const fetchTranscript = YoutubeTranscript.fetchTranscript;
|
|
249
|
+
|
|
250
|
+
export { FsCache, InMemoryCache, YoutubeTranscript, fetchTranscript };
|
package/package.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "youtube-transcript-plus",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "Fetch the transcript of a YouTube video",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/youtube-transcript-plus.js",
|
|
7
|
+
"module": "dist/youtube-transcript-plus.js",
|
|
8
|
+
"types": "dist/index.d.ts",
|
|
9
|
+
"scripts": {
|
|
10
|
+
"build": "rollup -c",
|
|
11
|
+
"format": "prettier --write 'src/**/*.ts'",
|
|
12
|
+
"test": "jest",
|
|
13
|
+
"test:watch": "jest --watch",
|
|
14
|
+
"prepare": "husky install"
|
|
15
|
+
},
|
|
16
|
+
"author": "ericmmartin",
|
|
17
|
+
"keywords": [
|
|
18
|
+
"youtube",
|
|
19
|
+
"transcript"
|
|
20
|
+
],
|
|
21
|
+
"license": "MIT",
|
|
22
|
+
"lint-staged": {
|
|
23
|
+
"src/**/*.ts": [
|
|
24
|
+
"eslint --fix",
|
|
25
|
+
"prettier --write"
|
|
26
|
+
],
|
|
27
|
+
"*.{js,json,md}": [
|
|
28
|
+
"prettier --write"
|
|
29
|
+
]
|
|
30
|
+
},
|
|
31
|
+
"devDependencies": {
|
|
32
|
+
"@types/jest": "^29.5.14",
|
|
33
|
+
"https-proxy-agent": "^7.0.6",
|
|
34
|
+
"husky": "^9.1.7",
|
|
35
|
+
"jest": "^29.7.0",
|
|
36
|
+
"lint-staged": "^15.3.0",
|
|
37
|
+
"prettier": "^3.4.2",
|
|
38
|
+
"rollup": "^4.30.1",
|
|
39
|
+
"rollup-plugin-typescript": "^1.0.1",
|
|
40
|
+
"rollup-plugin-typescript2": "^0.36.0",
|
|
41
|
+
"ts-jest": "^29.2.5",
|
|
42
|
+
"tslib": "^2.8.1",
|
|
43
|
+
"typescript": "^5.7.3"
|
|
44
|
+
},
|
|
45
|
+
"files": [
|
|
46
|
+
"dist/*"
|
|
47
|
+
],
|
|
48
|
+
"repository": "https://github.com/ericmmartin/youtube-transcript.git",
|
|
49
|
+
"publishConfig": {
|
|
50
|
+
"access": "public"
|
|
51
|
+
},
|
|
52
|
+
"homepage": "https://github.com/ericmmartin/youtube-transcript",
|
|
53
|
+
"engines": {
|
|
54
|
+
"node": ">=18.0.0"
|
|
55
|
+
}
|
|
56
|
+
}
|