youtube-transcript-api-ts 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +98 -0
- package/dist/Transcript.d.ts +17 -0
- package/dist/Transcript.js +81 -0
- package/dist/TranscriptList.d.ts +16 -0
- package/dist/TranscriptList.js +39 -0
- package/dist/TranscriptListFetcher.d.ts +11 -0
- package/dist/TranscriptListFetcher.js +77 -0
- package/dist/constants.d.ts +8 -0
- package/dist/constants.js +11 -0
- package/dist/errors.d.ts +38 -0
- package/dist/errors.js +77 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.js +37 -0
- package/dist/types.d.ts +9 -0
- package/dist/types.js +2 -0
- package/package.json +45 -0
package/README.md
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
# youtube-transcript-api-ts
|
|
2
|
+
|
|
3
|
+
A simple TypeScript library to fetch transcripts/captions for YouTube videos, with only three runtime dependencies (axios, fast-xml-parser, and he). This is a port of the popular Python [youtube-transcript-api](https://github.com/jdepoix/youtube-transcript-api).
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Fetch Transcripts**: Retrieve the full transcript of a YouTube video.
|
|
8
|
+
- **Support for Auto-Generated Captions**: Works with both manually created and auto-generated (ASR) captions.
|
|
9
|
+
- **Language Selection**: Request the transcript in a specific language with the `lang` option (defaults to English, `'en'`); manually created transcripts are preferred over auto-generated ones.
|
|
10
|
+
- **Text Format**: Option to retrieve the transcript as a single block of text or as a list of timestamped objects.
|
|
11
|
+
- **No API Key Required**: Does not require a Google/YouTube Data API key.
|
|
12
|
+
|
|
13
|
+
## Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
npm install youtube-transcript-api-ts
|
|
17
|
+
# or
|
|
18
|
+
yarn add youtube-transcript-api-ts
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
### Basic Usage (Get JSON Objects)
|
|
24
|
+
|
|
25
|
+
By default, the library returns an array of objects containing the text, duration, offset, and language code of each caption segment.
|
|
26
|
+
|
|
27
|
+
```typescript
|
|
28
|
+
import { YoutubeTranscript } from 'youtube-transcript-api-ts';
|
|
29
|
+
|
|
30
|
+
async function main() {
|
|
31
|
+
try {
|
|
32
|
+
const transcript = await YoutubeTranscript.fetchTranscript('dQw4w9WgXcQ');
|
|
33
|
+
|
|
34
|
+
console.log(transcript);
|
|
35
|
+
/* Output:
|
|
36
|
+
[
|
|
37
|
+
{ text: 'Never gonna give you up', duration: 2.1, offset: 0.5, lang: 'en' },
|
|
38
|
+
{ text: 'Never gonna let you down', duration: 1.5, offset: 2.6, lang: 'en' },
|
|
39
|
+
...
|
|
40
|
+
]
|
|
41
|
+
*/
|
|
42
|
+
} catch (e) {
|
|
43
|
+
console.error('Error fetching transcript:', e);
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
main();
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Get Transcript as Text
|
|
51
|
+
|
|
52
|
+
If you just want the full text of the video without timestamps:
|
|
53
|
+
|
|
54
|
+
```typescript
|
|
55
|
+
const text = await YoutubeTranscript.fetchTranscript('dQw4w9WgXcQ', { format: 'text' });
|
|
56
|
+
console.log(text);
|
|
57
|
+
// Output: "Never gonna give you up Never gonna let you down ..."
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Configuration Options
|
|
61
|
+
|
|
62
|
+
You can pass a configuration object as the second argument:
|
|
63
|
+
|
|
64
|
+
```typescript
|
|
65
|
+
YoutubeTranscript.fetchTranscript('video_id', {
|
|
66
|
+
lang: 'de', // Priority language (default: 'en')
|
|
67
|
+
country: 'DE', // Optional country code (accepted, but not currently used by the library)
|
|
68
|
+
format: 'text' // Optional: 'object' (default) or 'text'
|
|
69
|
+
});
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Error Handling
|
|
73
|
+
|
|
74
|
+
The library throws specific errors for various failure cases. You can import these error classes to handle them:
|
|
75
|
+
|
|
76
|
+
```typescript
|
|
77
|
+
import {
|
|
78
|
+
YoutubeTranscript,
|
|
79
|
+
VideoUnavailable,
|
|
80
|
+
TranscriptsDisabled
|
|
81
|
+
} from 'youtube-transcript-api-ts';
|
|
82
|
+
|
|
83
|
+
try {
|
|
84
|
+
await YoutubeTranscript.fetchTranscript('invalid-id');
|
|
85
|
+
} catch (e) {
|
|
86
|
+
if (e instanceof VideoUnavailable) {
|
|
87
|
+
console.error('Video is unavailable');
|
|
88
|
+
} else if (e instanceof TranscriptsDisabled) {
|
|
89
|
+
console.error('Transcripts are disabled for this video');
|
|
90
|
+
} else {
|
|
91
|
+
console.error('Unknown error:', e);
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## License
|
|
97
|
+
|
|
98
|
+
MIT
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { TranscriptResponse } from './types';
|
|
2
|
+
/**
 * One caption track of a YouTube video.
 *
 * Instances carry the track metadata and can download the caption lines on
 * demand through {@link Transcript.fetch}.
 */
export declare class Transcript {
    private videoId;
    private url;
    /** Display name of the track as reported by YouTube. */
    language: string;
    /** Language code of the track (for example `en`). */
    languageCode: string;
    /** `true` when the track was produced by speech recognition (`kind === 'asr'`). */
    isGenerated: boolean;
    /** Languages this track can be translated to; empty when it is not translatable. */
    translationLanguages: Array<{
        language: string;
        languageCode: string;
    }>;
    constructor(
        videoId: string,
        url: string,
        language: string,
        languageCode: string,
        isGenerated: boolean,
        translationLanguages: Array<{
            language: string;
            languageCode: string;
        }>
    );
    /**
     * Downloads the track and converts it to a list of caption segments.
     *
     * @param preserveFormatting - When omitted or `false`, HTML tags are stripped from the text.
     * @returns One entry per caption line, or an empty array when the track has no text.
     */
    fetch(preserveFormatting?: boolean): Promise<TranscriptResponse[]>;
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-emitted CommonJS interop helpers. Each one reuses a helper already
// present on `this` and otherwise falls back to the local implementation.

// Re-exports `source[key]` on `target` under `alias` (defaults to `key`).
var __createBinding = (this && this.__createBinding) || (Object.create ? (function (target, source, key, alias) {
    if (alias === undefined) {
        alias = key;
    }
    var descriptor = Object.getOwnPropertyDescriptor(source, key);
    var useGetter;
    if (!descriptor) {
        useGetter = true;
    }
    else if ("get" in descriptor) {
        useGetter = !source.__esModule;
    }
    else {
        useGetter = descriptor.writable || descriptor.configurable;
    }
    if (useGetter) {
        // A getter keeps the re-exported value in sync with the source object.
        descriptor = { enumerable: true, get: function () { return source[key]; } };
    }
    Object.defineProperty(target, alias, descriptor);
}) : (function (target, source, key, alias) {
    if (alias === undefined) {
        alias = key;
    }
    target[alias] = source[key];
}));
// Stores `value` as the `default` export of `target`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function (target, value) {
    Object.defineProperty(target, "default", { enumerable: true, value: value });
}) : function (target, value) {
    target["default"] = value;
});
// Wraps a CommonJS module in a namespace-like object; ES modules pass through untouched.
var __importStar = (this && this.__importStar) || (function () {
    // Resolved lazily on first use, then replaced by the chosen implementation.
    var listOwnKeys = function (obj) {
        listOwnKeys = Object.getOwnPropertyNames || function (candidate) {
            var names = [];
            for (var prop in candidate) {
                if (Object.prototype.hasOwnProperty.call(candidate, prop)) {
                    names[names.length] = prop;
                }
            }
            return names;
        };
        return listOwnKeys(obj);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var wrapped = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var i = 0; i < keys.length; i++) {
                if (keys[i] !== "default") {
                    __createBinding(wrapped, mod, keys[i]);
                }
            }
        }
        __setModuleDefault(wrapped, mod);
        return wrapped;
    };
})();
// Wraps a CommonJS module as `{ default: mod }`; ES modules pass through untouched.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.Transcript = void 0;
|
|
40
|
+
const axios_1 = __importDefault(require("axios"));
|
|
41
|
+
const fast_xml_parser_1 = require("fast-xml-parser");
|
|
42
|
+
const he = __importStar(require("he"));
|
|
43
|
+
/**
 * One caption track of a video: its metadata plus the URL the caption XML is
 * downloaded from.
 */
class Transcript {
    /**
     * @param videoId Id of the video the track belongs to.
     * @param url URL of the caption XML document.
     * @param language Display name of the track.
     * @param languageCode Language code of the track.
     * @param isGenerated Whether the track is auto-generated.
     * @param translationLanguages Languages the track can be translated to.
     */
    constructor(videoId, url, language, languageCode, isGenerated, translationLanguages) {
        this.videoId = videoId;
        this.url = url;
        this.language = language;
        this.languageCode = languageCode;
        this.isGenerated = isGenerated;
        this.translationLanguages = translationLanguages;
    }
    /**
     * Downloads the caption XML and converts it to a list of segments.
     *
     * @param preserveFormatting When false (default), HTML tags are removed from the text.
     * @returns Array of `{ text, duration, offset, lang }`; empty when the
     *          document contains no `<transcript><text>` elements.
     */
    async fetch(preserveFormatting = false) {
        const response = await axios_1.default.get(this.url);
        const parser = new fast_xml_parser_1.XMLParser({
            ignoreAttributes: false,
            attributeNamePrefix: '',
            // Keep caption text as strings: by default the parser converts
            // numeric-looking content (e.g. "2024") into numbers.
            parseTagValue: false,
        });
        const xml = parser.parse(response.data);
        if (!xml.transcript || !xml.transcript.text) {
            return [];
        }
        // Ensure it's an array even if single item
        const textElements = Array.isArray(xml.transcript.text) ? xml.transcript.text : [xml.transcript.text];
        return textElements.map((item) => {
            // An element without attributes is parsed to a bare value rather
            // than an object carrying '#text'.
            const isNode = item !== null && typeof item === 'object';
            const rawText = isNode ? item['#text'] : item;
            // Coerce defensively so a non-string value can never break `.replace`.
            let text = rawText === undefined || rawText === null ? '' : String(rawText);
            if (!preserveFormatting) {
                // Remove HTML tags
                text = text.replace(/<[^>]*>/g, '');
            }
            // Decode HTML entities
            text = he.decode(text);
            return {
                text,
                duration: parseFloat(isNode ? item.dur : undefined) || 0.0,
                offset: parseFloat(isNode ? item.start : undefined) || 0.0,
                lang: this.languageCode
            };
        });
    }
}
|
|
81
|
+
exports.Transcript = Transcript;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import { Transcript } from './Transcript';
|
|
2
|
+
/**
 * The transcripts available for one video, split into manually created and
 * auto-generated tracks and keyed by language code.
 *
 * Iterating the list yields the manually created transcripts first, then the
 * generated ones.
 */
export declare class TranscriptList {
    private videoId;
    private manuallyCreatedTranscripts;
    private generatedTranscripts;
    constructor(
        videoId: string,
        manuallyCreatedTranscripts: Record<string, Transcript>,
        generatedTranscripts: Record<string, Transcript>
    );
    /**
     * Returns the first match for `languageCodes` (checked in order), preferring
     * a manually created transcript over a generated one for the same language.
     * Throws `NoTranscriptFound` when nothing matches.
     */
    findTranscript(languageCodes: string[]): Transcript;
    /** Like {@link TranscriptList.findTranscript}, restricted to generated transcripts. */
    findGeneratedTranscript(languageCodes: string[]): Transcript;
    /** Like {@link TranscriptList.findTranscript}, restricted to manually created transcripts. */
    findManuallyCreatedTranscript(languageCodes: string[]): Transcript;
    private _findTranscript;
    [Symbol.iterator](): Generator<Transcript, void, unknown>;
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TranscriptList = void 0;
|
|
4
|
+
const errors_1 = require("./errors");
|
|
5
|
+
/**
 * The transcripts available for one video, split into manually created and
 * auto-generated tracks. Both dictionaries are keyed by language code.
 */
class TranscriptList {
    /**
     * @param videoId Id of the video the transcripts belong to.
     * @param manuallyCreatedTranscripts Manually created transcripts by language code.
     * @param generatedTranscripts Auto-generated transcripts by language code.
     */
    constructor(videoId, manuallyCreatedTranscripts, generatedTranscripts) {
        this.videoId = videoId;
        this.manuallyCreatedTranscripts = manuallyCreatedTranscripts;
        this.generatedTranscripts = generatedTranscripts;
    }
    /**
     * Finds a transcript for the first matching language code, preferring a
     * manually created transcript over a generated one for the same language.
     * @throws NoTranscriptFound when none of the codes matches.
     */
    findTranscript(languageCodes) {
        return this._findTranscript(languageCodes, [this.manuallyCreatedTranscripts, this.generatedTranscripts]);
    }
    /** Same lookup as findTranscript, restricted to generated transcripts. */
    findGeneratedTranscript(languageCodes) {
        return this._findTranscript(languageCodes, [this.generatedTranscripts]);
    }
    /** Same lookup as findTranscript, restricted to manually created transcripts. */
    findManuallyCreatedTranscript(languageCodes) {
        return this._findTranscript(languageCodes, [this.manuallyCreatedTranscripts]);
    }
    /**
     * Walks the language codes in priority order and, for each, the given
     * dictionaries in order; returns the first hit.
     */
    _findTranscript(languageCodes, transcriptDicts) {
        for (const lang of languageCodes) {
            for (const transcriptDict of transcriptDicts) {
                // Own-property check: the `in` operator would also match keys
                // inherited from Object.prototype such as "constructor".
                if (Object.prototype.hasOwnProperty.call(transcriptDict, lang)) {
                    return transcriptDict[lang];
                }
            }
        }
        // If unable to find exact match
        throw new errors_1.NoTranscriptFound(this.videoId, languageCodes);
    }
    // Iterate over all transcripts: manually created first, then generated
    *[Symbol.iterator]() {
        for (const t of Object.values(this.manuallyCreatedTranscripts))
            yield t;
        for (const t of Object.values(this.generatedTranscripts))
            yield t;
    }
}
|
|
39
|
+
exports.TranscriptList = TranscriptList;
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { AxiosInstance } from 'axios';
|
|
2
|
+
import { TranscriptList } from './TranscriptList';
|
|
3
|
+
/**
 * Retrieves the list of transcripts available for a video: it loads the watch
 * page, extracts the Innertube API key from it, and queries the player
 * endpoint for the caption tracks.
 */
export declare class TranscriptListFetcher {
    private http_client;
    /** @param http_client - Axios instance used for every HTTP request. */
    constructor(http_client: AxiosInstance);
    /**
     * Resolves with the transcripts available for `videoId`.
     * Rejects with one of the library's error classes (for example
     * `VideoUnavailable`, `TranscriptsDisabled` or `TooManyRequests`).
     */
    fetch(videoId: string): Promise<TranscriptList>;
    private _extractInnertubeApiKey;
    private _fetchVideoHtml;
    private _fetchCaptionsJson;
    private _buildTranscriptList;
}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TranscriptListFetcher = void 0;
|
|
4
|
+
const TranscriptList_1 = require("./TranscriptList");
|
|
5
|
+
const Transcript_1 = require("./Transcript");
|
|
6
|
+
const constants_1 = require("./constants");
|
|
7
|
+
const errors_1 = require("./errors");
|
|
8
|
+
/**
 * Retrieves the transcripts available for a video by loading the watch page,
 * extracting the Innertube API key from it and querying the player endpoint.
 */
class TranscriptListFetcher {
    /** @param http_client Client exposing axios-style `get` and `post` methods. */
    constructor(http_client) {
        this.http_client = http_client;
    }
    /**
     * Builds the TranscriptList for `videoId`.
     * @throws TooManyRequests, VideoUnavailable, TranscriptsDisabled or
     *         YouTubeRequestFailed depending on what YouTube answers.
     */
    async fetch(videoId) {
        const html = await this._fetchVideoHtml(videoId);
        const captionsJson = await this._fetchCaptionsJson(videoId, html);
        return this._buildTranscriptList(videoId, captionsJson);
    }
    /** Downloads the watch page HTML, mapping HTTP failures to library errors. */
    async _fetchVideoHtml(videoId) {
        try {
            // The replacer function keeps `$` sequences in the id literal, and
            // encoding stops a crafted id from adding extra query parameters.
            const response = await this.http_client.get(constants_1.WATCH_URL.replace('{videoId}', () => encodeURIComponent(videoId)));
            return response.data;
        }
        catch (e) {
            if (e && e.response && e.response.status === 429) {
                throw new errors_1.TooManyRequests(videoId);
            }
            throw new errors_1.YouTubeRequestFailed(videoId, e);
        }
    }
    /**
     * Queries the Innertube player endpoint and returns the
     * `playerCaptionsTracklistRenderer` object of the response.
     */
    async _fetchCaptionsJson(videoId, html) {
        const apiKey = this._extractInnertubeApiKey(html);
        // A captcha page means YouTube is rate limiting this client.
        if (html.includes('class="g-recaptcha"')) {
            throw new errors_1.TooManyRequests(videoId);
        }
        if (!apiKey) {
            throw new errors_1.VideoUnavailable(videoId);
        }
        const response = await this.http_client.post(constants_1.INNERTUBE_API_URL.replace('{apiKey}', apiKey), {
            context: constants_1.INNERTUBE_CONTEXT,
            videoId: videoId,
        });
        const data = response.data || {};
        const playability = data.playabilityStatus;
        // Only a non-OK status that comes with a reason marks the video as
        // unavailable; a missing status falls through to the captions check
        // instead of dereferencing undefined.
        if (playability && playability.status !== 'OK' && playability.reason) {
            throw new errors_1.VideoUnavailable(videoId);
        }
        const captions = data.captions ? data.captions.playerCaptionsTracklistRenderer : undefined;
        if (!captions) {
            throw new errors_1.TranscriptsDisabled(videoId);
        }
        return captions;
    }
    /** Returns the INNERTUBE_API_KEY embedded in the page, or null when absent. */
    _extractInnertubeApiKey(html) {
        const match = html.match(/"INNERTUBE_API_KEY":\s*"([a-zA-Z0-9_-]+)"/);
        return match ? match[1] : null;
    }
    /** Converts the captions JSON into a TranscriptList keyed by language code. */
    _buildTranscriptList(videoId, captionsJson) {
        const translationLanguages = (captionsJson.translationLanguages || []).map((lang) => ({
            language: lang.languageName.runs[0].text,
            languageCode: lang.languageCode,
        }));
        const manuallyCreatedTranscripts = {};
        const generatedTranscripts = {};
        for (const caption of captionsJson.captionTracks || []) {
            // Tracks of kind 'asr' are the auto-generated ones.
            const isGenerated = caption.kind === 'asr';
            const transcriptDict = isGenerated
                ? generatedTranscripts
                : manuallyCreatedTranscripts;
            const languageCode = caption.languageCode;
            transcriptDict[languageCode] = new Transcript_1.Transcript(videoId, caption.baseUrl.replace(/&fmt=srv3/, ''), caption.name.runs[0].text, languageCode, isGenerated, caption.isTranslatable ? translationLanguages : []);
        }
        return new TranscriptList_1.TranscriptList(videoId, manuallyCreatedTranscripts, generatedTranscripts);
    }
}
|
|
77
|
+
exports.TranscriptListFetcher = TranscriptListFetcher;
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/** Watch-page URL template; the `{videoId}` placeholder is replaced with the video id. */
export declare const WATCH_URL = "https://www.youtube.com/watch?v={videoId}";
/** Innertube player endpoint template; the `{apiKey}` placeholder is replaced with the scraped API key. */
export declare const INNERTUBE_API_URL = "https://www.youtube.com/youtubei/v1/player?key={apiKey}";
/** Client context sent as the `context` field of every Innertube player request. */
export declare const INNERTUBE_CONTEXT: {
    client: { clientName: string; clientVersion: string };
};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.INNERTUBE_CONTEXT = exports.INNERTUBE_API_URL = exports.WATCH_URL = void 0;
|
|
4
|
+
exports.WATCH_URL = 'https://www.youtube.com/watch?v={videoId}';
|
|
5
|
+
exports.INNERTUBE_API_URL = 'https://www.youtube.com/youtubei/v1/player?key={apiKey}';
|
|
6
|
+
exports.INNERTUBE_CONTEXT = {
|
|
7
|
+
client: {
|
|
8
|
+
clientName: 'ANDROID',
|
|
9
|
+
clientVersion: '20.10.38',
|
|
10
|
+
},
|
|
11
|
+
};
|
package/dist/errors.d.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
/** Base class of every error thrown by this library. */
export declare class YouTubeTranscriptError extends Error {
    message: string;
    constructor(message: string);
}

/** The requested video cannot be played or its page could not be interpreted. */
export declare class VideoUnavailable extends YouTubeTranscriptError {
    videoId: string;
    constructor(videoId: string);
}

/** An HTTP request to YouTube failed; the underlying error is kept in `cause`. */
export declare class YouTubeRequestFailed extends YouTubeTranscriptError {
    videoId: string;
    cause: any;
    constructor(videoId: string, cause: any);
}

/** The player response for the video contains no caption track list. */
export declare class TranscriptsDisabled extends YouTubeTranscriptError {
    videoId: string;
    constructor(videoId: string);
}

/** None of the requested language codes has a transcript. */
export declare class NoTranscriptFound extends YouTubeTranscriptError {
    videoId: string;
    /** The language codes that were looked up. */
    languages: string[];
    constructor(videoId: string, languages: string[]);
}

/** Message: "Video <id> is not translatable". */
export declare class NotTranslatable extends YouTubeTranscriptError {
    videoId: string;
    constructor(videoId: string);
}

/** Message: "Translation language not available for video <id>". */
export declare class TranslationLanguageNotAvailable extends YouTubeTranscriptError {
    videoId: string;
    constructor(videoId: string);
}

/** Message: "Invalid video id: <id>". */
export declare class InvalidVideoId extends YouTubeTranscriptError {
    videoId: string;
    constructor(videoId: string);
}

/** YouTube answered with HTTP 429 or a captcha page. */
export declare class TooManyRequests extends YouTubeTranscriptError {
    videoId: string;
    constructor(videoId: string);
}
|
package/dist/errors.js
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.TooManyRequests = exports.InvalidVideoId = exports.TranslationLanguageNotAvailable = exports.NotTranslatable = exports.NoTranscriptFound = exports.TranscriptsDisabled = exports.YouTubeRequestFailed = exports.VideoUnavailable = exports.YouTubeTranscriptError = void 0;
|
|
4
|
+
/**
 * Base class of every error thrown by this library.
 * Subclasses overwrite `name` with their own class name.
 */
class YouTubeTranscriptError extends Error {
    /** @param message Human-readable description of the failure. */
    constructor(message) {
        super(message);
        Object.assign(this, { message, name: 'YouTubeTranscriptError' });
    }
}
|
|
11
|
+
exports.YouTubeTranscriptError = YouTubeTranscriptError;
|
|
12
|
+
/** Thrown when the requested video is unavailable. */
class VideoUnavailable extends YouTubeTranscriptError {
    /** @param videoId Id of the affected video. */
    constructor(videoId) {
        const description = `The video ${videoId} is unavailable`;
        super(description);
        Object.assign(this, { videoId, name: 'VideoUnavailable' });
    }
}
|
|
19
|
+
exports.VideoUnavailable = VideoUnavailable;
|
|
20
|
+
/** Thrown when an HTTP request to YouTube fails for a reason other than rate limiting. */
class YouTubeRequestFailed extends YouTubeTranscriptError {
    /**
     * @param videoId Id of the video that was being fetched.
     * @param cause The error (or other thrown value) that made the request fail.
     */
    constructor(videoId, cause) {
        // Anything can be thrown in JavaScript, so `cause` may be null/undefined
        // or lack a `message`; fall back to its string form instead of crashing
        // or printing "undefined".
        const detail = cause != null && cause.message !== undefined ? cause.message : String(cause);
        super(`Request failed for video ${videoId}: ${detail}`);
        this.videoId = videoId;
        this.cause = cause;
        this.name = 'YouTubeRequestFailed';
    }
}
|
|
28
|
+
exports.YouTubeRequestFailed = YouTubeRequestFailed;
|
|
29
|
+
/** Thrown when a video has no caption tracks. */
class TranscriptsDisabled extends YouTubeTranscriptError {
    /** @param videoId Id of the affected video. */
    constructor(videoId) {
        const description = `Transcripts are disabled for video ${videoId}`;
        super(description);
        Object.assign(this, { videoId, name: 'TranscriptsDisabled' });
    }
}
|
|
36
|
+
exports.TranscriptsDisabled = TranscriptsDisabled;
|
|
37
|
+
/** Thrown when none of the requested languages has a transcript. */
class NoTranscriptFound extends YouTubeTranscriptError {
    /**
     * @param videoId Id of the affected video.
     * @param languages Language codes that were looked up.
     */
    constructor(videoId, languages) {
        const requested = languages.join(', ');
        super(`No transcript found for video ${videoId} in languages: ${requested}`);
        Object.assign(this, { videoId, languages, name: 'NoTranscriptFound' });
    }
}
|
|
45
|
+
exports.NoTranscriptFound = NoTranscriptFound;
|
|
46
|
+
/** Error whose message states that the video is not translatable. */
class NotTranslatable extends YouTubeTranscriptError {
    /** @param videoId Id of the affected video. */
    constructor(videoId) {
        const description = `Video ${videoId} is not translatable`;
        super(description);
        Object.assign(this, { videoId, name: 'NotTranslatable' });
    }
}
|
|
53
|
+
exports.NotTranslatable = NotTranslatable;
|
|
54
|
+
/** Error whose message states that a translation language is not available. */
class TranslationLanguageNotAvailable extends YouTubeTranscriptError {
    /** @param videoId Id of the affected video. */
    constructor(videoId) {
        const description = `Translation language not available for video ${videoId}`;
        super(description);
        Object.assign(this, { videoId, name: 'TranslationLanguageNotAvailable' });
    }
}
|
|
61
|
+
exports.TranslationLanguageNotAvailable = TranslationLanguageNotAvailable;
|
|
62
|
+
/** Error whose message reports an invalid video id. */
class InvalidVideoId extends YouTubeTranscriptError {
    /** @param videoId The rejected id. */
    constructor(videoId) {
        const description = `Invalid video id: ${videoId}`;
        super(description);
        Object.assign(this, { videoId, name: 'InvalidVideoId' });
    }
}
|
|
69
|
+
exports.InvalidVideoId = InvalidVideoId;
|
|
70
|
+
/** Thrown when YouTube rate limits the client (HTTP 429 or a captcha page). */
class TooManyRequests extends YouTubeTranscriptError {
    /** @param videoId Id of the video that was being fetched. */
    constructor(videoId) {
        const description = `YouTube is receiving too many requests from this IP and now requires solving a captcha to fetch the transcript for video ${videoId}`;
        super(description);
        Object.assign(this, { videoId, name: 'TooManyRequests' });
    }
}
|
|
77
|
+
exports.TooManyRequests = TooManyRequests;
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { TranscriptResponse } from './types';
|
|
2
|
+
/** Options accepted by {@link YoutubeTranscript.fetchTranscript}. */
export interface TranscriptConfig {
    /** Language code of the transcript to fetch; `'en'` is used when omitted. */
    lang?: string;
    /** Country code. NOTE(review): the bundled implementation does not read this option. */
    country?: string;
}

/** Static entry points of the library. */
export declare class YoutubeTranscript {
    /**
     * Fetch transcript as a list of objects (default).
     */
    static fetchTranscript(
        videoId: string,
        config?: TranscriptConfig & { format?: 'object' }
    ): Promise<TranscriptResponse[]>;
    /**
     * Fetch transcript as a single string: the segment texts joined by spaces.
     */
    static fetchTranscript(
        videoId: string,
        config?: TranscriptConfig & { format: 'text' }
    ): Promise<string>;
    /** Fetch the list of all transcripts available for the video. */
    static fetchTranscriptList(videoId: string): Promise<import("./TranscriptList").TranscriptList>;
}
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Compiler-emitted interop helper: ES modules pass through untouched, any other
// value is wrapped as `{ default: value }`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.YoutubeTranscript = void 0;
|
|
7
|
+
const axios_1 = __importDefault(require("axios"));
|
|
8
|
+
const TranscriptListFetcher_1 = require("./TranscriptListFetcher");
|
|
9
|
+
/** Static entry points of the library. */
class YoutubeTranscript {
    /**
     * Fetches the transcript of a video.
     *
     * @param videoId Id of the video.
     * @param config Optional settings: `lang` (language code, 'en' when absent)
     *               and `format` ('text' joins all segment texts with spaces).
     * @returns The transcript segments, or a single string when `format` is 'text'.
     */
    static async fetchTranscript(videoId, config) {
        const client = axios_1.default.create();
        const listFetcher = new TranscriptListFetcher_1.TranscriptListFetcher(client);
        const available = await listFetcher.fetch(videoId);
        // Like the Python original, default to English when no language is given.
        const hasConfig = config !== null && config !== undefined;
        const wanted = hasConfig && config.lang ? [config.lang] : ['en'];
        // Lookup and download errors propagate to the caller unchanged.
        const segments = await available.findTranscript(wanted).fetch();
        if (hasConfig && config.format === 'text') {
            return segments.map((segment) => segment.text).join(' ');
        }
        return segments;
    }
    /**
     * Fetches the list of all transcripts available for a video.
     * @param videoId Id of the video.
     */
    static async fetchTranscriptList(videoId) {
        const client = axios_1.default.create();
        const listFetcher = new TranscriptListFetcher_1.TranscriptListFetcher(client);
        return await listFetcher.fetch(videoId);
    }
}
|
|
37
|
+
exports.YoutubeTranscript = YoutubeTranscript;
|
package/dist/types.d.ts
ADDED
package/dist/types.js
ADDED
package/package.json
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "youtube-transcript-api-ts",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "A lightweight TypeScript library to fetch YouTube video transcripts.",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"files": [
|
|
8
|
+
"dist",
|
|
9
|
+
"README.md"
|
|
10
|
+
],
|
|
11
|
+
"scripts": {
|
|
12
|
+
"build": "tsc",
|
|
13
|
+
"test": "ts-node test/test_fetch.ts",
|
|
14
|
+
"prepublishOnly": "npm run build"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"youtube",
|
|
18
|
+
"transcript",
|
|
19
|
+
"captions",
|
|
20
|
+
"subtitle",
|
|
21
|
+
"scraper",
|
|
22
|
+
"typescript"
|
|
23
|
+
],
|
|
24
|
+
"author": "",
|
|
25
|
+
"license": "MIT",
|
|
26
|
+
"repository": {
|
|
27
|
+
"type": "git",
|
|
28
|
+
"url": "git+https://github.com/Yogesh-G-3468/youtube-transcript-ts.git"
|
|
29
|
+
},
|
|
30
|
+
"bugs": {
|
|
31
|
+
"url": "https://github.com/Yogesh-G-3468/youtube-transcript-ts/issues"
|
|
32
|
+
},
|
|
33
|
+
"homepage": "https://github.com/Yogesh-G-3468/youtube-transcript-ts#readme",
|
|
34
|
+
"dependencies": {
|
|
35
|
+
"axios": "^1.13.5",
|
|
36
|
+
"fast-xml-parser": "^5.3.5",
|
|
37
|
+
"he": "^1.2.0"
|
|
38
|
+
},
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@types/he": "^1.2.3",
|
|
41
|
+
"@types/node": "^25.2.2",
|
|
42
|
+
"ts-node": "^10.9.2",
|
|
43
|
+
"typescript": "^5.9.3"
|
|
44
|
+
}
|
|
45
|
+
}
|