@tacone/prosey 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -31
- package/bin/prosey +14317 -0
- package/package.json +5 -5
- package/src/cache.test.ts +92 -0
- package/src/cache.ts +48 -0
- package/src/config.test.ts +98 -0
- package/src/config.ts +93 -0
- package/src/debug.ts +13 -0
- package/src/default-config.toml +16 -0
- package/src/index.ts +146 -19
- package/src/summarize.test.ts +44 -0
- package/src/summarize.ts +51 -0
- package/bin/prosey.js +0 -604
package/src/summarize.ts
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { spawn } from "node:child_process";
|
|
2
|
+
|
|
3
|
+
/**
 * Inputs accepted by {@link summarize}.
 */
export interface SummarizeOptions {
  /** Text placed ahead of the transcript in the input piped to the command. */
  prompt: string;
  /** Shell command line to execute; it receives the prompt and transcript on stdin. */
  command: string;
  /** Transcript text appended after the prompt, separated by a blank line. */
  transcript: string;
  /** Working directory for the spawned command, when provided. */
  cwd?: string;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Runs `command` through the system shell, writes `input` to its stdin and
 * resolves with everything the command printed on stdout.
 *
 * @param command - Shell command line (spawned with `shell: true`).
 * @param input - Text written to the child's stdin before stdin is closed.
 * @param cwd - Optional working directory for the child process.
 * @returns The child's complete stdout decoded as UTF-8.
 * @throws Rejects with an `Error` when the process cannot be spawned, exits
 *   with a non-zero code, or is terminated by a signal. For the latter two the
 *   message includes the collected stderr.
 */
function executeCommand(command: string, input: string, cwd?: string): Promise<string> {
  return new Promise((resolve, reject) => {
    const proc = spawn(command, [], { shell: true, stdio: "pipe", cwd });

    let collectedOut = "";
    let collectedErr = "";

    // Register process-level handlers first so a spawn failure is always
    // observed, even if the stdio guard below bails out early.
    proc.on("error", (err: Error) => reject(err));
    proc.on("close", (code: number | null, signal: NodeJS.Signals | null) => {
      if (code === 0) {
        resolve(collectedOut);
      } else if (code === null && signal !== null) {
        // `code` is null when the child was killed by a signal; say so
        // instead of reporting "code null".
        reject(new Error(`Command terminated by signal ${signal}: ${collectedErr}`));
      } else {
        reject(new Error(`Command exited with code ${code}: ${collectedErr}`));
      }
    });

    const { stdin, stdout, stderr } = proc;
    if (!stdin || !stdout || !stderr) {
      reject(new Error("Command was spawned without stdio pipes"));
      return;
    }

    // Decode at the stream level: the stream's decoder keeps incomplete
    // multi-byte sequences between chunks, whereas calling toString() on each
    // Buffer corrupts characters that straddle a chunk boundary.
    stdout.setEncoding("utf8");
    stderr.setEncoding("utf8");
    stdout.on("data", (chunk: string) => {
      collectedOut += chunk;
    });
    stderr.on("data", (chunk: string) => {
      collectedErr += chunk;
    });

    // A command that exits without draining stdin makes the write fail with
    // EPIPE. Without a listener that becomes an uncaught 'error' event; the
    // exit status reported by 'close' is the meaningful outcome in that case.
    stdin.on("error", (err: NodeJS.ErrnoException) => {
      if (err.code !== "EPIPE") reject(err);
    });
    stdin.end(input);
  });
}
|
|
36
|
+
|
|
37
|
+
/**
 * Produces a summary by piping the prompt, a blank line and the transcript to
 * the configured shell command and returning its stdout.
 *
 * If the output begins with the exact text that was sent (the command echoed
 * its input), that prefix is dropped. Trailing newlines are always removed.
 *
 * @param options - Prompt, command, transcript and optional working directory.
 * @returns The cleaned command output.
 * @throws Error when the cleaned output is empty or identical to the
 *   transcript; also propagates any rejection from running the command.
 */
export async function summarize(options: SummarizeOptions): Promise<string> {
  const { prompt, command, transcript, cwd } = options;
  const stdinText = `${prompt}\n\n${transcript}`;
  const raw = await executeCommand(command, stdinText, cwd);

  const withoutEcho = raw.startsWith(stdinText) ? raw.slice(stdinText.length) : raw;
  const summary = withoutEcho.replace(/\n+$/, "");

  const isMeaningful = summary !== "" && summary !== transcript;
  if (!isMeaningful) {
    throw new Error("Summarization command returned no meaningful output");
  }
  return summary;
}
|
package/bin/prosey.js
DELETED
|
@@ -1,604 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
// src/index.ts
|
|
4
|
-
import { writeFile } from "node:fs/promises";
|
|
5
|
-
|
|
6
|
-
// node_modules/youtube-transcript-plus/dist/youtube-transcript-plus.mjs
|
|
7
|
-
// Drives a generator as if it were an async function: each yielded value is
// awaited (wrapped in a promise when it is not one already) and its outcome is
// fed back through next()/throw(). The returned promise settles with the
// generator's return value or with the first error it does not catch.
function __awaiter(thisArg, _arguments, P, generator) {
  const PromiseImpl = P || Promise;
  const adopt = (value) =>
    value instanceof PromiseImpl ? value : new PromiseImpl((resolve) => resolve(value));

  return new PromiseImpl((resolve, reject) => {
    const advance = (result) => {
      if (result.done) {
        resolve(result.value);
      } else {
        adopt(result.value).then(onFulfilled, onRejected);
      }
    };
    const onFulfilled = (value) => {
      try {
        advance(generator.next(value));
      } catch (e) {
        reject(e);
      }
    };
    const onRejected = (reason) => {
      try {
        advance(generator["throw"](reason));
      } catch (e) {
        reject(e);
      }
    };
    // Instantiate the generator with the caller's `this` and arguments, then
    // run it to its first yield.
    advance((generator = generator.apply(thisArg, _arguments || [])).next());
  });
}
|
|
34
|
-
// User-Agent header value used when the caller does not supply one.
const DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36";
// Captures the 11-character video ID (group 1) from the supported YouTube URL shapes.
const RE_YOUTUBE = /(?:v=|\/|v\/|embed\/|watch\?.*v=|youtu\.be\/|\/v\/|e\/|watch\?.*vi?=|\/embed\/|\/v\/|vi?\/|watch\?.*vi?=|youtu\.be\/|\/vi?\/|\/e\/)([a-zA-Z0-9_-]{11})/i;
// Matches one <text start=".." dur="..">..</text> element; groups: start, dur, text.
const RE_XML_TRANSCRIPT = /<text start="([^"]*)" dur="([^"]*)">([^<]*)<\/text>/g;
|
|
37
|
-
// Thrown when the watch page contains a reCAPTCHA challenge or the transcript
// request is answered with HTTP 429.
class YoutubeTranscriptTooManyRequestError extends Error {
  constructor() {
    const message = "YouTube is receiving too many requests from your IP address. Please try again later or use a proxy. If the issue persists, consider reducing the frequency of requests.";
    super(message);
    this.name = "YoutubeTranscriptTooManyRequestError";
  }
}
|
|
43
|
-
|
|
44
|
-
// Thrown when the watch page or the player endpoint responds with a non-OK status.
class YoutubeTranscriptVideoUnavailableError extends Error {
  constructor(videoId) {
    const message = `The video with ID "${videoId}" is no longer available or has been removed. Please check the video URL or ID and try again.`;
    super(message);
    this.name = "YoutubeTranscriptVideoUnavailableError";
    this.videoId = videoId;
  }
}
|
|
51
|
-
|
|
52
|
-
// Thrown when a playable video exposes no caption data, or its caption track list is empty.
class YoutubeTranscriptDisabledError extends Error {
  constructor(videoId) {
    const message = `Transcripts are disabled for the video with ID "${videoId}". This may be due to the video owner disabling captions or the video not supporting transcripts.`;
    super(message);
    this.name = "YoutubeTranscriptDisabledError";
    this.videoId = videoId;
  }
}
|
|
59
|
-
|
|
60
|
-
// Thrown when no transcript can be obtained for the video: no API key on the
// watch page, no caption data on an unplayable video, no track URL, a failed
// transcript request, or a transcript body without any segments.
class YoutubeTranscriptNotAvailableError extends Error {
  constructor(videoId) {
    const message = `No transcripts are available for the video with ID "${videoId}". This may be because the video does not have captions or the captions are not accessible.`;
    super(message);
    this.name = "YoutubeTranscriptNotAvailableError";
    this.videoId = videoId;
  }
}
|
|
67
|
-
|
|
68
|
-
// Thrown when the requested language has no caption track; carries the
// language codes that are available instead.
class YoutubeTranscriptNotAvailableLanguageError extends Error {
  constructor(lang, availableLangs, videoId) {
    const message = `No transcripts are available in "${lang}" for the video with ID "${videoId}". Available languages: ${availableLangs.join(", ")}. Please try a different language.`;
    super(message);
    this.name = "YoutubeTranscriptNotAvailableLanguageError";
    this.videoId = videoId;
    this.lang = lang;
    this.availableLangs = availableLangs;
  }
}
|
|
77
|
-
|
|
78
|
-
// Thrown when a language code fails the BCP 47 shape check.
class YoutubeTranscriptInvalidLangError extends Error {
  constructor(lang) {
    const message = `Invalid language code "${lang}". Please provide a valid BCP 47 language code (e.g., "en", "fr", "pt-BR").`;
    super(message);
    this.name = "YoutubeTranscriptInvalidLangError";
    this.lang = lang;
  }
}
|
|
85
|
-
|
|
86
|
-
// Thrown when the input is neither a bare 11-character video ID nor a URL
// from which one can be extracted.
class YoutubeTranscriptInvalidVideoIdError extends Error {
  constructor() {
    const message = 'Invalid YouTube video ID or URL. Please provide a valid video ID or URL. Example: "dQw4w9WgXcQ" or "https://www.youtube.com/watch?v=dQw4w9WgXcQ".';
    super(message);
    this.name = "YoutubeTranscriptInvalidVideoIdError";
  }
}
|
|
92
|
-
// A bare video ID: exactly 11 characters from [a-zA-Z0-9_-].
const RE_VIDEO_ID = /^[a-zA-Z0-9_-]{11}$/;
// Shape check for language codes: 2-3 letters, then optional "-" subtags of 2-8 alphanumerics.
const RE_BCP47_LANG = /^[a-zA-Z]{2,3}(-[a-zA-Z0-9]{2,8})*$/;
|
|
94
|
-
// Maps each XML entity reference recognised by RE_XML_ENTITY to the character
// it stands for. Keys must be the full reference text ("&amp;", not "&"),
// because the lookup below is keyed by the regex match itself.
const XML_ENTITIES = {
  "&amp;": "&",
  "&lt;": "<",
  "&gt;": ">",
  "&quot;": '"',
  "&#39;": "'",
  "&apos;": "'",
};
// Matches exactly the entity references listed in XML_ENTITIES.
const RE_XML_ENTITY = /&(?:amp|lt|gt|quot|apos|#39);/g;

// Replaces the known XML entity references in `text` with their characters.
// Anything the table does not cover is left untouched.
function decodeXmlEntities(text) {
  return text.replace(RE_XML_ENTITY, (match) => XML_ENTITIES[match] ?? match);
}
|
|
109
|
-
// Normalises user input to a video ID: a bare 11-character ID is returned
// as is; otherwise the ID is extracted from a YouTube URL.
// Throws YoutubeTranscriptInvalidVideoIdError when neither form matches.
function retrieveVideoId(videoId) {
  const isBareId = RE_VIDEO_ID.test(videoId);
  if (isBareId) return videoId;

  const urlMatch = videoId.match(RE_YOUTUBE);
  if (!urlMatch || !urlMatch.length) {
    throw new YoutubeTranscriptInvalidVideoIdError();
  }
  return urlMatch[1];
}
|
|
119
|
-
// Throws YoutubeTranscriptInvalidLangError unless `lang` matches RE_BCP47_LANG.
function validateLang(lang) {
  const looksValid = RE_BCP47_LANG.test(lang);
  if (looksValid) return;
  throw new YoutubeTranscriptInvalidLangError(lang);
}
|
|
124
|
-
// Default HTTP transport, built on the global fetch().
// Sends a User-Agent header (the caller's, or DEFAULT_USER_AGENT), adds
// Accept-Language when `lang` is set, and lets caller-supplied headers
// override both. A body is attached only for POST requests.
async function defaultFetch(params) {
  const { url, lang, userAgent, method = "GET", body, headers = {}, signal } = params;

  const requestInit = {
    method,
    headers: {
      "User-Agent": userAgent || DEFAULT_USER_AGENT,
      ...(lang && { "Accept-Language": lang }),
      ...headers,
    },
    signal,
  };
  if (body && method === "POST") {
    requestInit.body = body;
  }
  return fetch(url, requestInit);
}
|
|
139
|
-
// True for HTTP 429 and for any 5xx status (500-599).
function isRetryableStatus(status) {
  if (status === 429) return true;
  return status >= 500 && status <= 599;
}
|
|
142
|
-
// Resolves after `ms` milliseconds. When an AbortSignal is supplied the wait
// is cancellable: an already-aborted signal rejects immediately, and an abort
// during the wait clears the timer and rejects with `signal.reason`.
function sleep(ms, signal) {
  return new Promise((resolve, reject) => {
    // Throwing inside the executor rejects the promise.
    signal?.throwIfAborted();

    if (!signal) {
      setTimeout(resolve, ms);
      return;
    }

    const onAbort = () => {
      clearTimeout(timer);
      reject(signal.reason);
    };
    const timer = setTimeout(() => {
      // Detach the abort listener once the wait completes normally; otherwise
      // every completed sleep leaves a listener on a signal that is reused
      // across retries.
      signal.removeEventListener("abort", onAbort);
      resolve(undefined);
    }, ms);
    signal.addEventListener("abort", onAbort, { once: true });
  });
}
|
|
155
|
-
// Calls `fetchFn` up to `retries + 1` times. A response is returned as soon as
// its status is not retryable (see isRetryableStatus) or the last attempt has
// been made. Between attempts it waits `retryDelay * 2^attempt` ms. The
// optional AbortSignal is checked before every attempt and during every wait.
async function fetchWithRetry(fetchFn, retries, retryDelay, signal) {
  let attempt = 0;
  while (attempt <= retries) {
    signal?.throwIfAborted();
    const response = await fetchFn();

    const shouldRetry = isRetryableStatus(response.status) && attempt !== retries;
    if (!shouldRetry) {
      return response;
    }

    const backoffMs = retryDelay * 2 ** attempt;
    await sleep(backoffMs, signal);
    attempt += 1;
  }
  // Only reachable when `retries` makes the loop condition false from the start.
  throw new Error("Unexpected: retry loop exited without returning");
}
|
|
169
|
-
// Fetches YouTube caption tracks and transcripts.
//
// `config` is optional. The fields read by this class are: userAgent,
// disableHttps, retries, retryDelay, signal, videoFetch, playerFetch,
// transcriptFetch, lang, videoDetails, cache and cacheTTL.
class YoutubeTranscript {
  constructor(config) {
    this.config = config;
  }

  // Loads the watch page to obtain the Innertube API key, then queries the
  // player endpoint and returns `{ tracks, playerJson }`.
  // Throws one of the YoutubeTranscript*Error classes when the video or its
  // captions cannot be obtained.
  async _fetchCaptionTracks(identifier, lang) {
    const userAgent = this.config?.userAgent ?? DEFAULT_USER_AGENT;
    const protocol = this.config?.disableHttps ? "http" : "https";
    const retries = this.config?.retries ?? 0;
    const retryDelay = this.config?.retryDelay ?? 1000;
    const signal = this.config?.signal;

    // Step 1: watch page, which embeds the API key.
    const watchUrl = `${protocol}://www.youtube.com/watch?v=${identifier}`;
    const watchFetchParams = { url: watchUrl, lang, userAgent, signal };
    const videoPageResponse = await fetchWithRetry(
      () => (this.config?.videoFetch ? this.config.videoFetch(watchFetchParams) : defaultFetch(watchFetchParams)),
      retries,
      retryDelay,
      signal,
    );
    if (!videoPageResponse.ok) {
      throw new YoutubeTranscriptVideoUnavailableError(identifier);
    }
    const videoPageBody = await videoPageResponse.text();
    if (videoPageBody.includes('class="g-recaptcha"')) {
      throw new YoutubeTranscriptTooManyRequestError();
    }
    // The key appears either as plain JSON or as an escaped JSON string.
    const apiKeyMatch = videoPageBody.match(/"INNERTUBE_API_KEY":"([^"]+)"/) || videoPageBody.match(/INNERTUBE_API_KEY\\":\\"([^\\"]+)\\"/);
    if (!apiKeyMatch) {
      throw new YoutubeTranscriptNotAvailableError(identifier);
    }
    const apiKey = apiKeyMatch[1];

    // Step 2: player endpoint, which lists the caption tracks.
    const playerEndpoint = `${protocol}://www.youtube.com/youtubei/v1/player?key=${apiKey}`;
    const playerBody = {
      context: {
        client: {
          clientName: "ANDROID",
          clientVersion: "20.10.38",
        },
      },
      videoId: identifier,
    };
    const playerFetchParams = {
      url: playerEndpoint,
      method: "POST",
      lang,
      userAgent,
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(playerBody),
      signal,
    };
    const playerRes = await fetchWithRetry(
      () => (this.config?.playerFetch ? this.config.playerFetch(playerFetchParams) : defaultFetch(playerFetchParams)),
      retries,
      retryDelay,
      signal,
    );
    if (!playerRes.ok) {
      throw new YoutubeTranscriptVideoUnavailableError(identifier);
    }
    const playerJson = await playerRes.json();

    const tracklist = playerJson.captions?.playerCaptionsTracklistRenderer ?? playerJson.playerCaptionsTracklistRenderer;
    const tracks = tracklist?.captionTracks;
    const isPlayableOk = playerJson.playabilityStatus?.status === "OK";

    if (!playerJson.captions || !tracklist) {
      // A playable video without caption data is reported as "disabled";
      // anything else as "not available".
      if (isPlayableOk) {
        throw new YoutubeTranscriptDisabledError(identifier);
      }
      throw new YoutubeTranscriptNotAvailableError(identifier);
    }
    if (!Array.isArray(tracks) || tracks.length === 0) {
      throw new YoutubeTranscriptDisabledError(identifier);
    }
    return { tracks, playerJson };
  }

  // Builds the video-details object from the player response, substituting
  // defaults for every missing field.
  _extractVideoDetails(playerJson, identifier) {
    const raw = playerJson.videoDetails;
    return {
      videoId: raw?.videoId ?? identifier,
      title: raw?.title ?? "",
      author: raw?.author ?? "",
      channelId: raw?.channelId ?? "",
      lengthSeconds: parseInt(raw?.lengthSeconds ?? "0", 10),
      viewCount: parseInt(raw?.viewCount ?? "0", 10),
      description: raw?.shortDescription ?? "",
      keywords: raw?.keywords ?? [],
      thumbnails: raw?.thumbnail?.thumbnails ?? [],
      isLiveContent: raw?.isLiveContent ?? false,
    };
  }

  // Fetches the transcript for a video ID or URL.
  // Resolves with the list of segments, or with `{ videoDetails, segments }`
  // when `config.videoDetails === true`. When a cache is configured it is
  // consulted first and updated afterwards.
  async fetchTranscript(videoId) {
    const identifier = retrieveVideoId(videoId);
    const lang = this.config?.lang;
    if (lang) {
      validateLang(lang);
    }
    const userAgent = this.config?.userAgent ?? DEFAULT_USER_AGENT;
    const includeDetails = this.config?.videoDetails === true;
    const cache = this.config?.cache;
    const cacheTTL = this.config?.cacheTTL;
    const cacheKeyPrefix = includeDetails ? "yt:transcript+details" : "yt:transcript";
    const cacheKey = `${cacheKeyPrefix}:${identifier}:${lang ?? ""}`;

    if (cache) {
      const cached = await cache.get(cacheKey);
      if (cached) {
        try {
          return JSON.parse(cached);
        } catch {
          // Unparseable cache entry: fall through and fetch again.
        }
      }
    }

    const { tracks, playerJson } = await this._fetchCaptionTracks(identifier, lang);
    const selectedTrack = lang ? tracks.find((t) => t.languageCode === lang) : tracks[0];
    if (!selectedTrack) {
      const available = tracks.map((t) => t.languageCode).filter(Boolean);
      throw new YoutubeTranscriptNotAvailableLanguageError(lang, available, identifier);
    }

    const transcriptBaseURL = selectedTrack.baseUrl ?? selectedTrack.url;
    if (!transcriptBaseURL) {
      throw new YoutubeTranscriptNotAvailableError(identifier);
    }
    // Drop the first "&fmt=..." parameter from the track URL.
    let transcriptURL = transcriptBaseURL.replace(/&fmt=[^&]+/, "");
    if (this.config?.disableHttps) {
      transcriptURL = transcriptURL.replace(/^https:\/\//, "http://");
    }

    const retries = this.config?.retries ?? 0;
    const retryDelay = this.config?.retryDelay ?? 1000;
    const signal = this.config?.signal;
    const transcriptFetchParams = { url: transcriptURL, lang, userAgent, signal };
    const transcriptResponse = await fetchWithRetry(
      () => (this.config?.transcriptFetch ? this.config.transcriptFetch(transcriptFetchParams) : defaultFetch(transcriptFetchParams)),
      retries,
      retryDelay,
      signal,
    );
    if (!transcriptResponse.ok) {
      if (transcriptResponse.status === 429) {
        throw new YoutubeTranscriptTooManyRequestError();
      }
      throw new YoutubeTranscriptNotAvailableError(identifier);
    }

    const transcriptBody = await transcriptResponse.text();
    const results = [...transcriptBody.matchAll(RE_XML_TRANSCRIPT)];
    const segments = results.map((m) => ({
      text: decodeXmlEntities(m[3]),
      duration: parseFloat(m[2]),
      offset: parseFloat(m[1]),
      lang: lang ?? selectedTrack.languageCode,
    }));
    if (segments.length === 0) {
      throw new YoutubeTranscriptNotAvailableError(identifier);
    }

    const result = includeDetails ? { videoDetails: this._extractVideoDetails(playerJson, identifier), segments } : segments;
    if (cache) {
      try {
        await cache.set(cacheKey, JSON.stringify(result), cacheTTL);
      } catch {
        // Errors while writing the cache are ignored; the result is still returned.
      }
    }
    return result;
  }

  // Lists the caption languages of a video as
  // `{ languageCode, languageName, isAutoGenerated }` records.
  async listLanguages(videoId) {
    const identifier = retrieveVideoId(videoId);
    const { tracks } = await this._fetchCaptionTracks(identifier);
    return tracks.map((track) => ({
      languageCode: track.languageCode,
      languageName: track.name?.simpleText ?? track.languageCode,
      isAutoGenerated: track.kind === "asr",
    }));
  }

  // Convenience wrapper: one-off instance, then fetchTranscript().
  static async fetchTranscript(videoId, config) {
    const instance = new YoutubeTranscript(config);
    return instance.fetchTranscript(videoId);
  }

  // Convenience wrapper: one-off instance, then listLanguages().
  static async listLanguages(videoId, config) {
    const instance = new YoutubeTranscript(config);
    return instance.listLanguages(videoId);
  }
}
|
|
354
|
-
// Module-level shortcut for YoutubeTranscript.fetchTranscript.
function fetchTranscript(videoId, config) {
  const pendingTranscript = YoutubeTranscript.fetchTranscript(videoId, config);
  return pendingTranscript;
}
// Module-level alias of the static YoutubeTranscript.listLanguages method.
const listLanguages = YoutubeTranscript.listLanguages;
|
|
358
|
-
|
|
359
|
-
// src/format.ts
|
|
360
|
-
// Named character references understood by decodeEntities, keyed by the name
// without the surrounding "&" and ";".
const namedEntities = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
};
|
|
367
|
-
// Formats a number of seconds as "MM:SS". Minutes are not wrapped at 60, so
// an hour and more shows as e.g. "62:05"; fractions of a second are dropped.
function formatTime(seconds) {
  const twoDigits = (n) => String(n).padStart(2, "0");
  const wholeMinutes = Math.floor(seconds / 60);
  const remainingSeconds = Math.floor(seconds % 60);
  return `${twoDigits(wholeMinutes)}:${twoDigits(remainingSeconds)}`;
}
|
|
372
|
-
// Formats a number of seconds as "H:MM:SS" when it spans at least one hour,
// otherwise as "M:SS". The leading unit is not zero-padded.
function formatDuration(seconds) {
  const twoDigits = (n) => String(n).padStart(2, "0");
  const hours = Math.floor(seconds / 3600);
  const minutes = Math.floor((seconds % 3600) / 60);
  const secs = twoDigits(Math.floor(seconds % 60));
  return hours > 0 ? `${hours}:${twoDigits(minutes)}:${secs}` : `${minutes}:${secs}`;
}
|
|
381
|
-
// Decodes HTML character references in `text`:
//   - decimal (&#39;) and hexadecimal (&#x27;) numeric references,
//   - the named references listed in `namedEntities`.
// Unknown names and out-of-range code points are left exactly as written.
function decodeEntities(text) {
  // String.fromCharCode truncates to 16 bits and so mangles anything above
  // U+FFFF (emoji, for instance). fromCodePoint handles the full range but
  // throws beyond U+10FFFF, hence the guard.
  const fromCodePoint = (match, codePoint) => (codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : match);
  // Own-property check: a plain lookup would also find Object.prototype
  // members, turning "&constructor;" into function source text.
  const isKnownName = (name) => Object.prototype.hasOwnProperty.call(namedEntities, name);

  return text
    .replace(/&#(\d+);/g, (match, digits) => fromCodePoint(match, Number(digits)))
    .replace(/&#[xX]([0-9a-fA-F]+);/g, (match, hex) => fromCodePoint(match, parseInt(hex, 16)))
    .replace(/&(\w+);/g, (match, name) => (isKnownName(name) ? namedEntities[name] : match));
}
|
|
384
|
-
// Renders one line per segment as "[MM:SS] text", joined with newlines.
// Segment text is passed through decodeEntities when `decode` is truthy.
function formatWithTimestamps(segments, decode) {
  const renderLine = (segment) => {
    const stamp = formatTime(segment.offset);
    const body = decode ? decodeEntities(segment.text) : segment.text;
    return `[${stamp}] ${body}`;
  };
  return segments.map(renderLine).join("\n");
}
|
|
391
|
-
// Joins all segment texts with single spaces and collapses each run of
// spaces into one. Segment text is passed through decodeEntities when
// `decode` is truthy.
function toText(segments, decode) {
  const pieces = segments.map((segment) => (decode ? decodeEntities(segment.text) : segment.text));
  const joined = pieces.join(" ");
  return joined.replace(/ +/g, " ");
}
|
|
394
|
-
// Serialises the segments as a JSON array, indented with two spaces. Each
// entry has text, offset, duration and a "MM:SS" timestamp derived from the
// offset. Segment text is passed through decodeEntities when `decode` is truthy.
function toJSON(segments, decode) {
  const toRecord = (segment) => {
    const text = decode ? decodeEntities(segment.text) : segment.text;
    return {
      text,
      offset: segment.offset,
      duration: segment.duration,
      timestamp: formatTime(segment.offset),
    };
  };
  return JSON.stringify(segments.map(toRecord), null, 2);
}
|
|
403
|
-
|
|
404
|
-
// src/index.ts
|
|
405
|
-
// Program name and version shown in the help text and by --version.
const NAME = "prosey";
const VERSION = "0.1.0";

// Returns the usage/help text printed for --help, for an empty argument list
// and after a "missing video" error.
// NOTE(review): the lines below are reproduced exactly as they appear in this
// view; column alignment inside the text may have been lost in rendering —
// confirm against the published file before relying on the exact spacing.
function help() {
  const lines = [
    `${NAME} v${VERSION}`,
    "",
    `Usage: ${NAME} [options] <video-url-or-id>`,
    `${NAME} info [options] <video-url-or-id>`,
    "",
    "Download a YouTube video transcript or show video details.",
    "",
    "Commands:",
    "info Show video metadata (title, channel, duration, etc.)",
    "",
    "Arguments:",
    "video-url-or-id YouTube URL (full or short) or bare video ID",
    "",
    "Options:",
    "--lang <code> Language code (e.g. en, fr). Auto-detect if omitted.",
    "-t, --timestamps Include timestamps [MM:SS] in output.",
    "--list List available transcript languages and exit.",
    "-o, --output <path> Write output to file instead of stdout.",
    "--json Output as JSON (suppresses details).",
    "--text Output as plain text (default).",
    "--details Prepend video details to transcript (default, text only).",
    "--no-details Suppress video details, transcript only.",
    "--no-decode-entities Preserve HTML entities (decoded by default).",
    "--help Show this help message.",
    "--version Show version.",
    "",
    "Examples:",
    `${NAME} dQw4w9WgXcQ`,
    `${NAME} https://www.youtube.com/watch?v=dQw4w9WgXcQ --lang es`,
    `${NAME} dQw4w9WgXcQ -t -o transcript.txt`,
    `${NAME} dQw4w9WgXcQ --list`,
    `${NAME} dQw4w9WgXcQ --json`,
    `${NAME} dQw4w9WgXcQ --no-details`,
    `${NAME} info dQw4w9WgXcQ`,
  ];
  return lines.join("\n");
}
|
|
443
|
-
// Builds the details header placed above a text transcript: title, channel,
// duration and view count, plus the description (cut to 500 characters and
// suffixed with "…" when longer) if there is one.
function formatDetailsBlock(details) {
  const title = decodeEntities(details.title);
  const channel = details.author;
  const duration = formatDuration(details.lengthSeconds);
  const views = details.viewCount.toLocaleString();
  const lines = [`Title: ${title}`, `Channel: ${channel}`, `Duration: ${duration}`, `Views: ${views}`];

  if (details.description) {
    let desc = details.description;
    if (desc.length > 500) {
      desc = desc.slice(0, 500) + "…";
    }
    // NOTE(review): as rendered in this view the replacement text equals the
    // matched newline, which makes this call a no-op; indentation after the
    // newline may have been lost in rendering — confirm against the published file.
    lines.push(`Description:\n${desc.replace(/\n/g, "\n")}`);
  }
  return lines.join("\n");
}
|
|
459
|
-
// Prints the video metadata to stdout as label/value rows, with every label
// padded to the width of the longest one. Keywords and description rows are
// printed only when present; empty description lines are skipped.
function printVideoInfo(details) {
  const labels = ["Title:", "Channel:", "Duration:", "Views:", "Video ID:", "Channel ID:", "Keywords:", "Description:"];
  const width = Math.max(...labels.map((label) => label.length));
  const row = (label, value) => `${label.padEnd(width)} ${value}`;

  const lines = [
    row("Title:", decodeEntities(details.title)),
    row("Channel:", details.author),
    row("Duration:", formatDuration(details.lengthSeconds)),
    row("Views:", details.viewCount.toLocaleString()),
    row("Video ID:", details.videoId),
    row("Channel ID:", details.channelId),
  ];
  if (details.keywords.length > 0) {
    lines.push(row("Keywords:", details.keywords.join(", ")));
  }
  if (details.description) {
    lines.push("Description:".padEnd(width));
    const nonEmptyLines = details.description.split("\n").filter(Boolean);
    for (const line of nonEmptyLines) {
      // NOTE(review): a single leading space as rendered in this view; a wider
      // indent may have been lost in rendering — confirm.
      lines.push(` ${line}`);
    }
  }
  console.log(lines.join("\n"));
}
|
|
484
|
-
// Prints the available transcript languages to stdout: a count header, then
// one row per language with the code padded to 8 columns, the language name
// and an "(auto-generated)" marker where applicable.
function printLanguages(languages) {
  const describe = (language) => {
    const code = language.languageCode.padEnd(8);
    const suffix = language.isAutoGenerated ? " (auto-generated)" : "";
    return ` ${code}${language.languageName}${suffix}`;
  };
  const header = `Available transcripts (${languages.length}):`;
  const body = languages.map(describe).join("\n");
  console.log(`${header}\n${body}`);
}
|
|
493
|
-
// ---- Command-line parsing -------------------------------------------------
// Reads process.argv into the module-level settings used by the main section
// below. Exits 0 after printing help or the version, and exits 1 on an unknown
// option, a missing option value or a missing video argument.
var args = process.argv.slice(2);

const wantsHelp = args.length === 0 || args.includes("--help");
if (wantsHelp) {
  console.log(help());
  process.exit(0);
}
if (args.includes("--version")) {
  console.log(VERSION);
  process.exit(0);
}

// A leading "info" selects the metadata command and is removed from the list.
var mode = "transcript";
if (args[0] === "info") {
  mode = "info";
  args.splice(0, 1);
}

var videoId = "";
var lang;
var timestamps = false;
var listOnly = false;
var outputPath;
var outputJson = false;
var noDecode = false;
var showDetails = true;

for (let i = 0; i < args.length; i++) {
  const arg = args[i];
  if (!arg) continue;

  switch (arg) {
    case "--lang":
      // The value is the next argument, whatever it looks like.
      lang = args[++i] ?? undefined;
      if (!lang) {
        console.error("Error: --lang requires a language code");
        process.exit(1);
      }
      break;
    case "--timestamps":
    case "-t":
      timestamps = true;
      break;
    case "--list":
      listOnly = true;
      break;
    case "-o":
    case "--output":
      outputPath = args[++i] ?? undefined;
      if (!outputPath) {
        console.error("Error: -o/--output requires a file path");
        process.exit(1);
      }
      break;
    case "--json":
      outputJson = true;
      break;
    case "--text":
      outputJson = false;
      break;
    case "--details":
      showDetails = true;
      break;
    case "--no-details":
      showDetails = false;
      break;
    case "--no-decode-entities":
      noDecode = true;
      break;
    default:
      if (arg.startsWith("-")) {
        console.error(`Unknown option: ${arg}`);
        process.exit(1);
      } else {
        // Positional argument; when several are given the last one wins.
        videoId = arg;
      }
  }
}

if (!videoId) {
  console.error("Error: missing video URL or ID");
  console.log(help());
  process.exit(1);
}
|
|
557
|
-
// ---- Main -------------------------------------------------------------------
// Runs the selected command: "info" prints video metadata, --list prints the
// available languages, otherwise the transcript is written to the output file
// or to stdout. Any error is reported as "Error: <message>" with exit code 1.
try {
  // Sends the finished text to the -o/--output file when given, else to stdout.
  const emit = async (text) => {
    if (outputPath) {
      await writeFile(outputPath, text, "utf8");
    } else {
      console.log(text);
    }
  };

  if (mode === "info") {
    const result = await fetchTranscript(videoId, { videoDetails: true, lang });
    if (outputJson) {
      console.log(JSON.stringify(result.videoDetails, null, 2));
    } else {
      printVideoInfo(result.videoDetails);
    }
    process.exit(0);
  }

  if (listOnly) {
    const languages = await listLanguages(videoId);
    printLanguages(languages);
    process.exit(0);
  }

  const decode = !noDecode;
  const withDetails = showDetails && !outputJson;

  if (withDetails) {
    // Text output with the details header in front of the transcript.
    const config = lang ? { lang, videoDetails: true } : { videoDetails: true };
    const result = await fetchTranscript(videoId, config);
    const detailsBlock = formatDetailsBlock(result.videoDetails);
    const transcript = timestamps
      ? formatWithTimestamps(result.segments, decode)
      : toText(result.segments, decode);
    await emit(`${detailsBlock}\n\n\n${transcript}\n`);
  } else {
    // Transcript only: JSON takes precedence over timestamps, plain text otherwise.
    const segments = lang ? await fetchTranscript(videoId, { lang }) : await fetchTranscript(videoId);
    let rendered;
    if (outputJson) {
      rendered = toJSON(segments, decode);
    } else if (timestamps) {
      rendered = formatWithTimestamps(segments, decode);
    } else {
      rendered = toText(segments, decode);
    }
    await emit(rendered + "\n");
  }
} catch (err) {
  const message = err instanceof Error ? err.message : String(err);
  console.error(`Error: ${message}`);
  process.exit(1);
}
|