@mkterswingman/5mghost-yonder 0.0.38 → 0.0.40

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,169 @@
1
+ import { existsSync, mkdirSync, readFileSync, readdirSync, renameSync, statSync, unlinkSync, writeFileSync } from "node:fs";
2
+ import { dirname, join } from "node:path";
3
+ import { PATHS } from "../../utils/config.js";
4
+ import { runYtDlp } from "../../utils/ytdlp.js";
5
+ import { appendDiagnosticLog } from "../../utils/ytdlpFailures.js";
6
+ import { classifyYtDlpCookieFailure, COOKIE_EXPIRED_MESSAGE, COOKIE_INVALID_MESSAGE, COOKIE_JOB_MESSAGE, RATE_LIMITED_MESSAGE, SIGN_IN_REQUIRED_MESSAGE, ensureSubtitleCookiesReady, isCookieFailureText, tryRefreshSubtitleCookies } from "./cookieSession.js";
7
+ import { vttToCsv } from "./parse.js";
8
/**
 * Formats the current local date as `YYYY-MM-DD`.
 *
 * Month and day are zero-padded to two digits; the year is emitted as-is.
 *
 * @returns {string} Today's date in the process's local time zone.
 */
function todayDateStr() {
    const now = new Date();
    const twoDigits = (value) => String(value).padStart(2, "0");
    const pieces = [
        now.getFullYear(),
        twoDigits(now.getMonth() + 1), // getMonth() is zero-based
        twoDigits(now.getDate()),
    ];
    return pieces.join("-");
}
15
/**
 * Downloads a single subtitle track with yt-dlp and returns where it landed.
 *
 * When `format` is `"csv"` the track is fetched as VTT and converted with
 * `vttToCsv`; the intermediate VTT file is then removed (best effort).
 *
 * @param videoId Video id, used for the default watch URL and default file stem.
 * @param lang Subtitle language code passed to `--sub-langs`.
 * @param format Requested subtitle format (`"csv"` is produced from VTT).
 * @param options Optional overrides:
 *   - `outputDir`: directory yt-dlp writes into (defaults to `PATHS.subtitlesDir`)
 *   - `outputStem`: file stem for the yt-dlp output template
 *   - `sourceUrl`: URL to download from instead of the YouTube watch URL
 *   - `targetFile`: final path the subtitle file is renamed to
 * @returns On failure `{ ok: false, error, diagnosticLogPath?, cookieFailureCode? }`.
 *   On success `{ ok: true, filePath, text? }`, where `text` is only included
 *   for files of at most 100 KiB.
 * @throws Filesystem errors from creating directories, reading, writing,
 *   renaming or stat-ing the subtitle file are not caught here.
 */
export async function downloadSubtitle(videoId, lang, format, options = {}) {
    const INLINE_TEXT_MAX_BYTES = 100 * 1024;
    const outputDir = options.outputDir ?? PATHS.subtitlesDir;
    mkdirSync(outputDir, { recursive: true });
    if (options.targetFile) {
        mkdirSync(dirname(options.targetFile), { recursive: true });
    }
    // Compute the stem once so the template handed to yt-dlp and the fallback
    // scan below can never disagree (e.g. when the date rolls over mid-run).
    const stem = options.outputStem ?? `${todayDateStr()}_${videoId}_${lang}`;
    const outTemplate = join(outputDir, stem);
    // CSV is not a yt-dlp subtitle format; fetch VTT and convert afterwards.
    const dlFormat = format === "csv" ? "vtt" : format;
    const result = await runYtDlp([
        "--skip-download",
        // NOTE(review): presumably selects the storyboard format so format
        // selection cannot fail while only subtitles are wanted — confirm.
        "-f", "mhtml",
        "--write-sub",
        "--write-auto-sub",
        "--sub-langs",
        lang,
        "--sub-format",
        dlFormat,
        "--output",
        outTemplate,
        options.sourceUrl ?? `https://www.youtube.com/watch?v=${videoId}`,
    ]);
    if (result.exitCode !== 0) {
        // Cookie-related failures carry a code so callers can attempt a refresh.
        const cookieFailure = classifyYtDlpCookieFailure(result.stderr);
        if (cookieFailure) {
            return {
                ok: false,
                cookieFailureCode: cookieFailure.code,
                diagnosticLogPath: result.failureLogPath,
                error: appendDiagnosticLog(cookieFailure.message, result.failureLogPath),
            };
        }
        return {
            ok: false,
            diagnosticLogPath: result.failureLogPath,
            error: appendDiagnosticLog(result.stderr.slice(0, 500) || `yt-dlp exited with ${result.exitCode}`, result.failureLogPath),
        };
    }
    // Expected output name is `<stem>.<lang>.<ext>`; try the requested format
    // first, then the other formats in a fixed order.
    const possibleExts = [`${lang}.${dlFormat}`, `${lang}.vtt`, `${lang}.srt`, `${lang}.ttml`, `${lang}.srv3`];
    let foundFile = possibleExts
        .map((ext) => `${outTemplate}.${ext}`)
        .find((candidate) => existsSync(candidate));
    if (!foundFile) {
        try {
            // Fallback scan. Only accept names shaped like `<stem>.<x>.<ext>`:
            // a bare `startsWith(stem)` would also match unrelated files such as
            // `en-GB.*` for stem `en`, or an already-renamed target from a
            // previous format pass (`en.vtt`), which would then be renamed away.
            const stemPrefix = `${stem}.`;
            const match = readdirSync(outputDir).find((name) => name.startsWith(stemPrefix) && name.slice(stemPrefix.length).includes("."));
            if (match) {
                foundFile = join(outputDir, match);
            }
        }
        catch {
            // Best effort: an unreadable directory is reported as "not found" below.
        }
    }
    if (!foundFile) {
        return {
            ok: false,
            error: `No subtitle file found for language '${lang}'`,
        };
    }
    if (format === "csv") {
        const sourcePath = foundFile;
        const csvContent = vttToCsv(readFileSync(sourcePath, "utf8"));
        // Swap whatever the final extension is for `.csv`, so a non-VTT
        // fallback file is not overwritten in place under its old extension.
        const csvPath = sourcePath.replace(/\.[^./\\]+$/, ".csv");
        writeFileSync(csvPath, csvContent, "utf8");
        foundFile = csvPath;
        if (sourcePath !== csvPath) {
            try {
                unlinkSync(sourcePath);
            }
            catch {
                // Best effort: a leftover intermediate file is harmless.
            }
        }
    }
    if (options.targetFile && foundFile !== options.targetFile) {
        renameSync(foundFile, options.targetFile);
        foundFile = options.targetFile;
    }
    // Inline the content only for small files; larger ones are returned by path.
    if (statSync(foundFile).size <= INLINE_TEXT_MAX_BYTES) {
        const text = readFileSync(foundFile, "utf8");
        return { ok: true, text, filePath: foundFile };
    }
    return { ok: true, filePath: foundFile };
}
108
/**
 * Tells whether an error message means "this subtitle track does not exist"
 * rather than a genuine download failure.
 *
 * Matching is case-insensitive substring search against a fixed phrase list.
 *
 * @param error Error text to inspect; falsy values yield `false`.
 * @returns {boolean} `true` when any known "unavailable" phrase occurs.
 */
function isUnavailableSubtitleError(error) {
    if (!error) {
        return false;
    }
    const haystack = error.toLowerCase();
    const unavailablePhrases = [
        "no subtitle file found",
        "no subtitles",
        "subtitle is not available",
        "requested subtitles are not available",
    ];
    return unavailablePhrases.some((phrase) => haystack.includes(phrase));
}
118
/**
 * Downloads every requested language in every requested format, one at a time,
 * and returns the resulting file paths grouped by format.
 *
 * Each file is written to `<input.subtitlesDir>/<lang>.<format>`. If a download
 * reports a cookie failure code other than `"RATE_LIMITED"`, a cookie refresh is
 * attempted and, when it succeeds, the download is retried once.
 *
 * @param input Job description: `videoId`, optional `sourceUrl`, `languages`,
 *   `formats`, `subtitlesDir`, optional `skipMissingLanguages` and optional
 *   `onProgress(completed, total)` callback invoked after each finished or
 *   skipped language/format pair.
 * @returns Object mapping each format to the list of downloaded file paths.
 * @throws {Error} When cookies are not ready, when a download fails with a
 *   cookie-related message, when a download fails for any other reason (unless
 *   it is an "unavailable" error and `skipMissingLanguages` is set), or when no
 *   file at all was produced.
 */
export async function downloadSubtitlesForLanguages(input) {
    const readiness = await ensureSubtitleCookiesReady();
    if (!readiness.ok) {
        throw new Error(readiness.jobMessage);
    }
    const total = input.formats.length * input.languages.length;
    let completed = 0;
    // Bumps the counter and notifies the optional progress listener.
    const reportStep = () => {
        completed += 1;
        input.onProgress?.(completed, total);
    };
    const filesByFormat = {};
    for (const format of input.formats) {
        const collected = [];
        filesByFormat[format] = collected;
        for (const lang of input.languages) {
            const targetFile = join(input.subtitlesDir, `${lang}.${format}`);
            // One download attempt; reused for the post-refresh retry.
            const attempt = () => downloadSubtitle(input.videoId, lang, format, {
                sourceUrl: input.sourceUrl,
                outputDir: input.subtitlesDir,
                outputStem: `${lang}`,
                targetFile,
            });
            let result = await attempt();
            const worthRefreshing = Boolean(result.cookieFailureCode) && result.cookieFailureCode !== "RATE_LIMITED";
            if (worthRefreshing && (await tryRefreshSubtitleCookies())) {
                result = await attempt();
            }
            if (result.ok && result.filePath) {
                collected.push(result.filePath);
                reportStep();
                continue;
            }
            // Failure path: cookie problems abort the whole job.
            if (isCookieFailureText(result.error)) {
                throw new Error(COOKIE_JOB_MESSAGE);
            }
            // A missing track may be tolerated; it still counts as progress.
            if (input.skipMissingLanguages && isUnavailableSubtitleError(result.error)) {
                reportStep();
                continue;
            }
            throw new Error(result.error ?? `Failed to download ${format} subtitle for ${lang}`);
        }
    }
    const gotSomething = Object.values(filesByFormat).some((files) => files.length > 0);
    if (!gotSomething) {
        throw new Error(`No subtitles found for requested languages: ${input.languages.join(", ")}`);
    }
    return filesByFormat;
}
169
+ export { COOKIE_EXPIRED_MESSAGE, COOKIE_INVALID_MESSAGE, SIGN_IN_REQUIRED_MESSAGE, RATE_LIMITED_MESSAGE };
@@ -0,0 +1 @@
1
/**
 * Convert WebVTT subtitle text into CSV with the columns
 * `start_time,end_time,text` (a header row is always emitted).
 *
 * Markup tags are stripped from cue text and a small set of HTML entities is
 * decoded. Cues shorter than 50 ms are dropped and consecutive cues with
 * identical text are merged into one row. When inline word-timing tags
 * (`<hh:mm:ss.mmm><c>`) are present, only the last text line of each
 * multi-line cue is kept.
 */
+ export declare function vttToCsv(vtt: string): string;
@@ -0,0 +1,106 @@
1
/**
 * Decodes the handful of HTML entities handled here:
 * `&gt;`, `&lt;`, `&amp;`, `&quot;`, `&#39;` and `&nbsp;`.
 *
 * Decoding happens in a single pass, so each entity is decoded exactly once.
 * Chained replacements would double-decode input such as `&amp;quot;`
 * (first to `&quot;`, then to `"`), corrupting text that literally contains
 * an escaped entity.
 *
 * @param text Text possibly containing HTML entities.
 * @returns {string} The text with the supported entities replaced; anything
 *   else is left untouched.
 */
function decodeHtmlEntities(text) {
    const replacements = {
        gt: ">",
        lt: "<",
        amp: "&",
        quot: '"',
        "#39": "'",
        nbsp: " ",
    };
    return text.replace(/&(gt|lt|amp|quot|#39|nbsp);/g, (_whole, name) => replacements[name]);
}
10
/**
 * Parses a WebVTT cue timing line (`start --> end [settings]`).
 *
 * Both timestamp forms permitted by WebVTT are accepted: `hh:mm:ss.ttt`
 * (hours of any width) and the short `mm:ss.ttt` form without hours.
 * Requiring an hours field would make every cue of a short-form file
 * unparseable.
 *
 * @param line A single line of VTT text.
 * @returns `{ startStr, endStr, startSec, endSec }`, where the `*Str` fields
 *   are the timestamps as written and the `*Sec` fields are seconds as
 *   numbers, or `null` when the line contains no timing pair.
 */
function parseTimestamp(line) {
    const match = line.match(/((?:\d+:)?\d{2}:\d{2}\.\d{3})\s*-->\s*((?:\d+:)?\d{2}:\d{2}\.\d{3})/);
    if (!match)
        return null;
    // Fold base-60 fields left to right; works for both 2- and 3-part stamps.
    const toSec = (stamp) => stamp
        .split(":")
        .reduce((total, field) => total * 60 + Number(field), 0);
    const [, startStr, endStr] = match;
    return {
        startStr,
        endStr,
        startSec: toSec(startStr),
        endSec: toSec(endStr),
    };
}
25
/**
 * Escapes one value for use as a CSV field.
 *
 * A value containing a double quote, comma, line feed or carriage return is
 * wrapped in double quotes with embedded quotes doubled; any other value is
 * returned unchanged.
 *
 * @param value Raw field text.
 * @returns {string} The field, quoted only when necessary.
 */
function csvEscapeField(value) {
    const needsQuoting = /[",\n\r]/.test(value);
    if (!needsQuoting) {
        return value;
    }
    const doubledQuotes = value.replace(/"/g, '""');
    return `"${doubledQuotes}"`;
}
31
/**
 * Converts WebVTT subtitle text into CSV with a `start_time,end_time,text`
 * header row.
 *
 * Processing steps:
 *  1. Collect cues: a line containing ` --> ` starts a new cue; following
 *     non-empty lines (except `WEBVTT`/`Kind:`/`Language:` headers and
 *     all-digit lines) are its text lines.
 *  2. If the input contains inline word-timing tags (`<hh:mm:ss.mmm><c>`), a
 *     cue with two or more text lines keeps only its last line; otherwise all
 *     lines are joined with a space. Tags are stripped and entities decoded.
 *  3. Cues shorter than 0.05 s are dropped and consecutive cues with identical
 *     text are merged, extending the earlier cue's end time.
 *
 * @param vtt Raw VTT file content.
 * @returns {string} CSV text, always terminated by a newline.
 */
export function vttToCsv(vtt) {
    const isAutoCaption = /<\d{2}:\d{2}:\d{2}\.\d{3}><c>/.test(vtt);
    const stripTags = (fragment) => fragment.replace(/<[^>]*>/g, "").trim();
    const rawCues = [];
    let pendingTs = null;
    let pendingLines = [];
    // Turns the cue collected so far into a record, if it has timing and text.
    const flushPending = () => {
        if (!pendingTs || pendingLines.length === 0) {
            return;
        }
        const useLastLineOnly = isAutoCaption && pendingLines.length >= 2;
        const stripped = useLastLineOnly
            ? stripTags(pendingLines[pendingLines.length - 1])
            : pendingLines
                .map((entry) => stripTags(entry))
                .filter(Boolean)
                .join(" ");
        const text = decodeHtmlEntities(stripped);
        if (text) {
            rawCues.push({ ...pendingTs, text });
        }
    };
    for (const rawLine of vtt.split("\n")) {
        const line = rawLine.trim();
        if (line.includes(" --> ")) {
            // A timing line closes the previous cue and opens a new one.
            flushPending();
            pendingTs = parseTimestamp(line);
            pendingLines = [];
            continue;
        }
        const isSkippable = !line ||
            line.startsWith("WEBVTT") ||
            line.startsWith("Kind:") ||
            line.startsWith("Language:") ||
            /^\d+$/.test(line);
        if (!isSkippable) {
            pendingLines.push(line);
        }
    }
    // The final cue has no following timing line to trigger its flush.
    flushPending();
    if (rawCues.length === 0) {
        return "start_time,end_time,text\n";
    }
    const merged = [];
    for (const cue of rawCues) {
        if (cue.endSec - cue.startSec < 0.05) {
            continue;
        }
        const previous = merged.length > 0 ? merged[merged.length - 1] : undefined;
        if (previous !== undefined && previous.text === cue.text) {
            // Same text as the row before: stretch that row instead of repeating it.
            previous.endSec = cue.endSec;
            previous.endStr = cue.endStr;
            continue;
        }
        merged.push({ ...cue });
    }
    const dataRows = merged.map((cue) => `${cue.startStr},${cue.endStr},${csvEscapeField(cue.text)}`);
    return ["start_time,end_time,text", ...dataRows].join("\n") + "\n";
}
@@ -1,29 +1,8 @@
1
1
  import type { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
2
- import type { YtMcpConfig } from "../utils/config.js";
3
2
  import type { TokenManager } from "@mkterswingman/5mghost-shared-client/auth";
4
- export declare function toReadableSubtitleJobError(error: unknown): string;
5
- /**
6
- * Convert VTT subtitle content to clean, human-readable CSV.
7
- *
8
- * YouTube auto-captions use a "rolling" VTT format where each cue has two
9
- * lines: the first line repeats the previous cue's text, and the second line
10
- * contains new words (marked with <c> tags for word-level timing). This
11
- * function detects and handles this pattern:
12
- *
13
- * 1. Detects auto-caption format (presence of <c> word-timing tags)
14
- * 2. For auto-captions: extracts only the NEW text from each cue's second
15
- * line, skips transition cues, and concatenates into clean sentences
16
- * 3. For manual subtitles: passes through cleanly with no data loss
17
- * 4. Outputs: start_time, end_time, text
18
- */
19
- export declare function vttToCsv(vtt: string): string;
20
- export declare function downloadSubtitlesForLanguages(input: {
21
- videoId: string;
22
- sourceUrl?: string;
23
- languages: string[];
24
- formats: string[];
25
- subtitlesDir: string;
26
- skipMissingLanguages?: boolean;
27
- onProgress?: (completed: number, total: number) => void;
28
- }): Promise<Record<string, string[]>>;
3
+ import type { YtMcpConfig } from "../utils/config.js";
4
+ import { toReadableSubtitleJobError } from "./subtitles/cookieSession.js";
5
+ import { downloadSubtitlesForLanguages } from "./subtitles/download.js";
6
+ import { vttToCsv } from "./subtitles/parse.js";
7
+ export { vttToCsv, downloadSubtitlesForLanguages, toReadableSubtitleJobError };
29
8
  export declare function registerSubtitleTools(server: McpServer, config: YtMcpConfig, tokenManager: TokenManager): void;