summd 0.1.10 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/url-to-md.js +52 -21
- package/package.json +1 -1
package/dist/url-to-md.js
CHANGED
|
@@ -80,16 +80,29 @@ async function fetchYouTubeTitle(url, videoId) {
|
|
|
80
80
|
}
|
|
81
81
|
// Browser cookie sources — tried in order, first working one is reused for all runs.
|
|
82
82
|
const BROWSERS = ['chrome', 'chromium', 'firefox', 'safari', 'edge'];
|
|
83
|
-
// Subtitle strategies
|
|
84
|
-
//
|
|
85
|
-
const
|
|
86
|
-
// 1.
|
|
87
|
-
['--write-auto-subs', '--sub-langs', 'orig'],
|
|
88
|
-
// 2. English manual + ASR
|
|
83
|
+
// Subtitle fallback strategies used when native language detection fails.
|
|
84
|
+
// Tried in order after the native-language attempt.
|
|
85
|
+
const SUB_FALLBACKS = [
|
|
86
|
+
// 1. English manual + ASR
|
|
89
87
|
['--write-subs', '--write-auto-subs', '--sub-langs', 'en'],
|
|
90
|
-
//
|
|
88
|
+
// 2. Most-spoken languages by global user count (covers remaining cases)
|
|
91
89
|
['--write-subs', '--write-auto-subs', '--sub-langs', 'zh-Hans,zh,ja,ko,fr,de,es,pt,hi,ar,ru'],
|
|
92
90
|
];
|
|
91
|
+
// Detect the video's primary language via yt-dlp --dump-json.
|
|
92
|
+
// Returns a language code like 'zh-Hans', 'en', 'ja', etc., or null on failure.
|
|
93
|
+
/**
 * Detect the video's primary language from yt-dlp metadata.
 *
 * Runs `yt-dlp --dump-json` for the URL and reads the `language` field of the
 * JSON it prints. This is best-effort: any failure (yt-dlp not runnable,
 * timeout, non-zero exit, unparsable output) yields `null` so the caller can
 * move on to its fallback subtitle strategies.
 *
 * @param {string} url - Video URL handed to yt-dlp.
 * @param {string[]} cookieArgs - Extra yt-dlp arguments selecting a cookie
 *   source; may be an empty array.
 * @returns {Promise<string | null>} A non-empty language code such as 'en' or
 *   'ja', or null when it cannot be determined.
 */
async function getVideoLanguage(url, cookieArgs) {
    // Node's execFile caps captured stdout at 1 MiB by default and rejects the
    // call once that is exceeded. The full metadata dump can be large, so raise
    // the cap; otherwise a successful yt-dlp run would be reported as "no
    // language" purely because of output size.
    const maxBuffer = 64 * 1024 * 1024;
    try {
        const { stdout } = await execFileAsync('yt-dlp', [
            '--dump-json', '--no-playlist', '--quiet',
            ...cookieArgs, url,
        ], { timeout: 15_000, maxBuffer });
        const language = JSON.parse(stdout)?.language;
        // Only hand back a usable string: the value is later passed to yt-dlp as
        // a command-line argument, so anything else is treated as "unknown".
        return typeof language === 'string' && language.length > 0 ? language : null;
    }
    catch {
        // Deliberately swallowed: language detection is optional and the caller
        // treats null as "skip the native-language attempt".
        return null;
    }
}
|
|
93
106
|
async function ytDlpTranscript(url, videoId) {
|
|
94
107
|
const dir = tmpdir();
|
|
95
108
|
const outTemplate = join(dir, videoId);
|
|
@@ -139,25 +152,43 @@ async function ytDlpTranscript(url, videoId) {
|
|
|
139
152
|
return null;
|
|
140
153
|
}
|
|
141
154
|
};
|
|
142
|
-
// Phase 1: find a working browser cookie source
|
|
143
|
-
//
|
|
155
|
+
// Phase 1: find a working browser cookie source.
|
|
156
|
+
// Try each browser with a no-op probe (--dump-json is light and confirms auth works).
|
|
144
157
|
let cookieArgs = [];
|
|
145
158
|
for (const browser of BROWSERS) {
|
|
146
|
-
|
|
159
|
+
try {
|
|
160
|
+
await execFileAsync('yt-dlp', [
|
|
161
|
+
'--dump-json', '--no-playlist', '--quiet',
|
|
162
|
+
'--cookies-from-browser', browser, url,
|
|
163
|
+
], { timeout: 15_000 });
|
|
164
|
+
cookieArgs = ['--cookies-from-browser', browser];
|
|
165
|
+
break;
|
|
166
|
+
}
|
|
167
|
+
catch (e) {
|
|
168
|
+
const err = e;
|
|
169
|
+
if (err.code === 'ENOENT')
|
|
170
|
+
return { transcript: null, reason: 'not-installed' };
|
|
171
|
+
const stderr = err.stderr ?? '';
|
|
172
|
+
if (stderr.includes('Could not find') || stderr.includes('cookies from browser'))
|
|
173
|
+
continue;
|
|
174
|
+
// Other errors (bot-detection without cookies, etc.) — stop trying browsers
|
|
175
|
+
break;
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
// cookieArgs is empty if no browser found — fall back to cookie-less requests
|
|
179
|
+
// Phase 2: detect the video's primary language from metadata.
|
|
180
|
+
const nativeLang = await getVideoLanguage(url, cookieArgs);
|
|
181
|
+
// Phase 3: try native language subtitles first (most accurate).
|
|
182
|
+
if (nativeLang) {
|
|
183
|
+
const outcome = await run(['--write-subs', '--write-auto-subs', '--sub-langs', nativeLang], cookieArgs);
|
|
147
184
|
if (outcome === 'not-installed')
|
|
148
185
|
return { transcript: null, reason: 'not-installed' };
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
break;
|
|
186
|
+
const t = readAndClean();
|
|
187
|
+
if (t)
|
|
188
|
+
return { transcript: t };
|
|
153
189
|
}
|
|
154
|
-
//
|
|
155
|
-
|
|
156
|
-
const t1 = readAndClean();
|
|
157
|
-
if (t1)
|
|
158
|
-
return { transcript: t1 };
|
|
159
|
-
// Phase 2: try remaining subtitle strategies with the confirmed cookie source
|
|
160
|
-
for (const subArgs of SUB_STRATEGIES.slice(1)) {
|
|
190
|
+
// Phase 4: fallback strategies with the confirmed cookie source.
|
|
191
|
+
for (const subArgs of SUB_FALLBACKS) {
|
|
161
192
|
const outcome = await run(subArgs, cookieArgs);
|
|
162
193
|
if (outcome === 'not-installed')
|
|
163
194
|
return { transcript: null, reason: 'not-installed' };
|