summd 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/url-to-md.js +41 -16
- package/package.json +1 -1
package/dist/url-to-md.js
CHANGED
|
@@ -78,29 +78,54 @@ async function fetchYouTubeTitle(url, videoId) {
|
|
|
78
78
|
catch { }
|
|
79
79
|
return `YouTube: ${videoId}`;
|
|
80
80
|
}
|
|
81
|
+
// Subtitle language priority: English first, then common languages, then anything available.
|
|
82
|
+
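// NOTE(review): yt-dlp downloads every listed language that is available (not just the first match), so this priority order must be applied when choosing the output file — confirm; --write-auto-subs covers ASR variants (en-orig etc.)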
|
|
83
|
+
const SUB_LANGS = 'en,zh-Hans,zh-Hant,zh,ja,ko,fr,de,es,pt,ru';
|
|
81
84
|
// yt-dlp — works from any IP, supports auth/geo-block via --cookies-from-browser
|
|
82
85
|
async function ytDlpTranscript(url, videoId) {
|
|
83
86
|
const dir = tmpdir();
|
|
84
|
-
const outTemplate = join(dir, videoId);
|
|
87
|
+
const outTemplate = join(dir, videoId);
|
|
88
|
+
const run = (...extra) => execFileAsync('yt-dlp', [
|
|
89
|
+
'--write-subs',
|
|
90
|
+
'--write-auto-subs',
|
|
91
|
+
'--sub-langs', SUB_LANGS,
|
|
92
|
+
'--sub-format', 'json3',
|
|
93
|
+
'--skip-download',
|
|
94
|
+
'--quiet',
|
|
95
|
+
'--no-progress',
|
|
96
|
+
...extra,
|
|
97
|
+
'-o', outTemplate,
|
|
98
|
+
url,
|
|
99
|
+
], { timeout: 30_000 });
|
|
85
100
|
try {
|
|
86
|
-
await execFileAsync('yt-dlp', [
|
|
87
|
-
'--write-subs',
|
|
88
|
-
'--write-auto-subs',
|
|
89
|
-
'--sub-langs', 'en',
|
|
90
|
-
'--sub-format', 'json3',
|
|
91
|
-
'--skip-download',
|
|
92
|
-
'--quiet',
|
|
93
|
-
'--no-progress',
|
|
94
|
-
'-o', outTemplate,
|
|
95
|
-
url,
|
|
96
|
-
], { timeout: 30_000 });
|
|
101
|
+
await run();
|
|
97
102
|
}
|
|
98
103
|
catch (e) {
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
104
|
+
const err = e;
|
|
105
|
+
if (err.code === 'ENOENT')
|
|
106
|
+
return { transcript: null, reason: 'not-installed' };
|
|
107
|
+
// YouTube bot-detection: retry with browser cookies
|
|
108
|
+
// Triggered when running from cloud/residential IPs without a session
|
|
109
|
+
const stderr = err.stderr ?? '';
|
|
110
|
+
if (stderr.includes('Sign in') || stderr.includes('bot')) {
|
|
111
|
+
const browsers = ['chrome', 'chromium', 'firefox', 'safari', 'edge'];
|
|
112
|
+
let authed = false;
|
|
113
|
+
for (const browser of browsers) {
|
|
114
|
+
try {
|
|
115
|
+
await run('--cookies-from-browser', browser);
|
|
116
|
+
authed = true;
|
|
117
|
+
break;
|
|
118
|
+
}
|
|
119
|
+
catch {
|
|
120
|
+
continue;
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
if (!authed)
|
|
124
|
+
return { transcript: null, reason: 'no-transcript' };
|
|
125
|
+
}
|
|
126
|
+
// Other errors (geo-block, video unavailable): fall through to file check
|
|
102
127
|
}
|
|
103
|
-
// Find output file: {videoId}.en.json3, {videoId}.
|
|
128
|
+
// Find output file: {videoId}.en.json3, {videoId}.zh-Hans.json3, etc.
|
|
104
129
|
try {
|
|
105
130
|
const files = readdirSync(dir).filter(f => f.startsWith(videoId) && f.endsWith('.json3'));
|
|
106
131
|
for (const file of files) {
|