summd 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/url-to-md.js +41 -16
  2. package/package.json +1 -1
package/dist/url-to-md.js CHANGED
@@ -78,29 +78,54 @@ async function fetchYouTubeTitle(url, videoId) {
78
78
  catch { }
79
79
  return `YouTube: ${videoId}`;
80
80
  }
81
+ // Subtitle languages requested: English first, then other common languages (the list has no catch-all entry).
82
+ // yt-dlp downloads every requested language that is available (it does not stop at the first match); --write-auto-subs adds auto-generated (ASR) captions
83
+ const SUB_LANGS = 'en,zh-Hans,zh-Hant,zh,ja,ko,fr,de,es,pt,ru';
81
84
  // yt-dlp — works from any IP, supports auth/geo-block via --cookies-from-browser
82
85
  async function ytDlpTranscript(url, videoId) {
83
86
  const dir = tmpdir();
84
- const outTemplate = join(dir, videoId); // output: {videoId}.en.json3
87
+ const outTemplate = join(dir, videoId);
88
+ const run = (...extra) => execFileAsync('yt-dlp', [
89
+ '--write-subs',
90
+ '--write-auto-subs',
91
+ '--sub-langs', SUB_LANGS,
92
+ '--sub-format', 'json3',
93
+ '--skip-download',
94
+ '--quiet',
95
+ '--no-progress',
96
+ ...extra,
97
+ '-o', outTemplate,
98
+ url,
99
+ ], { timeout: 30_000 });
85
100
  try {
86
- await execFileAsync('yt-dlp', [
87
- '--write-subs',
88
- '--write-auto-subs',
89
- '--sub-langs', 'en',
90
- '--sub-format', 'json3',
91
- '--skip-download',
92
- '--quiet',
93
- '--no-progress',
94
- '-o', outTemplate,
95
- url,
96
- ], { timeout: 30_000 });
101
+ await run();
97
102
  }
98
103
  catch (e) {
99
- // Distinguish "yt-dlp not in PATH" from "ran but video has no captions"
100
- const isNotFound = e.code === 'ENOENT';
101
- return { transcript: null, reason: isNotFound ? 'not-installed' : 'no-transcript' };
104
+ const err = e;
105
+ if (err.code === 'ENOENT')
106
+ return { transcript: null, reason: 'not-installed' };
107
+ // YouTube bot-detection: retry with browser cookies
108
+ // Triggered when running from cloud/residential IPs without a session
109
+ const stderr = err.stderr ?? '';
110
+ if (stderr.includes('Sign in') || stderr.includes('bot')) {
111
+ const browsers = ['chrome', 'chromium', 'firefox', 'safari', 'edge'];
112
+ let authed = false;
113
+ for (const browser of browsers) {
114
+ try {
115
+ await run('--cookies-from-browser', browser);
116
+ authed = true;
117
+ break;
118
+ }
119
+ catch {
120
+ continue;
121
+ }
122
+ }
123
+ if (!authed)
124
+ return { transcript: null, reason: 'no-transcript' };
125
+ }
126
+ // Other errors (geo-block, video unavailable): fall through to file check
102
127
  }
103
- // Find output file: {videoId}.en.json3, {videoId}.en-US.json3, etc.
128
+ // Find output file: {videoId}.en.json3, {videoId}.zh-Hans.json3, etc.
104
129
  try {
105
130
  const files = readdirSync(dir).filter(f => f.startsWith(videoId) && f.endsWith('.json3'));
106
131
  for (const file of files) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "summd",
3
- "version": "0.1.5",
3
+ "version": "0.1.6",
4
4
  "description": "CLI for sum.md — Sum to anything.",
5
5
  "license": "MIT",
6
6
  "bin": {