summd 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/dist/url-to-md.js +70 -56
  2. package/package.json +1 -1
package/dist/url-to-md.js CHANGED
@@ -78,79 +78,93 @@ async function fetchYouTubeTitle(url, videoId) {
78
78
  catch { }
79
79
  return `YouTube: ${videoId}`;
80
80
  }
81
- // Browser cookie sources — tried in order, first one that succeeds is used.
82
- // Cookies bypass bot-detection and geo-blocks; required on most IPs.
81
+ // Browser cookie sources — tried in order, first working one is reused for all runs.
83
82
  const BROWSERS = ['chrome', 'chromium', 'firefox', 'safari', 'edge'];
84
- // yt-dlp uses browser cookies by default; falls back to no-cookies for public videos.
85
- // --sub-langs all: accept whatever subtitle language the video provides, no preference.
83
+ // Subtitle strategies tried in order, stop at first that produces output.
84
+ // Each downloads only the requested languages, not everything.
85
+ const SUB_STRATEGIES = [
86
+ // 1. Original-language ASR: yt-dlp 'orig' matches the video's primary language
87
+ ['--write-auto-subs', '--sub-langs', 'orig'],
88
+ // 2. English manual + ASR
89
+ ['--write-subs', '--write-auto-subs', '--sub-langs', 'en'],
90
+ // 3. Most-spoken languages by global user count (covers remaining cases)
91
+ ['--write-subs', '--write-auto-subs', '--sub-langs', 'zh-Hans,zh,ja,ko,fr,de,es,pt,hi,ar,ru'],
92
+ ];
86
93
  async function ytDlpTranscript(url, videoId) {
87
94
  const dir = tmpdir();
88
95
  const outTemplate = join(dir, videoId);
89
- const run = (cookieArgs) => execFileAsync('yt-dlp', [
90
- '--write-subs',
91
- '--write-auto-subs',
92
- '--sub-langs', 'all', // accept any language — no hardcoded preference
93
- '--sub-format', 'json3',
94
- '--skip-download',
95
- '--quiet',
96
- '--no-progress',
97
- ...cookieArgs,
98
- '-o', outTemplate,
99
- url,
100
- ], { timeout: 30_000 });
101
- // Try with browser cookies first (handles bot-detection, auth, geo-block).
102
- // Fall back to no-cookies only if no browser is available.
103
- let ran = false;
104
- for (const browser of BROWSERS) {
96
+ const run = async (subArgs, cookieArgs) => {
105
97
  try {
106
- await run(['--cookies-from-browser', browser]);
107
- ran = true;
108
- break;
98
+ await execFileAsync('yt-dlp', [
99
+ '--sub-format', 'json3',
100
+ '--skip-download', '--quiet', '--no-progress',
101
+ ...subArgs, ...cookieArgs,
102
+ '-o', outTemplate, url,
103
+ ], { timeout: 30_000 });
104
+ return 'ok';
109
105
  }
110
106
  catch (e) {
111
107
  const err = e;
112
108
  if (err.code === 'ENOENT')
113
- return { transcript: null, reason: 'not-installed' };
109
+ return 'not-installed';
114
110
  const stderr = err.stderr ?? '';
115
- // Cookie access failed (browser not installed / locked DB) — try next browser
116
- if (stderr.includes('browser') || stderr.includes('cookie') || stderr.includes('Could not find'))
117
- continue;
118
- // yt-dlp ran but failed for another reason (video unavailable, no captions…)
119
- ran = true;
120
- break;
111
+ if (stderr.includes('Could not find') || stderr.includes('cookies from browser'))
112
+ return 'cookie-error';
113
+ return 'other-error';
121
114
  }
122
- }
123
- // No browser found with usable cookies — try without (works for fully public videos)
124
- if (!ran) {
115
+ };
116
+ const readAndClean = () => {
125
117
  try {
126
- await run([]);
127
- }
128
- catch (e) {
129
- const err = e;
130
- if (err.code === 'ENOENT')
131
- return { transcript: null, reason: 'not-installed' };
132
- return { transcript: null, reason: 'no-transcript' };
133
- }
134
- }
135
- // Read the first valid output file; clean up all temp files regardless
136
- try {
137
- const files = readdirSync(dir).filter(f => f.startsWith(videoId) && f.endsWith('.json3'));
138
- for (const file of files) {
139
- const filePath = join(dir, file);
140
- try {
141
- const text = parseTimedText(JSON.parse(readFileSync(filePath, 'utf8')));
142
- if (text)
143
- return { transcript: text };
144
- }
145
- finally {
118
+ const files = readdirSync(dir).filter(f => f.startsWith(videoId) && f.endsWith('.json3'));
119
+ let transcript = null;
120
+ for (const file of files) {
121
+ const filePath = join(dir, file);
146
122
  try {
147
- unlinkSync(filePath);
123
+ if (!transcript) {
124
+ const text = parseTimedText(JSON.parse(readFileSync(filePath, 'utf8')));
125
+ if (text)
126
+ transcript = text;
127
+ }
128
+ }
129
+ finally {
130
+ try {
131
+ unlinkSync(filePath);
132
+ }
133
+ catch { }
148
134
  }
149
- catch { }
150
135
  }
136
+ return transcript;
137
+ }
138
+ catch {
139
+ return null;
151
140
  }
141
+ };
142
+ // Phase 1: find a working browser cookie source using the first subtitle strategy.
143
+ // The result tells us both which browser works AND whether strategy 1 produced output.
144
+ let cookieArgs = [];
145
+ for (const browser of BROWSERS) {
146
+ const outcome = await run(SUB_STRATEGIES[0], ['--cookies-from-browser', browser]);
147
+ if (outcome === 'not-installed')
148
+ return { transcript: null, reason: 'not-installed' };
149
+ if (outcome === 'cookie-error')
150
+ continue; // browser not available, try next
151
+ cookieArgs = ['--cookies-from-browser', browser];
152
+ break;
153
+ }
154
+ // cookieArgs is empty if no browser found — fall back to cookie-less requests
155
+ // Check if strategy 1 produced output
156
+ const t1 = readAndClean();
157
+ if (t1)
158
+ return { transcript: t1 };
159
+ // Phase 2: try remaining subtitle strategies with the confirmed cookie source
160
+ for (const subArgs of SUB_STRATEGIES.slice(1)) {
161
+ const outcome = await run(subArgs, cookieArgs);
162
+ if (outcome === 'not-installed')
163
+ return { transcript: null, reason: 'not-installed' };
164
+ const t = readAndClean();
165
+ if (t)
166
+ return { transcript: t };
152
167
  }
153
- catch { }
154
168
  return { transcript: null, reason: 'no-transcript' };
155
169
  }
156
170
  // json3 parser — shared algorithm with browser extension
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "summd",
3
- "version": "0.1.8",
3
+ "version": "0.1.10",
4
4
  "description": "CLI for sum.md — Sum to anything.",
5
5
  "license": "MIT",
6
6
  "bin": {