video-pipeline 1.2.8 → 1.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -78,10 +78,10 @@ COL_CONTENT=content # 输出列:识别文本写入此列
78
78
  COL_KEYWORDS=keywords # 输出列:AI 分析结果(关键词归纳)写入此列
79
79
  #
80
80
  # 【平台视频 ID 列】至少配置一个,列值会替换 URL 模板中的占位符
81
- COL_TENCENTVID=extra.tencent
82
- COL_BILIBILIBVID=extra.bilibili
83
- COL_YOUTUBEID=extra.youtube
84
- COL_YOUKUID=extra.youku
81
+ COL_TENCENTVID=extra.tencentVid
82
+ COL_BILIBILIBVID=extra.bilibiliBvid
83
+ COL_YOUTUBEID=extra.youtubeId
84
+ COL_YOUKUID=extra.youkuId
85
85
 
86
86
  # ── 处理的 Sheet ────────────────────────────────────────────────────────────
87
87
  # 【自由】逗号分隔的 sheet 名称列表,可增删、调序;不指定则处理所有 sheet
@@ -132,7 +132,8 @@ YOUKU_URL_TPL=https://v.youku.com/v_show/id_{youku}.html
132
132
  BILIBILI_COOKIES_FROM_BROWSER=firefox # 【自由】firefox / chrome / edge
133
133
  # 方案 B(备用): 从文件读取 cookie
134
134
  # BILIBILI_COOKIE_FILE=cookies/bilibili.txt # 【自由】cookie 文件路径(方案 A 启用时此行无效)
135
- BILIBILI_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36 # 【自由】
135
+ # 【自由】自定义 User-Agent (含特殊字符需用双引号包裹)
136
+ BILIBILI_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36"
136
137
  BILIBILI_REFERER=https://www.bilibili.com/ # 【自由】
137
138
  BILIBILI_CONCURRENT_FRAGMENTS=4 # 【自由】并发分片数
138
139
  BILIBILI_FORMAT=bestvideo[height<=720]+bestaudio/bestvideo+bestaudio/best # 【自由】
@@ -146,7 +147,8 @@ YOUTUBE_COOKIES_FROM_BROWSER=firefox # 【自由】firefox / chrome /
146
147
  # ⚠️ Windows Chromium 系浏览器 DPAPI 解密已知失败,不建议用 --cookies-from-browser chrome
147
148
  YOUTUBE_PROXY=http://127.0.0.1:7897 # 【自由】代理地址 (Clash Verge 默认 7897, v2rayN 默认 10809)
148
149
  # ⚠️ 脚本会自动给 yt-dlp 及其 node/ejs 子进程注入 HTTPS_PROXY 环境变量
149
- YOUTUBE_USER_AGENT=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36 # 【自由】
150
+ # 【自由】自定义 User-Agent (含特殊字符需用双引号包裹)
151
+ YOUTUBE_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36"
150
152
  YOUTUBE_JS_RUNTIMES=node # 【自由】解 n-sig 的 JS 运行时
151
153
  YOUTUBE_REMOTE_COMPONENTS=ejs:github # 【自由】YouTube n-sig 需要的远程组件
152
154
  YOUTUBE_FORMAT=bestvideo[height<=720]+bestaudio/bestvideo+bestaudio/best # 【自由】
@@ -160,7 +162,6 @@ TENCENT_USER_AGENT= # 【自由】
160
162
  YOUKU_COOKIE_FILE= # 【自由】
161
163
  YOUKU_USER_AGENT= # 【自由】
162
164
 
163
-
164
165
  # ── AI 分析/关键词归纳 ─────────────────────────────────────────────────────
165
166
  # 【自由】是否启用 AI 分析环节(true/false),在 transcribe 之后执行
166
167
  AI_ENABLED=true
package/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.2.9] - 2026-06-11
4
+
5
+ ### Bug Fixes
6
+
7
+ - yt-dlp download progress parsing and encoding (`e32acc9`)
8
+ - yt-dlp download progress not showing — listen stdout not stderr (`8cb99b0`)
9
+ - quote USER_AGENT values with double quotes to prevent semicolon parsing issues (`419dff2`)
10
+
11
+ ### Documentation
12
+
13
+ - update (`f761ad4`)
14
+
15
+
3
16
  ## [1.2.8] - 2026-06-11
4
17
 
5
18
  ### Features
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "video-pipeline",
3
- "version": "1.2.8",
3
+ "version": "1.2.9",
4
4
  "description": "视频下载、转码、文本识别、AI 关键词分析一体化流程 CLI 工具",
5
5
  "keywords": [
6
6
  "video",
package/process_videos.js CHANGED
@@ -664,15 +664,13 @@ function spawnWithTimeout(cmd, args, timeout, options = {}) {
664
664
  reject(Object.assign(new Error(`Timeout after ${timeout}s`), { name: 'TimeoutError', code: 'ETIMEDOUT' }));
665
665
  }, timeout * 1000);
666
666
 
667
- if (onProgress && child.stderr) {
668
- const rl = readline.createInterface({ input: child.stderr, crlfDelay: Infinity });
669
- rl.on('line', line => {
670
- stderr += line + '\n';
671
- try { onProgress(line); } catch {}
672
- });
673
- child.stderr.on('end', () => rl.close());
674
- // 同时消费 stdout,防止缓冲区填满阻塞子进程
675
- child.stdout.on('data', d => { stdout += d.toString(); });
667
+ if (onProgress) {
668
+ // 同时监听 stdout 和 stderr:yt-dlp --newline 的 [download] 进度输出在 stdout
669
+ const onLine = (buf, line) => { try { onProgress(line); } catch {} };
670
+ const rlOut = readline.createInterface({ input: child.stdout, crlfDelay: Infinity });
671
+ rlOut.on('line', line => { stdout += line + '\n'; onLine('stdout', line); });
672
+ const rlErr = readline.createInterface({ input: child.stderr, crlfDelay: Infinity });
673
+ rlErr.on('line', line => { stderr += line + '\n'; onLine('stderr', line); });
676
674
  } else {
677
675
  child.stdout.on('data', d => { stdout += d.toString(); });
678
676
  child.stderr.on('data', d => { stderr += d.toString(); });
@@ -680,9 +678,6 @@ function spawnWithTimeout(cmd, args, timeout, options = {}) {
680
678
 
681
679
  child.on('close', code => {
682
680
  clearTimeout(timer);
683
- if (!onProgress) {
684
- // Without onProgress, stderr was captured by the 'data' handler
685
- }
686
681
  if (code === 0) resolve({ stdout, stderr });
687
682
  else reject(Object.assign(new Error(`Exit code ${code}`), { code, stderr }));
688
683
  });
@@ -788,13 +783,16 @@ function cleanupPartials(dlDir, stem) {
788
783
 
789
784
  // ============================== 下载 ==============================
790
785
  function parseYtdlpProgress(line) {
791
- // Parse yt-dlp progress line like "[download] 12.3% of ~50.00MiB at 2.5MiB/s ETA 00:15"
792
- const m = line.match(/\[download\]\s+([\d.]+%)\s+of\s+~?([\d.]+[KMG]iB)\s+at\s+([\d.]+[KMG]iB\/s)\s+ETA\s+([\d:]+)/);
793
- if (m) return `DL ${m[1]} of ${m[2]} @ ${m[3]} ETA ${m[4]}`;
794
- // Also try: "[download] 100% of 50.00MiB"
795
- const m2 = line.match(/\[download\]\s+([\d.]+%)\s+of\s+([\d.]+[KMG]iB)/);
796
- if (m2) return `DL ${m2[1]} of ${m2[2]}`;
797
- return null;
786
+ // Parse yt-dlp progress lines like:
787
+ // "[download] 12.3% of ~50.00MiB at 2.5MiB/s ETA 00:15"
788
+ // "[download] 0.0% of 61.66MiB at Unknown B/s ETA Unknown"
789
+ const m = line.match(/\[download\]\s+([\d.]+%)\s+of\s+~?\s*([\d.]+[KMG]iB)/);
790
+ if (!m) return null;
791
+ const pct = m[1]; // e.g. "12.3%"
792
+ const size = m[2]; // e.g. "50.00MiB"
793
+ const spd = (line.match(/at\s+([\d.]+ ?[KMG]?i?B\/s)/) || [])[1] || '?';
794
+ const eta = (line.match(/ETA\s+([\d:]+)/) || [])[1] || '?';
795
+ return `DL ${pct} of ${size} @ ${spd} ETA ${eta}`;
798
796
  }
799
797
 
800
798
  async function stepDownload(row, sheetName, maxRetries, retryDelay, force, timeout = 600) {
@@ -830,6 +828,10 @@ async function stepDownload(row, sheetName, maxRetries, retryDelay, force, timeo
830
828
  '-f', cfg.format || 'bestvideo+bestaudio/best',
831
829
  ];
832
830
 
831
+ if (force) {
832
+ args.push('--force-overwrites');
833
+ }
834
+
833
835
  if (cfg.concurrent_fragments) {
834
836
  args.push('--concurrent-fragments', String(cfg.concurrent_fragments));
835
837
  }
@@ -2339,8 +2341,8 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
2339
2341
  .description('视频下载、转码、文本识别、AI分析一体化流程')
2340
2342
  .option('--sheet <name>', '指定 sheet 名称')
2341
2343
  .option('--id <id>', '指定 extra.id 或 title(单条测试)')
2342
- .option('--offset <n>', '跳过前 N 条任务(从 0 开始),默认 0', parseInt, 0)
2343
- .option('--limit <n>', '最多处理 N 条任务,默认无限制', parseInt, 0)
2344
+ .option('--offset <n>', '跳过前 N 条任务(从 0 开始),默认 0', v => parseInt(v, 10), 0)
2345
+ .option('--limit <n>', '最多处理 N 条任务,默认无限制', v => parseInt(v, 10), 0)
2344
2346
  .option('--step <step>', '指定执行步骤(可多次指定),如 --step transcode --step transcribe', (val, prev) => {
2345
2347
  const allowed = ['download', 'transcode', 'transcribe', 'analyze'];
2346
2348
  if (!allowed.includes(val)) {
@@ -2350,13 +2352,13 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
2350
2352
  return [...(prev || []), val];
2351
2353
  })
2352
2354
  .option('--force', '强制重做下载+转码(忽略已有文件)')
2353
- .option('--concurrency <n>', '并发数,默认 1', parseInt, 1)
2354
- .option('--retry <n>', '每步失败最大重试次数,默认 0', parseInt, 0)
2355
- .option('--retry-delay <n>', '重试间隔基数(秒),默认 5', parseFloat, 5.0)
2356
- .option('--download-timeout <n>', '下载超时(秒),默认 600', parseInt, 600)
2357
- .option('--transcode-timeout <n>', '转码超时(秒),默认 600', parseInt, 600)
2358
- .option('--transcribe-timeout <n>', '识别超时(秒),默认 600', parseInt, 600)
2359
- .option('--analyze-timeout <n>', 'AI 分析超时(秒),默认 300', parseInt, 300)
2355
+ .option('--concurrency <n>', '并发数,默认 1', v => parseInt(v, 10), 1)
2356
+ .option('--retry <n>', '每步失败最大重试次数,默认 0', v => parseInt(v, 10), 0)
2357
+ .option('--retry-delay <n>', '重试间隔基数(秒),默认 5', v => parseFloat(v), 5.0)
2358
+ .option('--download-timeout <n>', '下载超时(秒),默认 600', v => parseInt(v, 10), 600)
2359
+ .option('--transcode-timeout <n>', '转码超时(秒),默认 600', v => parseInt(v, 10), 600)
2360
+ .option('--transcribe-timeout <n>', '识别超时(秒),默认 600', v => parseInt(v, 10), 600)
2361
+ .option('--analyze-timeout <n>', 'AI 分析超时(秒),默认 300', v => parseInt(v, 10), 300)
2360
2362
  .option('--dry-run', '干跑模式,只列任务不执行')
2361
2363
  .option('--retry-failed <path>', '从报告 JSON 重跑失败项(output/reports/{sheet}/report_xxx.json)')
2362
2364
  .option('--init', '复制 .env.example 到当前目录并重命名为 .env')