video-pipeline 1.2.8 → 1.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +8 -7
- package/CHANGELOG.md +13 -0
- package/package.json +1 -1
- package/process_videos.js +30 -28
package/.env.example
CHANGED
|
@@ -78,10 +78,10 @@ COL_CONTENT=content # 输出列:识别文本写入此列
|
|
|
78
78
|
COL_KEYWORDS=keywords # 输出列:AI 分析结果(关键词归纳)写入此列
|
|
79
79
|
#
|
|
80
80
|
# 【平台视频 ID 列】至少配置一个,列值会替换 URL 模板中的占位符
|
|
81
|
-
COL_TENCENTVID=extra.
|
|
82
|
-
COL_BILIBILIBVID=extra.
|
|
83
|
-
COL_YOUTUBEID=extra.
|
|
84
|
-
COL_YOUKUID=extra.
|
|
81
|
+
COL_TENCENTVID=extra.tencentVid
|
|
82
|
+
COL_BILIBILIBVID=extra.bilibiliBvid
|
|
83
|
+
COL_YOUTUBEID=extra.youtubeId
|
|
84
|
+
COL_YOUKUID=extra.youkuId
|
|
85
85
|
|
|
86
86
|
# ── 处理的 Sheet ────────────────────────────────────────────────────────────
|
|
87
87
|
# 【自由】逗号分隔的 sheet 名称列表,可增删、调序;不指定则处理所有 sheet
|
|
@@ -132,7 +132,8 @@ YOUKU_URL_TPL=https://v.youku.com/v_show/id_{youku}.html
|
|
|
132
132
|
BILIBILI_COOKIES_FROM_BROWSER=firefox # 【自由】firefox / chrome / edge
|
|
133
133
|
# 方案 B(备用): 从文件读取 cookie
|
|
134
134
|
# BILIBILI_COOKIE_FILE=cookies/bilibili.txt # 【自由】cookie 文件路径(方案 A 启用时此行无效)
|
|
135
|
-
|
|
135
|
+
# 【自由】自定义 User-Agent (含特殊字符需用双引号包裹)
|
|
136
|
+
BILIBILI_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36"
|
|
136
137
|
BILIBILI_REFERER=https://www.bilibili.com/ # 【自由】
|
|
137
138
|
BILIBILI_CONCURRENT_FRAGMENTS=4 # 【自由】并发分片数
|
|
138
139
|
BILIBILI_FORMAT=bestvideo[height<=720]+bestaudio/bestvideo+bestaudio/best # 【自由】
|
|
@@ -146,7 +147,8 @@ YOUTUBE_COOKIES_FROM_BROWSER=firefox # 【自由】firefox / chrome /
|
|
|
146
147
|
# ⚠️ Windows Chromium 系浏览器 DPAPI 解密已知失败,不建议用 --cookies-from-browser chrome
|
|
147
148
|
YOUTUBE_PROXY=http://127.0.0.1:7897 # 【自由】代理地址 (Clash Verge 默认 7897, v2rayN 默认 10809)
|
|
148
149
|
# ⚠️ 脚本会自动给 yt-dlp 及其 node/ejs 子进程注入 HTTPS_PROXY 环境变量
|
|
149
|
-
|
|
150
|
+
# 【自由】自定义 User-Agent (含特殊字符需用双引号包裹)
|
|
151
|
+
YOUTUBE_USER_AGENT="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/149.0.0.0 Safari/537.36"
|
|
150
152
|
YOUTUBE_JS_RUNTIMES=node # 【自由】解 n-sig 的 JS 运行时
|
|
151
153
|
YOUTUBE_REMOTE_COMPONENTS=ejs:github # 【自由】YouTube n-sig 需要的远程组件
|
|
152
154
|
YOUTUBE_FORMAT=bestvideo[height<=720]+bestaudio/bestvideo+bestaudio/best # 【自由】
|
|
@@ -160,7 +162,6 @@ TENCENT_USER_AGENT= # 【自由】
|
|
|
160
162
|
YOUKU_COOKIE_FILE= # 【自由】
|
|
161
163
|
YOUKU_USER_AGENT= # 【自由】
|
|
162
164
|
|
|
163
|
-
|
|
164
165
|
# ── AI 分析/关键词归纳 ─────────────────────────────────────────────────────
|
|
165
166
|
# 【自由】是否启用 AI 分析环节(true/false),在 transcribe 之后执行
|
|
166
167
|
AI_ENABLED=true
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,18 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [1.2.9] - 2026-06-11
|
|
4
|
+
|
|
5
|
+
### Bug Fixes
|
|
6
|
+
|
|
7
|
+
- yt-dlp download progress parsing and encoding (`e32acc9`)
|
|
8
|
+
- yt-dlp download progress not showing — listen on stdout as well as stderr (`8cb99b0`)
|
|
9
|
+
- wrap USER_AGENT values in double quotes to prevent semicolon parsing issues (`419dff2`)
|
|
10
|
+
|
|
11
|
+
### Documentation
|
|
12
|
+
|
|
13
|
+
- update (`f761ad4`)
|
|
14
|
+
|
|
15
|
+
|
|
3
16
|
## [1.2.8] - 2026-06-11
|
|
4
17
|
|
|
5
18
|
### Features
|
package/package.json
CHANGED
package/process_videos.js
CHANGED
|
@@ -664,15 +664,13 @@ function spawnWithTimeout(cmd, args, timeout, options = {}) {
|
|
|
664
664
|
reject(Object.assign(new Error(`Timeout after ${timeout}s`), { name: 'TimeoutError', code: 'ETIMEDOUT' }));
|
|
665
665
|
}, timeout * 1000);
|
|
666
666
|
|
|
667
|
-
if (onProgress
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
});
|
|
673
|
-
|
|
674
|
-
// 同时消费 stdout,防止缓冲区填满阻塞子进程
|
|
675
|
-
child.stdout.on('data', d => { stdout += d.toString(); });
|
|
667
|
+
if (onProgress) {
|
|
668
|
+
// 同时监听 stdout 和 stderr — yt-dlp --newline 的 [download] 进度输出在 stdout
|
|
669
|
+
const onLine = (buf, line) => { try { onProgress(line); } catch {} };
|
|
670
|
+
const rlOut = readline.createInterface({ input: child.stdout, crlfDelay: Infinity });
|
|
671
|
+
rlOut.on('line', line => { stdout += line + '\n'; onLine('stdout', line); });
|
|
672
|
+
const rlErr = readline.createInterface({ input: child.stderr, crlfDelay: Infinity });
|
|
673
|
+
rlErr.on('line', line => { stderr += line + '\n'; onLine('stderr', line); });
|
|
676
674
|
} else {
|
|
677
675
|
child.stdout.on('data', d => { stdout += d.toString(); });
|
|
678
676
|
child.stderr.on('data', d => { stderr += d.toString(); });
|
|
@@ -680,9 +678,6 @@ function spawnWithTimeout(cmd, args, timeout, options = {}) {
|
|
|
680
678
|
|
|
681
679
|
child.on('close', code => {
|
|
682
680
|
clearTimeout(timer);
|
|
683
|
-
if (!onProgress) {
|
|
684
|
-
// Without onProgress, stderr was captured by the 'data' handler
|
|
685
|
-
}
|
|
686
681
|
if (code === 0) resolve({ stdout, stderr });
|
|
687
682
|
else reject(Object.assign(new Error(`Exit code ${code}`), { code, stderr }));
|
|
688
683
|
});
|
|
@@ -788,13 +783,16 @@ function cleanupPartials(dlDir, stem) {
|
|
|
788
783
|
|
|
789
784
|
// ============================== 下载 ==============================
|
|
790
785
|
function parseYtdlpProgress(line) {
|
|
791
|
-
// Parse yt-dlp progress
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
797
|
-
|
|
786
|
+
// Parse yt-dlp progress lines like:
|
|
787
|
+
// "[download] 12.3% of ~50.00MiB at 2.5MiB/s ETA 00:15"
|
|
788
|
+
// "[download] 0.0% of 61.66MiB at Unknown B/s ETA Unknown"
|
|
789
|
+
const m = line.match(/\[download\]\s+([\d.]+%)\s+of\s+~?\s*([\d.]+[KMG]iB)/);
|
|
790
|
+
if (!m) return null;
|
|
791
|
+
const pct = m[1]; // e.g. "12.3%"
|
|
792
|
+
const size = m[2]; // e.g. "50.00MiB"
|
|
793
|
+
const spd = (line.match(/at\s+([\d.]+ ?[KMG]?i?B\/s)/) || [])[1] || '?';
|
|
794
|
+
const eta = (line.match(/ETA\s+([\d:]+)/) || [])[1] || '?';
|
|
795
|
+
return `DL ${pct} of ${size} @ ${spd} ETA ${eta}`;
|
|
798
796
|
}
|
|
799
797
|
|
|
800
798
|
async function stepDownload(row, sheetName, maxRetries, retryDelay, force, timeout = 600) {
|
|
@@ -830,6 +828,10 @@ async function stepDownload(row, sheetName, maxRetries, retryDelay, force, timeo
|
|
|
830
828
|
'-f', cfg.format || 'bestvideo+bestaudio/best',
|
|
831
829
|
];
|
|
832
830
|
|
|
831
|
+
if (force) {
|
|
832
|
+
args.push('--force-overwrites');
|
|
833
|
+
}
|
|
834
|
+
|
|
833
835
|
if (cfg.concurrent_fragments) {
|
|
834
836
|
args.push('--concurrent-fragments', String(cfg.concurrent_fragments));
|
|
835
837
|
}
|
|
@@ -2339,8 +2341,8 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
|
|
|
2339
2341
|
.description('视频下载、转码、文本识别、AI分析一体化流程')
|
|
2340
2342
|
.option('--sheet <name>', '指定 sheet 名称')
|
|
2341
2343
|
.option('--id <id>', '指定 extra.id 或 title(单条测试)')
|
|
2342
|
-
.option('--offset <n>', '跳过前 N 条任务(从 0 开始),默认 0', parseInt, 0)
|
|
2343
|
-
.option('--limit <n>', '最多处理 N 条任务,默认无限制', parseInt, 0)
|
|
2344
|
+
.option('--offset <n>', '跳过前 N 条任务(从 0 开始),默认 0', v => parseInt(v, 10), 0)
|
|
2345
|
+
.option('--limit <n>', '最多处理 N 条任务,默认无限制', v => parseInt(v, 10), 0)
|
|
2344
2346
|
.option('--step <step>', '指定执行步骤(可多次指定),如 --step transcode --step transcribe', (val, prev) => {
|
|
2345
2347
|
const allowed = ['download', 'transcode', 'transcribe', 'analyze'];
|
|
2346
2348
|
if (!allowed.includes(val)) {
|
|
@@ -2350,13 +2352,13 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
|
|
|
2350
2352
|
return [...(prev || []), val];
|
|
2351
2353
|
})
|
|
2352
2354
|
.option('--force', '强制重做下载+转码(忽略已有文件)')
|
|
2353
|
-
.option('--concurrency <n>', '并发数,默认 1', parseInt, 1)
|
|
2354
|
-
.option('--retry <n>', '每步失败最大重试次数,默认 0', parseInt, 0)
|
|
2355
|
-
.option('--retry-delay <n>', '重试间隔基数(秒),默认 5', parseFloat, 5.0)
|
|
2356
|
-
.option('--download-timeout <n>', '下载超时(秒),默认 600', parseInt, 600)
|
|
2357
|
-
.option('--transcode-timeout <n>', '转码超时(秒),默认 600', parseInt, 600)
|
|
2358
|
-
.option('--transcribe-timeout <n>', '识别超时(秒),默认 600', parseInt, 600)
|
|
2359
|
-
.option('--analyze-timeout <n>', 'AI 分析超时(秒),默认 300', parseInt, 300)
|
|
2355
|
+
.option('--concurrency <n>', '并发数,默认 1', v => parseInt(v, 10), 1)
|
|
2356
|
+
.option('--retry <n>', '每步失败最大重试次数,默认 0', v => parseInt(v, 10), 0)
|
|
2357
|
+
.option('--retry-delay <n>', '重试间隔基数(秒),默认 5', v => parseFloat(v), 5.0)
|
|
2358
|
+
.option('--download-timeout <n>', '下载超时(秒),默认 600', v => parseInt(v, 10), 600)
|
|
2359
|
+
.option('--transcode-timeout <n>', '转码超时(秒),默认 600', v => parseInt(v, 10), 600)
|
|
2360
|
+
.option('--transcribe-timeout <n>', '识别超时(秒),默认 600', v => parseInt(v, 10), 600)
|
|
2361
|
+
.option('--analyze-timeout <n>', 'AI 分析超时(秒),默认 300', v => parseInt(v, 10), 300)
|
|
2360
2362
|
.option('--dry-run', '干跑模式,只列任务不执行')
|
|
2361
2363
|
.option('--retry-failed <path>', '从报告 JSON 重跑失败项(output/reports/{sheet}/report_xxx.json)')
|
|
2362
2364
|
.option('--init', '复制 .env.example 到当前目录并重命名为 .env')
|