video-pipeline 1.1.0 → 1.2.0

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
package/.env.example CHANGED
@@ -27,10 +27,10 @@ EXCEL_FILE=data/export_2026-06-10_split.xlsx
27
27
 
28
28
  # ── 输出目录(相对于项目根目录)─────────────────────────────────────────────
29
29
  # 【自由】任意合法目录名
30
- DOWNLOADS_DIR=downloads # 下载的视频文件
31
- TRANSCODED_DIR=transcoded # 转码后的音频文件
32
- REPORTS_DIR=reports # 执行报告 JSON
33
- COOKIES_DIR=cookies # 站点 cookie 文件
30
+ DOWNLOADS_DIR=output/downloads # 下载的视频文件
31
+ TRANSCODED_DIR=output/transcoded # 转码后的音频文件
32
+ REPORTS_DIR=output/reports # 执行报告 JSON
33
+ COOKIES_DIR=cookies # 站点 cookie 文件
34
34
 
35
35
  # ── 外部工具 ────────────────────────────────────────────────────────────────
36
36
  # 【自由】可改为工具的绝对路径或自定义命令名
@@ -51,6 +51,9 @@ FFPROBE=ffprobe # ffprobe 媒体信息
51
51
  # 远程服务模式 - whisper.cpp server (默认)
52
52
  WHISPER_BACKEND=service # 【自由】service 或 local
53
53
  WHISPER_SERVICE=http://localhost:9588 # 【自由】服务地址
54
+ WHISPER_TEMPERATURE=0.0 # 【自由】whisper 推理温度 (0.0~1.0, 越低越确定)
55
+ WHISPER_TEMPERATURE_INC=0.2 # 【自由】whisper 温度增量 (fallback 时升温步长)
56
+ WHISPER_RESPONSE_FORMAT=json # 【自由】whisper 返回格式 (json/text/srt/vtt)
54
57
 
55
58
  # 本地模式 - openai-whisper CLI (取消下方注释并注释上方即可切换)
56
59
  # WHISPER_BACKEND=local
@@ -157,12 +160,14 @@ YOUKU_USER_AGENT= # 【自由】
157
160
  # ── AI 分析/关键词归纳 ─────────────────────────────────────────────────────
158
161
  # 【自由】是否启用 AI 分析环节(true/false),在 transcribe 之后执行
159
162
  AI_ENABLED=true
160
- # 【自由】OpenAI 兼容 API 配置
161
- AI_API_KEY=sk-jewvkpoAQNKAARQMYULWW1vv8x6UH4H9Qe4j10tp2AJFM3TY
162
- AI_BASE_URL=https://apihub.agnes-ai.com/v1
163
- AI_MODEL=agnes-2.0-flash
163
+ # 【自由】OpenAI 兼容 API 配置(⚠️ 请勿提交真实 API Key 到此文件)
164
+ AI_API_KEY=your-api-key-here
165
+ AI_BASE_URL=https://your-api-host/v1
166
+ AI_MODEL=your-model-name
164
167
  # 【自由】请求超时(秒),默认 300
165
168
  AI_TIMEOUT=300
169
+ # 【自由】AI 推理温度 (0.0~2.0, 越低越确定/保守, 越高越随机/创意)
170
+ AI_TEMPERATURE=0.3
166
171
  # 【关联】提示词模板,{content} 占位符会被识别文本替换
167
172
  # 【自由】提示词内容可随意修改,但必须保留 {content} 占位符
168
173
  AI_PROMPT_TPL=帮我归纳总结一下Keywords,尽可能全一点,这是内容:{content}
package/CHANGELOG.md CHANGED
@@ -1,5 +1,17 @@
1
1
  # Changelog
2
2
 
3
+ ## [1.2.0] - 2026-06-11
4
+
5
+ ### Bug Fixes
6
+
7
+ - 安全漏洞修复 + dry-run 模式完善 + 全面测试套件 (`3677b6a`)
8
+
9
+ ### Refactoring
10
+
11
+ - 输出目录统一归入 output/ 并清理测试产物 (`f5cad03`)
12
+ - whisper/AI 硬编码参数改为 env 可配置 (`611b079`)
13
+
14
+
3
15
  ## [1.1.0] - 2026-06-11
4
16
 
5
17
  ### Features
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "video-pipeline",
3
- "version": "1.1.0",
3
+ "version": "1.2.0",
4
4
  "description": "视频下载、转码、文本识别、AI 关键词分析一体化流程 CLI 工具",
5
5
  "keywords": [
6
6
  "video",
package/process_videos.js CHANGED
@@ -43,10 +43,10 @@ function envPath(key, defaultValue) {
43
43
  }
44
44
 
45
45
  let EXCEL_FILE = envPath('EXCEL_FILE', 'data/export_2026-06-10_split.xlsx');
46
- const DOWNLOADS_DIR = envPath('DOWNLOADS_DIR', 'downloads');
47
- const TRANSCODED_DIR = envPath('TRANSCODED_DIR', 'transcoded');
46
+ const DOWNLOADS_DIR = envPath('DOWNLOADS_DIR', 'output/downloads');
47
+ const TRANSCODED_DIR = envPath('TRANSCODED_DIR', 'output/transcoded');
48
48
  const COOKIES_DIR = envPath('COOKIES_DIR', 'cookies');
49
- const REPORTS_DIR = envPath('REPORTS_DIR', 'reports');
49
+ const REPORTS_DIR = envPath('REPORTS_DIR', 'output/reports');
50
50
 
51
51
  const YTDLP = process.env.YTDLP || 'yt-dlp';
52
52
  const FFMPEG = process.env.FFMPEG || 'ffmpeg';
@@ -57,6 +57,9 @@ const WHISPER_MODEL = process.env.WHISPER_MODEL || 'base';
57
57
  const WHISPER_DEVICE = process.env.WHISPER_DEVICE || 'cpu';
58
58
  const WHISPER_LANGUAGE = process.env.WHISPER_LANGUAGE || '';
59
59
  const WHISPER_SERVICE_MODEL = process.env.WHISPER_SERVICE_MODEL || '';
60
+ const WHISPER_TEMPERATURE = process.env.WHISPER_TEMPERATURE || '0.0';
61
+ const WHISPER_TEMPERATURE_INC = process.env.WHISPER_TEMPERATURE_INC || '0.2';
62
+ const WHISPER_RESPONSE_FORMAT = process.env.WHISPER_RESPONSE_FORMAT || 'json';
60
63
  let _SERVICE_MODEL_LOADED = null;
61
64
 
62
65
  const TRANSCODE_EXT = process.env.TRANSCODE_EXT || '.wav';
@@ -181,27 +184,6 @@ function timestamp() {
181
184
  return new Date().toTimeString().slice(0, 8);
182
185
  }
183
186
 
184
- // ============================== 锁 / 并发控制 ==============================
185
- let _printLock = false;
186
- const _printQueue = [];
187
- function printLock(fn) {
188
- return new Promise(resolve => {
189
- _printQueue.push(async () => {
190
- _printLock = true;
191
- try { fn(); } finally { _printLock = false; }
192
- resolve();
193
- });
194
- if (_printQueue.length === 1) processQueue();
195
- });
196
- }
197
- async function processQueue() {
198
- while (_printQueue.length) {
199
- await _printQueue[0]();
200
- _printQueue.shift();
201
- }
202
- }
203
-
204
- // 简化:Node.js 单线程,简单场景下不需要锁
205
187
  function lockedPrint(s) {
206
188
  console.log(s);
207
189
  }
@@ -274,7 +256,12 @@ class TaskResult {
274
256
 
275
257
  // ============================== 工具函数 ==============================
276
258
  function safeFilename(name) {
277
- return String(name).replace(/[\\/:*?"<>|]/g, '_').trim();
259
+ let safe = String(name).replace(/[\\/:*?"<>|]/g, '_').trim();
260
+ // 防止路径遍历:过滤 ..
261
+ while (safe.includes('..')) safe = safe.replace('..', '_');
262
+ // 防止以 . 开头(Unix 隐藏文件)
263
+ safe = safe.replace(/^\.+/, '');
264
+ return safe || 'unknown';
278
265
  }
279
266
 
280
267
  function readExcelSheet(sheetName) {
@@ -476,7 +463,7 @@ async function resolveUrlConflict(proposedPath) {
476
463
  console.log(c('yellow', '文件名不能为空,使用默认名称'));
477
464
  return { action: 'proceed', path: proposedPath };
478
465
  }
479
- const newPath = path.join(dir, `${customName}${ext}`);
466
+ const newPath = path.join(dir, `${safeFilename(customName)}${ext}`);
480
467
  return resolveUrlConflict(newPath);
481
468
  }
482
469
 
@@ -697,6 +684,7 @@ async function stepAnalyze(text, maxRetries, retryDelay, timeout = 300) {
697
684
  const baseUrl = (process.env.AI_BASE_URL || '').replace(/\/$/, '');
698
685
  const model = process.env.AI_MODEL || '';
699
686
  const promptTpl = process.env.AI_PROMPT_TPL || '帮我归纳总结一下Keywords,尽可能全一点,这是内容:{content}';
687
+ const aiTemperature = parseFloat(process.env.AI_TEMPERATURE || '0.3');
700
688
  const aiTimeout = parseInt(process.env.AI_TIMEOUT || String(timeout), 10);
701
689
 
702
690
  if (!apiKey || !baseUrl || !model) {
@@ -708,7 +696,7 @@ async function stepAnalyze(text, maxRetries, retryDelay, timeout = 300) {
708
696
  const payload = JSON.stringify({
709
697
  model,
710
698
  messages: [{ role: 'user', content: prompt }],
711
- temperature: 0.3,
699
+ temperature: aiTemperature,
712
700
  });
713
701
 
714
702
  let lastErr = null;
@@ -850,7 +838,7 @@ async function stepDownload(row, sheetName, maxRetries, retryDelay, force, timeo
850
838
  }
851
839
 
852
840
  try {
853
- const { result, retriesUsed, error } = await retryCall(doDownload, maxRetries, retryDelay, stem);
841
+ await retryCall(doDownload, maxRetries, retryDelay, stem);
854
842
  } catch (e) {
855
843
  logError(`[${stem}] yt-dlp download failed: ${(e.stderr || e.message).slice(-2000)}`);
856
844
  return { file: null, retries: maxRetries, error: (e.stderr || e.message).slice(0, 500) };
@@ -1051,9 +1039,9 @@ async function transcribeService(audioFile, stem, maxRetries, retryDelay, timeou
1051
1039
  const fileBlob = await fs.openAsBlob(audioFile);
1052
1040
  const form = new FormData();
1053
1041
  form.append('file', fileBlob, path.basename(audioFile));
1054
- form.append('temperature', '0.0');
1055
- form.append('temperature_inc', '0.2');
1056
- form.append('response_format', 'json');
1042
+ form.append('temperature', WHISPER_TEMPERATURE);
1043
+ form.append('temperature_inc', WHISPER_TEMPERATURE_INC);
1044
+ form.append('response_format', WHISPER_RESPONSE_FORMAT);
1057
1045
 
1058
1046
  const controller = new AbortController();
1059
1047
  const timer = setTimeout(() => controller.abort(), timeout * 1000);
@@ -1146,7 +1134,6 @@ function writeAllContentsToExcel(results, keywordsDict = null) {
1146
1134
  }
1147
1135
 
1148
1136
  // Write content column
1149
- writeColumn(null, COL_CONTENT, updates); // null sheetName means iterate all sheets
1150
1137
  for (const [sheetName, rowsObj] of groupBySheetMap(updates)) {
1151
1138
  writeColumn(sheetName, COL_CONTENT, Object.entries(rowsObj));
1152
1139
  }
@@ -1179,20 +1166,28 @@ function groupBySheetMap(updates) {
1179
1166
  }
1180
1167
 
1181
1168
  // ============================== 报告 ==============================
1169
+ function computeSummary(results) {
1170
+ let success = 0, partial = 0, failed = 0, noVideo = 0;
1171
+ for (const r of results) {
1172
+ if (r.overall_status === 'success') success++;
1173
+ else if (r.overall_status === 'partial') partial++;
1174
+ else if (r.overall_status === 'failed') failed++;
1175
+ else if (r.overall_status === 'no_video') noVideo++;
1176
+ }
1177
+ return { total: results.length, success, partial, failed, no_video: noVideo };
1178
+ }
1179
+
1182
1180
  function generateReport(results, config) {
1183
1181
  fs.mkdirSync(REPORTS_DIR, { recursive: true });
1184
1182
  const ts = new Date().toISOString().replace(/[-:T]/g, '').slice(0, 15).replace(/(\d{8})(\d{6})/, '$1_$2');
1185
1183
  const reportFile = path.join(REPORTS_DIR, `report_${ts}.json`);
1186
1184
 
1187
- const success = results.filter(r => r.overall_status === 'success').length;
1188
- const partial = results.filter(r => r.overall_status === 'partial').length;
1189
- const failed = results.filter(r => r.overall_status === 'failed').length;
1190
- const noVideo = results.filter(r => r.overall_status === 'no_video').length;
1185
+ const summary = computeSummary(results);
1191
1186
 
1192
1187
  const report = {
1193
1188
  timestamp: new Date().toISOString(),
1194
1189
  config,
1195
- summary: { total: results.length, success, partial, failed, no_video: noVideo },
1190
+ summary,
1196
1191
  items: results.map(r => r.toJSON()),
1197
1192
  failed_items: results.filter(r => r.overall_status === 'failed' || r.overall_status === 'partial')
1198
1193
  .map(r => ({
@@ -1210,15 +1205,12 @@ function generateReport(results, config) {
1210
1205
  }
1211
1206
 
1212
1207
  function printReportSummary(results) {
1213
- const success = results.filter(r => r.overall_status === 'success').length;
1214
- const partial = results.filter(r => r.overall_status === 'partial').length;
1215
- const failed = results.filter(r => r.overall_status === 'failed').length;
1216
- const noVid = results.filter(r => r.overall_status === 'no_video').length;
1208
+ const { total, success, partial, failed, no_video: noVid } = computeSummary(results);
1217
1209
 
1218
1210
  console.log(`\n${'='.repeat(60)}`);
1219
1211
  console.log(` 执行摘要`);
1220
1212
  console.log(`${'='.repeat(60)}`);
1221
- console.log(` 总计: ${results.length}`);
1213
+ console.log(` 总计: ${total}`);
1222
1214
  console.log(` ✅ 成功: ${success}`);
1223
1215
  console.log(` ⚠️ 部分成功: ${partial}`);
1224
1216
  console.log(` ❌ 失败: ${failed}`);
@@ -1239,6 +1231,35 @@ function printReportSummary(results) {
1239
1231
  }
1240
1232
  }
1241
1233
 
1234
+
1235
+ // ============================== 环境预检 + 用户确认 ==============================
1236
+ async function checkAndConfirmEnv(envCheck, dryRun, confirmMsg) {
1237
+ if (envCheck.allOk) return true;
1238
+ console.log(`\n${'='.repeat(60)}`);
1239
+ console.log(' \u26a0\ufe0f 工具/服务预检:以下依赖不可用');
1240
+ console.log('='.repeat(60));
1241
+ for (const issue of envCheck.issues) console.log(` \u2022 ${issue}`);
1242
+ console.log('\n 涉及的步骤将失败。');
1243
+ if (dryRun) return true;
1244
+ try {
1245
+ const rl = require('readline').createInterface({ input: process.stdin, output: process.stdout });
1246
+ const answer = await new Promise(resolve => {
1247
+ rl.question(`\n ${confirmMsg}(输入 yes 继续,其他任意键取消): `, ans => {
1248
+ rl.close();
1249
+ resolve(ans.trim().toLowerCase());
1250
+ });
1251
+ });
1252
+ if (answer !== 'yes') {
1253
+ console.log('用户取消执行(工具不可用)');
1254
+ return false;
1255
+ }
1256
+ return true;
1257
+ } catch (e) {
1258
+ console.log('非交互环境,取消执行');
1259
+ return false;
1260
+ }
1261
+ }
1262
+
1242
1263
  // ============================== 单任务处理 ==============================
1243
1264
  async function processOneTask(row, sheetName, steps, maxRetries, retryDelay, force,
1244
1265
  whisperAvailable, positionLabel = '', downloadTimeout = 600, transcodeTimeout = 600,
@@ -1777,30 +1798,8 @@ async function run({
1777
1798
 
1778
1799
  logInfo(`tasks: ${tasks.length}, concurrency: ${concurrency}, max retries: ${maxRetries}`);
1779
1800
 
1780
- // ── 工具/服务预检 ──
1781
1801
  const envCheck = await checkEnvironmentAsync(steps);
1782
- if (!envCheck.allOk) {
1783
- console.log(`\n${'='.repeat(60)}`);
1784
- console.log(' ⚠️ 工具/服务预检:以下依赖不可用');
1785
- console.log('='.repeat(60));
1786
- for (const issue of envCheck.issues) {
1787
- console.log(` • ${issue}`);
1788
- }
1789
- console.log('\n 涉及的步骤将失败。');
1790
- if (!dryRun) {
1791
- const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
1792
- const answer = await new Promise(resolve => {
1793
- rl.question('\n 是否继续执行?(输入 yes 继续,其他任意键取消): ', ans => {
1794
- rl.close();
1795
- resolve(ans.trim().toLowerCase());
1796
- });
1797
- });
1798
- if (answer !== 'yes') {
1799
- logInfo('用户取消执行(工具不可用)');
1800
- return;
1801
- }
1802
- }
1803
- }
1802
+ if (!await checkAndConfirmEnv(envCheck, dryRun, '是否继续执行?')) return;
1804
1803
 
1805
1804
  // ── 干跑模式 ──
1806
1805
  if (dryRun) {
@@ -2011,26 +2010,8 @@ async function runFromReport(reportPath, steps, maxRetries, retryDelay, concurre
2011
2010
  return;
2012
2011
  }
2013
2012
 
2014
- // ── 工具/服务预检 ──
2015
2013
  const envRfr = await checkEnvironmentAsync(steps);
2016
- if (!envRfr.allOk) {
2017
- console.log(`\n${'='.repeat(60)}`);
2018
- console.log(' ⚠️ 工具/服务预检:以下依赖不可用');
2019
- console.log('='.repeat(60));
2020
- for (const issue of envRfr.issues) console.log(` • ${issue}`);
2021
- console.log('\n 涉及的步骤将失败。');
2022
- const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
2023
- const answer = await new Promise(resolve => {
2024
- rl.question('\n 是否继续重跑?(输入 yes 继续,其他任意键取消): ', ans => {
2025
- rl.close();
2026
- resolve(ans.trim().toLowerCase());
2027
- });
2028
- });
2029
- if (answer !== 'yes') {
2030
- logInfo('用户取消重跑(工具不可用)');
2031
- return;
2032
- }
2033
- }
2014
+ if (!await checkAndConfirmEnv(envRfr, dryRun, '是否继续重跑?')) return;
2034
2015
 
2035
2016
  let whisperAvailable = false;
2036
2017
  if (steps.includes('transcribe')) {
@@ -2192,11 +2173,19 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
2192
2173
  console.log(` 视频ID: ${c('cyan', parsed.videoId)}`);
2193
2174
  console.log(` 链接: ${c('cyan', parsed.watchUrl)}`);
2194
2175
 
2195
- // 构建文件路径: downloads/<platform>/<name>.mp4
2176
+ // dry-run 模式
2177
+ if (opts.dryRun) {
2178
+ console.log(c('dim', '\n── 开始执行 (dry-run) ──\n'));
2179
+ console.log(` 将执行步骤: ${c('cyan', steps.join(' → '))}`);
2180
+ console.log(` 输出名称: ${c('cyan', opts.name || parsed.videoId)}`);
2181
+ process.exit(0);
2182
+ }
2183
+
2184
+ // 构建文件路径: output/downloads/<platform>/<name>.mp4
2196
2185
  fs.mkdirSync(DOWNLOADS_DIR, { recursive: true });
2197
2186
  const dlDir = path.join(DOWNLOADS_DIR, parsed.platform);
2198
2187
  fs.mkdirSync(dlDir, { recursive: true });
2199
- const fileName = opts.name || parsed.videoId;
2188
+ const fileName = safeFilename(opts.name || parsed.videoId);
2200
2189
  const proposedPath = path.join(dlDir, `${fileName}.mp4`);
2201
2190
 
2202
2191
  // 冲突处理(--force 时直接覆盖)
@@ -2299,9 +2288,20 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
2299
2288
  }
2300
2289
  console.log(`\n 可执行步骤: ${c('green', steps.join(' → '))}`);
2301
2290
 
2291
+ // dry-run 模式
2292
+ if (opts.dryRun) {
2293
+ console.log(c('dim', '\n── 开始执行 (dry-run) ──\n'));
2294
+ console.log(` [本地文件] 将执行步骤: ${c('cyan', steps.join(' → '))}`);
2295
+ console.log(` 输入文件: ${c('cyan', inputPath)}`);
2296
+ if (opts.name) {
2297
+ console.log(` 输出名称: ${c('cyan', opts.name)}`);
2298
+ }
2299
+ process.exit(0);
2300
+ }
2301
+
2302
2302
  // 确定输出文件名
2303
2303
  const sheetName = 'local';
2304
- const baseName = opts.name || path.parse(inputPath).name;
2304
+ const baseName = safeFilename(opts.name || path.parse(inputPath).name);
2305
2305
  const stem = baseName;
2306
2306
 
2307
2307
  // 检查转码输出文件是否已有冲突