npm - video-pipeline - Versions diffs - 1.2.4 → 1.2.6 - Mend

video-pipeline 1.2.4 → 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/.env.example CHANGED

@@ -170,4 +170,4 @@ AI_TIMEOUT=300
 AI_TEMPERATURE=0.3
 # 【关联】提示词模板，{content} 占位符会被识别文本替换
 # 【自由】提示词内容可随意修改，但必须保留 {content} 占位符
-AI_PROMPT_TPL=帮我归纳总结一下Keywords，尽可能全一点，这是内容：{content}
+AI_PROMPT_TPL=帮我归纳总结一下提供内容的关键词，尽可能全面，无遗漏，无重复，无幻想，关键词之间用英文逗号分隔开。如果内容为英文，则关键词全部是英文，如果内容是中文，则关键词以中文为主，可以附带一些英文关键词。这是内容：{content}

package/CHANGELOG.md CHANGED

@@ -1,5 +1,38 @@
 # Changelog
+## [1.2.6] - 2026-06-11
+### Features
+- 报告按 sheet/站点分目录存储 (`6610c57`)
+- 统一三种来源报告格式 + 修复多处 bug (`2a6f606`)
+### Bug Fixes
+- groupBySheetMap 返回 Map 而非普通对象，修复 for...of 不可迭代错误 (`dfae532`)
+### Documentation
+- update (`6ddc48b`)
+- 修正 --input 模式的 {sheet} 表述为固定 local (`be61e29`)
+- 输出结构速查表 — 三来源×四环节对照 (`248168c`)
+## [1.2.5] - 2026-06-11
+### Features
+- 新增 --offset / --limit 参数，支持跳过和限量处理 Excel 数据 (`d691822`)
+### Bug Fixes
+- base_dir 改用 cwd 而非脚本安装目录，修复全局安装后路径解析错误 (`0fe5b1e`)
+### Documentation
+- update (`45a8d45`)
 ## [1.2.4] - 2026-06-11
 ### Features

package/README.md CHANGED

@@ -158,8 +158,22 @@ WHISPER_LANGUAGE=zh          # 空=多语言自动检测（默认），需要指
 ├── transcoded/                   # ffmpeg 转码输出（wav 16kHz mono）
 │   ├── YouTube视频/
 │   └── 普诺赛中文站/
-├── reports/                      # 执行报告（JSON）
-│   └── report_YYYYMMDD_HHMMSS.json
+├── reports/                      # 执行报告（按 sheet/站点分目录）
+│   ├── YouTube视频/
+│   │   ├── report_YYYYMMDD_HHMMSS.json   # JSON 报告（机器可读，用于重跑）
+│   │   └── tasks/                        # 人类可读文本摘要
+│   │       ├── 2143.txt
+│   │       └── ...
+│   ├── 普诺赛中文站/
+│   │   ├── report_YYYYMMDD_HHMMSS.json
+│   │   └── tasks/
+│   │       └── ...
+│   ├── youtube/                  # --url 模式按平台名分目录
+│   │   ├── report_YYYYMMDD_HHMMSS.json
+│   │   └── tasks/
+│   └── local/                    # --input 模式默认目录
+│       ├── report_YYYYMMDD_HHMMSS.json
+│       └── tasks/
 ├── scripts/                      # 辅助脚本
 │   ├── release.js                 # 版本发布脚本
 │   └── regenerate-changelog.js  # CHANGELOG 重建脚本
@@ -244,17 +258,21 @@ node process_videos.js --sheet "YouTube视频" --concurrency 2 --retry 3
 # 先干跑预览
 node process_videos.js --dry-run
+# Excel 数据量大时，偏移+限量调试
+node process_videos.js --offset 10 --limit 5 --dry-run  # 跳过前10条，预览5条
+node process_videos.js --limit 3 --concurrency 1        # 只处理前3条
 ```
 ### 重跑失败
 ```bash
-# 第一次跑完后生成 reports/report_xxx.json
+# 第一次跑完后生成 reports/{sheet名称}/report_xxx.json
 # 查看失败项：
-node process_videos.js --retry-failed reports/report_20260610_143000.json --dry-run
+node process_videos.js --retry-failed reports/YouTube视频/report_20260610_143000.json --dry-run
 # 重跑：
-node process_videos.js --retry-failed reports/report_20260610_143000.json --concurrency 2 --retry 3
+node process_videos.js --retry-failed reports/YouTube视频/report_20260610_143000.json --concurrency 2 --retry 3
 ```
 ### 超时控制（防止任务卡死）
@@ -344,6 +362,8 @@ node process_videos.js --input "downloads/产品介绍.mp4" --step analyze
 |---|---|---|---|
 | `--sheet <name>` | str | 全部 | 指定 sheet 名称 |
 | `--id <id>` | str | — | 指定 extra.id 或 title（单条测试） |
+| `--offset <n>` | int | 0 | 跳过前 N 条任务（从 0 开始），适合调试大量数据 |
+| `--limit <n>` | int | 0 | 最多处理 N 条任务，0 表示无限制 |
 | `--step <step>` | str | 全跑 | 只执行某步：`download` / `transcode` / `transcribe` / `analyze` |
 | `--force` | flag | off | 强制重做下载+转码，忽略已有文件 |
 | `--concurrency <n>` | int | 1 | 并发数，建议 2~3 |
@@ -354,7 +374,7 @@ node process_videos.js --input "downloads/产品介绍.mp4" --step analyze
 | `--transcribe-timeout <n>` | int | 600 | 单个识别任务最长执行时间（秒） |
 | `--analyze-timeout <n>` | int | 300 | 单个 AI 分析任务最长执行时间（秒） |
 | `--dry-run` | flag | off | 干跑模式，只列任务不执行 |
-| `--retry-failed <path>` | path | — | 从报告 JSON 重跑失败项 |
+| `--retry-failed <path>` | path | — | 从报告 JSON 重跑失败项（如 `reports/YouTube视频/report_xxx.json`） |
 | `--init` | flag | off | 复制 .env.example 到当前目录并重命名为 .env |
 | `--file <path>` | path | — | 指定 Excel 文件路径（优先级高于 EXCEL_FILE 环境变量） |
 | `--input <path>` | path | — | 指定本地视频文件路径（跳过下载，直接转码→识别→分析） |
@@ -504,9 +524,64 @@ node process_videos.js --sheet "YouTube视频" --step analyze --concurrency 2
 ---
-## 报告格式
+## 输出结构速查表
+三种输入来源在不同处理环节的输出路径汇总如下。所有路径均以 `output/` 为根（可通过 `DOWNLOADS_DIR` / `TRANSCODED_DIR` / `REPORTS_DIR` 环境变量覆盖）。
+> `{sheet}` = Excel 工作表名（如 `YouTube视频`、`普诺赛中文站`）
+> `{platform}` = 视频平台标识（如 `youtube`、`bilibili`、`tencentVid`、`youku`）
+> `{stem}` = 去重后的安全文件名（不含扩展名）
+### ① Excel 批量模式（默认）
+| 环节 | 输出路径 | 产物格式 | 说明 |
+|------|---------|---------|------|
+| 下载 | `output/downloads/{sheet}/{stem}.mp4` | 视频 | yt-dlp 下载原始视频 |
+| 转码 | `output/transcoded/{sheet}/{stem}.wav` | 音频 | ffmpeg 转 16kHz mono WAV |
+| JSON 报告 | `output/reports/{sheet}/report_YYYYMMDD_HHMMSS.json` | JSON | 机器可读，含 summary + failed_items，可供 --retry-failed 重跑 |
+| 文本报告 | `output/reports/{sheet}/tasks/{stem}.txt` | 文本 | 人类可读，含语音识别原文 + AI 关键词分析 |
+> 多 sheet 同时执行时，每个 sheet 独立一个子目录，互不干扰。
+### ② --url 直链模式
+| 环节 | 输出路径 | 产物格式 | 说明 |
+|------|---------|---------|------|
+| 下载 | `output/downloads/{platform}/{name}.mp4` | 视频 | yt-dlp 下载单个视频 |
+| 转码 | `output/transcoded/{platform}/{name}.wav` | 音频 | ffmpeg 转 16kHz mono WAV |
+| JSON 报告 | `output/reports/{platform}/report_YYYYMMDD_HHMMSS.json` | JSON | 格式与 Excel 模式一致 |
+| 文本报告 | `output/reports/{platform}/tasks/{name}.txt` | 文本 | 含识别原文 + AI 分析 |
+> `{platform}` 由脚本自动从 URL 解析，如 `https://www.youtube.com/watch?v=xxx` → `youtube`。
+### ③ --input 本地文件模式
+| 环节 | 输出路径 | 产物格式 | 说明 |
+|------|---------|---------|------|
+| 下载 | —（跳过） | — | 本地文件无需下载 |
+| 转码 | `output/transcoded/local/{stem}.wav` | 音频 | ffmpeg 转 16kHz mono WAV |
+| JSON 报告 | `output/reports/local/report_YYYYMMDD_HHMMSS.json` | JSON | 格式与 Excel 模式一致 |
+| 文本报告 | `output/reports/local/tasks/{stem}.txt` | 文本 | 含识别原文 + AI 分析 |
+> `local` 是 `--input` 模式的固定目录名（与 Excel 模式的 sheet 名无关），所有本地文件处理结果统一归入此目录。
+---
+### 三种来源对比一览
+| 维度 | Excel 批量 | --url 直链 | --input 本地文件 |
+|------|-----------|-----------|-----------------|
+| 输入 | Excel 行（多视频批量） | 单个视频 URL | 本地视频/音频文件 |
+| 下载目录 | `downloads/{sheet}/` | `downloads/{platform}/` | 无 |
+| 转码目录 | `transcoded/{sheet}/` | `transcoded/{platform}/` | `transcoded/local/` |
+| 报告目录 | `reports/{sheet}/` | `reports/{platform}/` | `reports/local/` |
+| 分组依据 | Excel sheet 名 | URL 解析的平台名 | 固定 `local` |
+| 并发支持 | ✅ 多线程 | ❌ 单任务 | ❌ 单任务 |
+| 支持 --retry-failed | ✅ | ❌ | ❌ |
-执行后在 `reports/` 生成 `report_YYYYMMDD_HHMMSS.json`：
+---
+### JSON 报告结构
 ```json
 {
@@ -532,6 +607,8 @@ node process_videos.js --sheet "YouTube视频" --step analyze --concurrency 2
 }
 ```
+### 状态含义
 - **success**：下载 + 转码 + 识别全部成功（AI 分析失败不影响此状态）
 - **partial**：下载 + 转码成功，识别或 AI 分析失败
 - **failed**：下载或转码失败
@@ -554,7 +631,7 @@ node process_videos.js --sheet "YouTube视频" --id 2143 --retry 2
 node process_videos.js --concurrency 3 --retry 3
 # 4. 查看报告，重跑失败项
-node process_videos.js --retry-failed reports/report_xxx.json --concurrency 2 --retry 3
+node process_videos.js --retry-failed reports/YouTube视频/report_xxx.json --concurrency 2 --retry 3
 ```
 ---

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "video-pipeline",
-  "version": "1.2.4",
+  "version": "1.2.6",
   "description": "视频下载、转码、文本识别、AI 关键词分析一体化流程 CLI 工具",
   "keywords": [
     "video",

package/process_videos.js CHANGED

@@ -8,6 +8,7 @@
  *   node process_videos.js --sheet "普诺赛中文站" --id 427
  *   node process_videos.js --step download
  *   node process_videos.js --dry-run
+ *   node process_videos.js --offset 10 --limit 5   # 跳过前10条，只处理5条
  */
 // ============================== 依赖 ==============================
@@ -34,7 +35,7 @@ dotenv.config({ path: _dotenvPath });
 // ============================== 路径配置 ==============================
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
-const BASE_DIR = path.resolve(__dirname);
+const BASE_DIR = process.cwd();
 function envPath(key, defaultValue) {
   const val = process.env[key] || defaultValue;
@@ -1156,11 +1157,11 @@ function writeAllContentsToExcel(results, keywordsDict = null) {
 }
 function groupBySheetMap(updates) {
-  const result = {};
+  const result = new Map();
   for (const [compositeKey, text] of updates) {
     const [sheetName, key] = compositeKey.split('|');
-    if (!result[sheetName]) result[sheetName] = {};
-    result[sheetName][key] = text;
+    if (!result.has(sheetName)) result.set(sheetName, {});
+    result.get(sheetName)[key] = text;
   }
   return result;
 }
@@ -1177,10 +1178,31 @@ function computeSummary(results) {
   return { total: results.length, success, partial, failed, no_video: noVideo };
 }
-function generateReport(results, config) {
-  fs.mkdirSync(REPORTS_DIR, { recursive: true });
+/**
+ * 生成执行报告 JSON 文件。
+ * - 提供 sheetName 时：报告存入 REPORTS_DIR/{sheetName}/report_{ts}.json
+ * - 不提供时：按 r.sheet 分组，每 sheet 调用自身，返回路径数组
+ */
+function generateReport(results, config, sheetName) {
+  if (!sheetName) {
+    // ── 按 sheet 分组生成 ──
+    const sheetGroups = new Map();
+    for (const r of results) {
+      if (!sheetGroups.has(r.sheet)) sheetGroups.set(r.sheet, []);
+      sheetGroups.get(r.sheet).push(r);
+    }
+    const paths = [];
+    for (const [sheet, items] of sheetGroups) {
+      paths.push(generateReport(items, config, sheet));
+    }
+    return paths;
+  }
+  // ── 单 sheet 报告 ──
+  const dir = path.join(REPORTS_DIR, sheetName);
+  fs.mkdirSync(dir, { recursive: true });
   const ts = new Date().toISOString().replace(/[-:T]/g, '').slice(0, 15).replace(/(\d{8})(\d{6})/, '$1_$2');
-  const reportFile = path.join(REPORTS_DIR, `report_${ts}.json`);
+  const reportFile = path.join(dir, `report_${ts}.json`);
   const summary = computeSummary(results);
@@ -1527,62 +1549,106 @@ async function runInputTask(opts) {
   console.log(c('dim', '\n── 开始执行 ──\n'));
+  // ── 解决 stem 重名 ──
+  let usedStem = stem;
+  {
+    let counter = 1;
+    const tcDir = path.join(TRANSCODED_DIR, sheetName);
+    fs.mkdirSync(tcDir, { recursive: true });
+    let testPath = path.join(tcDir, usedStem + TRANSCODE_EXT);
+    while (fs.existsSync(testPath) && !steps.includes('transcode')) {
+      // 跳过转码但转码产物已存在 → 直接用
+      break;
+    }
+    if (steps.includes('transcode') && !force) {
+      while (fs.existsSync(testPath)) {
+        usedStem = `${stem}_${counter}`;
+        testPath = path.join(tcDir, usedStem + TRANSCODE_EXT);
+        counter++;
+      }
+    }
+  }
+  if (usedStem !== stem) {
+    console.log(`  ⚠️  stem "${stem}" 已存在 → 使用 "${usedStem}"`);
+  }
+  // ── 构建 TaskResult ──
+  const result = new TaskResult(sheetName, usedStem, path.basename(inputPath), 'local', null, usedStem);
+  result.download = new StepResult('skipped');
   // ── download: 跳过（本地文件）──
-  console.log(`  [${stem}] 📥 下载: ${c('yellow', '已跳过 (本地文件)')}`);
+  console.log(`  [${usedStem}] 📥 下载: ${c('yellow', '已跳过 (本地文件)')}`);
   // ── transcode ──
   let tcFile = null;
   if (steps.includes('transcode')) {
-    console.log(`  [${stem}] 🎵 开始转码...`);
+    console.log(`  [${usedStem}] 🎵 开始转码...`);
     try {
       const { file, error } = await stepTranscode(inputPath, sheetName, maxRetries, retryDelay, force, transcodeTimeout);
       tcFile = file;
       if (file && fs.existsSync(file)) {
         const size = (fs.statSync(file).size / 1024 / 1024).toFixed(1);
-        console.log(`  [${stem}] 🎵 转码完成: ${file} (${size} MB)`);
+        console.log(`  [${usedStem}] 🎵 转码完成: ${file} (${size} MB)`);
+        result.transcode = new StepResult('success', file);
       } else {
-        console.log(`  [${stem}] 🎵 转码: ${c(file ? 'yellow' : 'red', file ? '已跳过 (文件已存在)' : '失败 — ' + (error || ''))}`);
+        console.log(`  [${usedStem}] 🎵 转码: ${c(file ? 'yellow' : 'red', file ? '已跳过 (文件已存在)' : '失败 — ' + (error || ''))}`);
+        result.transcode = new StepResult(file ? 'skipped' : 'failed', file, error);
       }
     } catch (e) {
-      console.log(`  [${stem}] 🎵 转码: ${c('red', '异常 — ' + (e.message || '').slice(0, 200))}`);
+      console.log(`  [${usedStem}] 🎵 转码: ${c('red', '异常 — ' + (e.message || '').slice(0, 200))}`);
+      result.transcode = new StepResult('failed', null, String(e.message).slice(0, 500));
     }
     if (!tcFile) {
       console.log(c('yellow', '\n⚠️  转码未产出文件，后续步骤将跳过\n'));
+      result.overall_status = 'failed';
+      result.error = 'transcode failed';
+      return result;
     }
   } else if (steps.includes('transcribe')) {
-    // 无 transcode 步骤但有 transcribe：优先使用已有转码文件
     const tcDir = path.join(TRANSCODED_DIR, sheetName);
-    const expectedTc = path.join(tcDir, stem + TRANSCODE_EXT);
+    const expectedTc = path.join(tcDir, usedStem + TRANSCODE_EXT);
     if (fs.existsSync(expectedTc)) {
       tcFile = expectedTc;
-      console.log(`  [${stem}] 🎵 转码: ${c('yellow', '使用已有文件 ' + path.basename(expectedTc))}`);
+      result.transcode = new StepResult('success', tcFile);
+      console.log(`  [${usedStem}] 🎵 转码: ${c('yellow', '使用已有文件 ' + path.basename(expectedTc))}`);
     } else {
-      console.log(`  [${stem}] 🎵 转码: ${c('red', '未找到转码文件，将尝试用原始文件识别（可能失败）')}`);
+      console.log(`  [${usedStem}] 🎵 转码: ${c('red', '未找到转码文件，将尝试用原始文件识别（可能失败）')}`);
       tcFile = inputPath;
+      result.transcode = new StepResult('warning', inputPath, 'transcode file not found, using raw input');
     }
   } else {
     tcFile = inputPath;
+    result.transcode = new StepResult('success', inputPath);
   }
   // ── transcribe ──
   let transcribeText = '';
   if (steps.includes('transcribe') && tcFile) {
     if (!whisperAvailable) {
-      console.log(`  [${stem}] 📝 识别: ${c('red', 'whisper 不可用')}`);
+      console.log(`  [${usedStem}] 📝 识别: ${c('red', 'whisper 不可用')}`);
+      result.transcribe = new StepResult('failed', null, 'whisper unreachable');
+      result.overall_status = 'failed';
+      result.error = 'whisper unreachable';
+      return result;
     } else {
-      console.log(`  [${stem}] 📝 开始语音识别...`);
+      console.log(`  [${usedStem}] 📝 开始语音识别...`);
       try {
         const { text, error } = await stepTranscribe(tcFile, maxRetries, retryDelay, transcribeTimeout);
         if (text && typeof text === 'string') {
           transcribeText = text;
-          console.log(`  [${stem}] 📝 识别完成: ${text.length} 字符`);
+          console.log(`  [${usedStem}] 📝 识别完成: ${text.length} 字符`);
+          result.transcribe = new StepResult('success', text);
         } else {
-          console.log(`  [${stem}] 📝 识别: ${c('red', '失败 — ' + (error || ''))}`);
+          console.log(`  [${usedStem}] 📝 识别: ${c('red', '失败 — ' + (error || ''))}`);
+          result.transcribe = new StepResult('failed', null, error);
         }
       } catch (e) {
-        console.log(`  [${stem}] 📝 识别: ${c('red', '异常 — ' + (e.message || '').slice(0, 200))}`);
+        console.log(`  [${usedStem}] 📝 识别: ${c('red', '异常 — ' + (e.message || '').slice(0, 200))}`);
+        result.transcribe = new StepResult('failed', null, String(e.message).slice(0, 500));
       }
     }
+  } else {
+    result.transcribe = new StepResult('skipped');
   }
   // ── AI analyze ──
@@ -1590,28 +1656,45 @@ async function runInputTask(opts) {
   if (steps.includes('analyze') && transcribeText) {
     const aiEnabled = (process.env.AI_ENABLED || 'true').toLowerCase() === 'true';
     if (aiEnabled) {
-      console.log(`  [${stem}] 🤖 开始 AI 分析...`);
+      console.log(`  [${usedStem}] 🤖 开始 AI 分析...`);
       try {
         const { text: kw, error } = await stepAnalyze(transcribeText, maxRetries, retryDelay, analyzeTimeout);
         if (kw && typeof kw === 'string') {
           analyzeText = kw;
-          console.log(`  [${stem}] 🤖 AI分析完成: ${kw.length} 字符`);
+          console.log(`  [${usedStem}] 🤖 AI分析完成: ${kw.length} 字符`);
+          result.analyze = new StepResult('success', kw);
         } else {
-          console.log(`  [${stem}] 🤖 AI分析: ${c('red', '失败 — ' + (error || ''))}`);
+          console.log(`  [${usedStem}] 🤖 AI分析: ${c('red', '失败 — ' + (error || ''))}`);
+          result.analyze = new StepResult('failed', null, error);
         }
       } catch (e) {
-        console.log(`  [${stem}] 🤖 AI分析: ${c('red', '异常 — ' + (e.message || '').slice(0, 200))}`);
+        console.log(`  [${usedStem}] 🤖 AI分析: ${c('red', '异常 — ' + (e.message || '').slice(0, 200))}`);
+        result.analyze = new StepResult('failed', null, String(e.message).slice(0, 500));
       }
     } else {
-      console.log(`  [${stem}] 🤖 AI分析: ${c('yellow', '已禁用 (AI_ENABLED=false)')}`);
+      console.log(`  [${usedStem}] 🤖 AI分析: ${c('yellow', '已禁用 (AI_ENABLED=false)')}`);
+      result.analyze = new StepResult('skipped');
     }
+  } else {
+    result.analyze = new StepResult('skipped');
+  }
+  // ── 判定整体状态 ──
+  if (result.transcode.status === 'failed') {
+    result.overall_status = 'failed';
+  } else if (result.transcribe.status === 'failed' && steps.includes('transcribe')) {
+    result.overall_status = 'partial';
+  } else if (result.analyze.status === 'failed') {
+    result.overall_status = 'partial';
+  } else {
+    result.overall_status = 'success';
   }
   // ── 保存文本结果 ──
   if (transcribeText || analyzeText) {
-    const outDir = path.join(REPORTS_DIR, 'input-tasks');
+    const outDir = path.join(REPORTS_DIR, sheetName, 'tasks');
     fs.mkdirSync(outDir, { recursive: true });
-    const outFile = path.join(outDir, `${stem}.txt`);
+    const outFile = path.join(outDir, `${usedStem}.txt`);
     const lines = [
       `文件: ${inputPath}`,
       `平台: local`,
@@ -1642,6 +1725,8 @@ async function runInputTask(opts) {
     console.log(c('yellow', `⚠️  ${failed.length} 个步骤未成功: ${failed.join(', ')}`));
   }
   console.log('');
+  return result;
 }
@@ -1737,7 +1822,7 @@ async function runUrlTask(opts) {
   const analyzeText = (result.analyze && typeof result.analyze.file === 'string') ? result.analyze.file : '';
   if (transcribeText || analyzeText) {
-    const outDir = path.join(REPORTS_DIR, 'url-tasks');
+    const outDir = path.join(REPORTS_DIR, platform, 'tasks');
     fs.mkdirSync(outDir, { recursive: true });
     const outFile = path.join(outDir, `${stem}.txt`);
     const lines = [
@@ -1757,12 +1842,14 @@ async function runUrlTask(opts) {
   }
   console.log(c('bold', c('green', `\n\uD83C\uDF89 \u5168\u90E8\u5B8C\u6210! (${successes.length}/${steps.length} \u6B65\u6210\u529F)\n`)));
+  return result;
 }
 async function run({
   targetSheet, targetId, steps, maxRetries, retryDelay,
   concurrency, force, dryRun, retryFailed,
   downloadTimeout, transcodeTimeout, transcribeTimeout, analyzeTimeout,
+  offset = 0, rowLimit = 0,
 }) {
   // ── 重跑失败模式 ──
   if (retryFailed) {
@@ -1772,7 +1859,7 @@ async function run({
   // ── 构建任务列表 ──
   const sheets = targetSheet ? [targetSheet] : VIDEO_SHEETS;
-  const tasks = [];
+  let tasks = [];
   for (const sheetName of sheets) {
     let rows = readExcelSheet(sheetName);
     if (targetId) {
@@ -1796,6 +1883,15 @@ async function run({
     }
   }
+  // ── 偏移/限量（全局，跨 sheet） ──
+  if (offset > 0 || rowLimit > 0) {
+    const start = offset;
+    const end = rowLimit > 0 ? start + rowLimit : undefined;
+    const originalLen = tasks.length;
+    tasks = tasks.slice(start, end);
+    logInfo(`applied offset=${start}, limit=${rowLimit || 'all'} → tasks: ${originalLen} → ${tasks.length}`);
+  }
   logInfo(`tasks: ${tasks.length}, concurrency: ${concurrency}, max retries: ${maxRetries}`);
   const envCheck = await checkEnvironmentAsync(steps);
@@ -1861,10 +1957,10 @@ async function run({
     sheets, target_id: targetId, steps, max_retries: maxRetries,
     retry_delay: retryDelay, concurrency, force,
   };
-  const reportPath = generateReport(results, config);
+  const reportPaths = generateReport(results, config);
   printReportSummary(results);
-  logInfo(`all done! report: ${reportPath}`);
+  logInfo(`all done! reports: ${Array.isArray(reportPaths) ? reportPaths.join(', ') : reportPaths}`);
 }
 function printDryRun(tasks, steps, env) {
@@ -2057,9 +2153,9 @@ async function runFromReport(reportPath, steps, maxRetries, retryDelay, concurre
   const config = { retry_from: reportPath, steps, max_retries: maxRetries,
     retry_delay: retryDelay, concurrency, force };
-  const reportFilePath = generateReport(results, config);
+  const reportPaths = generateReport(results, config);
   printReportSummary(results);
-  logInfo(`all done! report: ${reportFilePath}`);
+  logInfo(`all done! reports: ${Array.isArray(reportPaths) ? reportPaths.join(', ') : reportPaths}`);
 }
 // ============================== CLI ==============================
@@ -2069,6 +2165,8 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
     .description('视频下载、转码、文本识别、AI分析一体化流程')
     .option('--sheet <name>', '指定 sheet 名称')
     .option('--id <id>', '指定 extra.id 或 title（单条测试）')
+    .option('--offset <n>', '跳过前 N 条任务（从 0 开始），默认 0', parseInt, 0)
+    .option('--limit <n>', '最多处理 N 条任务，默认无限制', parseInt, 0)
     .option('--step <step>', '指定执行步骤（可多次指定），如 --step transcode --step transcribe', (val, prev) => {
       const allowed = ['download', 'transcode', 'transcribe', 'analyze'];
       if (!allowed.includes(val)) {
@@ -2086,7 +2184,7 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
     .option('--transcribe-timeout <n>', '识别超时（秒），默认 600', parseInt, 600)
     .option('--analyze-timeout <n>', 'AI 分析超时（秒），默认 300', parseInt, 300)
     .option('--dry-run', '干跑模式，只列任务不执行')
-    .option('--retry-failed <path>', '从报告 JSON 重跑失败项')
+    .option('--retry-failed <path>', '从报告 JSON 重跑失败项（output/reports/{sheet}/report_xxx.json）')
     .option('--init', '复制 .env.example 到当前目录并重命名为 .env')
     .option('--file <path>', '指定 Excel 文件路径（优先级高于 EXCEL_FILE 环境变量）')
     .option('--input <path>', '指定本地视频文件路径（跳过下载，直接转码→识别→分析）')
@@ -2216,7 +2314,7 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
     }
     // 执行流水线
-    await runUrlTask({
+    const urlResult = await runUrlTask({
       watchUrl: parsed.watchUrl,
       platform: parsed.platform,
       pkey: parsed.pkey,
@@ -2234,6 +2332,13 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
       whisperAvailable,
     });
+    // 生成标准报告 JSON（与 Excel 模式格式一致）
+    if (urlResult) {
+      const config = { steps, max_retries: opts.retry, retry_delay: opts.retryDelay, concurrency: 1, force: opts.force || false };
+      generateReport([urlResult], config, parsed.platform);
+      printReportSummary([urlResult]);
+    }
     process.exit(0);
   }
@@ -2336,7 +2441,7 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
     }
     // 执行流水线
-    await runInputTask({
+    const inputResult = await runInputTask({
       inputPath,
       stem,
       sheetName,
@@ -2351,6 +2456,13 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
       fileInfo,
     });
+    // 生成标准报告 JSON（与 Excel 模式格式一致）
+    if (inputResult) {
+      const config = { steps, max_retries: opts.retry, retry_delay: opts.retryDelay, concurrency: 1, force: opts.force || false };
+      generateReport([inputResult], config, sheetName);
+      printReportSummary([inputResult]);
+    }
     process.exit(0);
   }
@@ -2359,6 +2471,8 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
     targetSheet: opts.sheet || null,
     targetId: opts.id || null,
     steps,
+    offset: opts.offset || 0,
+    rowLimit: opts.limit || 0,
     maxRetries: opts.retry,
     retryDelay: opts.retryDelay,
     concurrency: opts.concurrency,