npm - video-pipeline - Versions diffs - 1.0.4 → 1.1.0 - Mend

video-pipeline 1.0.4 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/CHANGELOG.md CHANGED Viewed

@@ -1,20 +1,17 @@
 # Changelog
-## [1.0.4] - 2026-06-11
+## [1.1.0] - 2026-06-11
 ### Features
-- rename npm package to video-pipeline (`644cc8a`)
-- add --url option to Python version (parity with Node.js) (`e1df301`)
+- add --input option for local file processing (Node.js + Python) (`4805e3b`)
 ### Bug Fixes
-- recreate v1.0.3 tag and regenerate CHANGELOG with correct scopes (`9c2db90`)
-- normalize CHANGELOG format to Keep a Changelog standard (`2b887f4`)
-- correct changelog per-version ranges + fix getLastTag for Windows (`755222a`)
+- whisper transcribe FormData + multi --step + auto-find wav (`f8c9fa2`)
-## [Unreleased]
+## [1.0.4] - 2026-06-11
 ### Features
@@ -23,6 +20,7 @@
 ### Bug Fixes
+- recreate v1.0.3 tag and regenerate CHANGELOG with correct scopes (`9c2db90`)
 - normalize CHANGELOG format to Keep a Changelog standard (`2b887f4`)
 - correct changelog per-version ranges + fix getLastTag for Windows (`755222a`)

package/README.md CHANGED Viewed

@@ -1,6 +1,38 @@
-# 视频下载 / 转码 / 文本识别 / AI 分析 流程
+# 视频处理流水线 (Video Pipeline)
-基于 `process_videos.py`，一键完成：yt-dlp 下载 → ffmpeg 转码 → whisper 识别 → AI 关键词归纳 → 写回 Excel。
+基于 `process_videos.js` (Node.js) 或 `process_videos.py` (Python)，一键完成：yt-dlp 下载 → ffmpeg 转码 → whisper 识别 → AI 关键词归纳 → 写回 Excel。
+**三种使用方式：**
+- **Excel 批量处理**：从 Excel 文件读取视频 ID，自动完成全流程
+- **直链下载**：通过 `--url` 直接指定视频链接，自动识别平台并下载
+- **本地文件**：通过 `--input` 指定本地视频文件，跳过下载直接转码分析
+---
+## 安装方式
+### Node.js 版本（推荐）
+```bash
+# 全局安装
+npm install -g video-pipeline
+# 使用后可直接调用
+video-pipeline --help
+```
+### Python 版本
+```bash
+# 克隆或下载脚本
+git clone https://gitee.com/siriussupreme/yt-dlp_ffmpeg_whisper_memo-ai.git
+cd yt-dlp_ffmpeg_whisper_memo-ai
+# 安装 Python 依赖
+pip install pandas openpyxl requests python-dotenv questionary
+```
+---
 ## 环境依赖
@@ -110,21 +142,33 @@ WHISPER_LANGUAGE=zh          # 空=多语言自动检测（默认），需要指
 ## 目录结构
 ```
-├── process_videos.py              # 主流程脚本
-├── .env.example                   # 环境变量模板（可提交 Git）
-├── .env                           # 实际环境变量（已 gitignore，按需修改）
-├── export_2026-06-10_split.xlsx   # 数据源（YouTube视频 / 普诺赛中文站 两个 sheet）
-├── cookies/
-│   ├── bilibili.txt               # B站 cookie（Netscape 格式）
-│   └── youtube.txt                # YouTube cookie 备用（Firefox 直读方案不需要）
-├── downloads/                     # yt-dlp 下载输出（mp4）
+├── process_videos.js              # Node.js 主流程脚本（推荐）
+├── process_videos.py              # Python 主流程脚本（备选）
+├── package.json                   # Node.js 项目配置（npm 包）
+├── .env.example                  # 环境变量模板（可提交 Git）
+├── .env                          # 实际环境变量（已 gitignore，按需修改）
+├── data/                         # 数据源目录
+│   └── export_2026-06-10_split.xlsx   # Excel 数据源
+├── cookies/                     # 站点 cookie 文件
+│   ├── bilibili.txt            # B站 cookie（Netscape 格式）
+│   └── youtube.txt             # YouTube cookie 备用（Firefox 直读方案不需要）
+├── downloads/                    # yt-dlp 下载输出（mp4）
 │   ├── YouTube视频/
 │   └── 普诺赛中文站/
-├── transcoded/                    # ffmpeg 转码输出（wav 16kHz mono）
+├── transcoded/                   # ffmpeg 转码输出（wav 16kHz mono）
 │   ├── YouTube视频/
 │   └── 普诺赛中文站/
-└── reports/                       # 执行报告（JSON）
-    └── report_YYYYMMDD_HHMMSS.json
+├── reports/                      # 执行报告（JSON）
+│   └── report_YYYYMMDD_HHMMSS.json
+├── scripts/                      # 辅助脚本
+│   ├── release.js                 # 版本发布脚本
+│   └── regenerate-changelog.js  # CHANGELOG 重建脚本
+├── .github/                      # GitHub Actions 工作流
+├── .husky/                      # Git hooks（commit 消息检查）
+├── node_modules/                 # Node.js 依赖（已 gitignore）
+├── CHANGELOG.md                  # 版本变更记录
+├── README.md                     # 使用文档
+└── LICENSE                       # MIT 许可证
 ```
 ---
@@ -167,35 +211,39 @@ yt-dlp 可直接从 Firefox 浏览器读取 cookie，无需手动导出：
 ```bash
 # 下载 + 转码 + 识别 + AI分析，指定 sheet + extra.id
+node process_videos.js --sheet "YouTube视频" --id 2143
+# 或 Python 版本
 python process_videos.py --sheet "YouTube视频" --id 2143
 # 只跑下载
+node process_videos.js --sheet "普诺赛中文站" --id 16 --step download
+# 或 Python 版本
 python process_videos.py --sheet "普诺赛中文站" --id 16 --step download
 # 只跑转码（需要已有下载文件）
-python process_videos.py --sheet "普诺赛中文站" --id 16 --step transcode
+node process_videos.js --sheet "普诺赛中文站" --id 16 --step transcode
 # 只跑识别（需要已有转码文件）
-python process_videos.py --sheet "普诺赛中文站" --id 16 --step transcribe
+node process_videos.js --sheet "普诺赛中文站" --id 16 --step transcribe
 # 只跑 AI 分析（需要已有识别文本）
-python process_videos.py --sheet "普诺赛中文站" --id 16 --step analyze
+node process_videos.js --sheet "普诺赛中文站" --id 16 --step analyze
 # 强制重新下载（忽略已有文件）
-python process_videos.py --sheet "YouTube视频" --id 2143 --force
+node process_videos.js --sheet "YouTube视频" --id 2143 --force
 ```
 ### 批量全量
 ```bash
 # 全量执行（2 个并发，失败重试 3 次）
-python process_videos.py --concurrency 2 --retry 3
+node process_videos.js --concurrency 2 --retry 3
 # 只跑某一 sheet
-python process_videos.py --sheet "YouTube视频" --concurrency 2 --retry 3
+node process_videos.js --sheet "YouTube视频" --concurrency 2 --retry 3
 # 先干跑预览
-python process_videos.py --dry-run
+node process_videos.js --dry-run
 ```
 ### 重跑失败
@@ -203,10 +251,10 @@ python process_videos.py --dry-run
 ```bash
 # 第一次跑完后生成 reports/report_xxx.json
 # 查看失败项：
-python process_videos.py --retry-failed reports/report_20260610_143000.json --dry-run
+node process_videos.js --retry-failed reports/report_20260610_143000.json --dry-run
 # 重跑：
-python process_videos.py --retry-failed reports/report_20260610_143000.json --concurrency 2 --retry 3
+node process_videos.js --retry-failed reports/report_20260610_143000.json --concurrency 2 --retry 3
 ```
 ### 超时控制（防止任务卡死）
@@ -215,7 +263,7 @@ python process_videos.py --retry-failed reports/report_20260610_143000.json --co
 ```bash
 # 自定义超时（单位秒）
-python process_videos.py \
+node process_videos.js \
     --download-timeout 900 \    # 下载 15 分钟
     --transcode-timeout 600 \   # 转码 10 分钟
     --transcribe-timeout 1200 \ # 识别 20 分钟
@@ -228,6 +276,51 @@ python process_videos.py \
 - 无论超时多少次，**不会阻塞其他并发任务**，失败项会记录到报告
 - 超时失败的任务可用 `--retry-failed` 单独重跑
+### 直接指定 URL 下载
+```bash
+# 直接指定视频链接，自动识别平台（支持标准链接、短链接、内嵌链接）
+node process_videos.js --url "https://www.youtube.com/watch?v=zzJmKPX8a3c"
+python process_videos.py --url "https://www.bilibili.com/video/BV1xx411c7mD"
+# 指定输出文件名（不含扩展名）
+node process_videos.js --url "https://youtu.be/zzJmKPX8a3c" --name "产品介绍"
+# 只执行部分步骤
+node process_videos.js --url "https://www.youtube.com/watch?v=zzJmKPX8a3c" --step transcode
+```
+**支持的 URL 格式：**
+- YouTube: 标准页、短链接、Shorts、内嵌页、直播
+- B站: 标准页（BV/av号）、短链接、内嵌页、移动端
+- 腾讯视频: 标准页、内嵌页、移动端
+- 优酷: 标准页
+**文件命名规则：**
+- 默认：`{平台}_{视频ID}`（如 `youtube_zzJmKPX8a3c`）
+- 自定义：通过 `--name` 指定（如 `--name "产品介绍"`）
+- 冲突处理：自动提示选择（覆盖 / 跳过 / 自定义名称）
+### 处理本地文件
+```bash
+# 指定本地视频文件，跳过下载，直接转码→识别→分析
+node process_videos.js --input "downloads/产品介绍.mp4"
+python process_videos.py --input "downloads/产品介绍.mp4"
+# 指定输出文件名
+node process_videos.js --input "downloads/产品介绍.mp4" --name "产品介绍_分析"
+# 只执行部分步骤
+node process_videos.js --input "downloads/产品介绍.mp4" --step transcribe --step analyze
+```
+**文件校验：**
+- 检查文件是否存在
+- 检查文件格式是否支持（视频/音频）
+- 检查是否可以正常读取
+- 校验失败会提示错误并退出
 ### 工具预检（执行前自动检测）
 每次执行任务前，脚本会自动检测本次涉及步骤所需的工具/服务是否可用：
@@ -249,19 +342,25 @@ python process_videos.py \
 | 参数 | 类型 | 默认值 | 说明 |
 |---|---|---|---|
-| `--sheet` | str | 全部 | 指定 sheet：`YouTube视频` 或 `普诺赛中文站` |
-| `--id` | str | — | 指定 extra.id 或 title（单条测试） |
-| `--step` | str | 全跑 | 只执行某步：`download` / `transcode` / `transcribe` |
+| `--sheet <name>` | str | 全部 | 指定 sheet 名称 |
+| `--id <id>` | str | — | 指定 extra.id 或 title（单条测试） |
+| `--step <step>` | str | 全跑 | 只执行指定步骤（可多次指定，如 `--step transcode --step transcribe`）：`download` / `transcode` / `transcribe` / `analyze` |
 | `--force` | flag | off | 强制重做下载+转码，忽略已有文件 |
-| `--concurrency` | int | 1 | 并发数，建议 2~3 |
-| `--retry` | int | 0 | 每步失败最大重试次数 |
-| `--retry-delay` | float | 5 | 重试间隔基数（秒），指数退避 5→10→20 |
-| `--download-timeout` | int | 600 | 单个下载任务最长执行时间（秒） |
-| `--transcode-timeout` | int | 600 | 单个转码任务最长执行时间（秒） |
-| `--transcribe-timeout` | int | 600 | 单个识别任务最长执行时间（秒） |
-| `--analyze-timeout` | int | 300 | 单个 AI 分析任务最长执行时间（秒） |
+| `--concurrency <n>` | int | 1 | 并发数，建议 2~3 |
+| `--retry <n>` | int | 0 | 每步失败最大重试次数 |
+| `--retry-delay <n>` | float | 5 | 重试间隔基数（秒），指数退避 5→10→20 |
+| `--download-timeout <n>` | int | 600 | 单个下载任务最长执行时间（秒） |
+| `--transcode-timeout <n>` | int | 600 | 单个转码任务最长执行时间（秒） |
+| `--transcribe-timeout <n>` | int | 600 | 单个识别任务最长执行时间（秒） |
+| `--analyze-timeout <n>` | int | 300 | 单个 AI 分析任务最长执行时间（秒） |
 | `--dry-run` | flag | off | 干跑模式，只列任务不执行 |
-| `--retry-failed` | path | — | 从报告 JSON 重跑失败项 |
+| `--retry-failed <path>` | path | — | 从报告 JSON 重跑失败项 |
+| `--init` | flag | off | 复制 .env.example 到当前目录并重命名为 .env |
+| `--file <path>` | path | — | 指定 Excel 文件路径（优先级高于 EXCEL_FILE 环境变量） |
+| `--input <path>` | path | — | 指定本地视频文件路径（跳过下载，直接转码→识别→分析） |
+| `--url <url>` | str | — | 直接指定视频下载链接（跳过 Excel），支持标准链接和内嵌链接 |
+| `--name <name>` | str | — | 指定输出文件名，不含扩展名（与 --url / --input 配合使用） |
+| `--env-file <path>` | path | .env | 指定要加载的 .env 文件路径 |
 ---
@@ -340,11 +439,13 @@ AI_TIMEOUT=300
 ```bash
 # 已有识别文本，只跑 AI 分析
+node process_videos.js --sheet "普诺赛中文站" --id 427 --step analyze
+# 或 Python 版本
 python process_videos.py --sheet "普诺赛中文站" --id 427 --step analyze
 # 单独跑 analyze 超过 16 条不会写入 Excel
 # 要想写入 Excel 跑完整流程 --step analyze
-python process_videos.py --sheet "YouTube视频" --step analyze --concurrency 2
+node process_videos.js --sheet "YouTube视频" --step analyze --concurrency 2
 ```
 ### 禁用 AI 分析
@@ -442,16 +543,18 @@ python process_videos.py --sheet "YouTube视频" --step analyze --concurrency 2
 ```bash
 # 1. 干跑预览
+node process_videos.js --dry-run
+# 或 Python 版本
 python process_videos.py --dry-run
 # 2. 单条验证
-python process_videos.py --sheet "YouTube视频" --id 2143 --retry 2
+node process_videos.js --sheet "YouTube视频" --id 2143 --retry 2
 # 3. 全量执行
-python process_videos.py --concurrency 3 --retry 3
+node process_videos.js --concurrency 3 --retry 3
 # 4. 查看报告，重跑失败项
-python process_videos.py --retry-failed reports/report_xxx.json --concurrency 2 --retry 3
+node process_videos.js --retry-failed reports/report_xxx.json --concurrency 2 --retry 3
 ```
 ---
@@ -568,22 +671,58 @@ python process_videos.py --retry-failed reports/report_xxx.json --concurrency 2
 ## 换电脑使用
-1. 安装上述所有必装工具，确保 `yt-dlp`、`ffmpeg`、`ffprobe`、`node` 均在 PATH
-2. `pip install pandas openpyxl requests python-dotenv`
-3. `cp .env.example .env`，根据实际情况修改 `.env` 中的路径、代理端口和字段映射
-4. 用 Firefox 登录 YouTube，设置 `YOUTUBE_COOKIES_FROM_BROWSER=firefox`
-5. B站 cookie 仍需手动导出 `cookies/bilibili.txt`
-6. 启动代理（Clash Verge 等），确认端口匹配 `YOUTUBE_PROXY`
-7. `python process_videos.py --dry-run` 验证
+### Node.js 版本
+1. 安装 Node.js (18+)：[nodejs.org](https://nodejs.org/)
+2. 安装视频处理工具：
+   ```bash
+   npm install -g video-pipeline
+   ```
+3. 克隆或下载项目文件（`.env.example`、`.env`、`cookies/` 等）
+4. 安装必装工具：`yt-dlp`、`ffmpeg`、`ffprobe`，确保均在 PATH
+5. 用 Firefox 登录 YouTube，设置 `YOUTUBE_COOKIES_FROM_BROWSER=firefox`
+6. B站 cookie 仍需手动导出 `cookies/bilibili.txt`
+7. 启动代理（Clash Verge 等），确认端口匹配 `YOUTUBE_PROXY`
+8. `video-pipeline --dry-run` 验证
+### Python 版本
+1. 安装 Python 3.9+：[python.org](https://www.python.org/)
+2. 安装必装工具：`yt-dlp`、`ffmpeg`、`ffprobe`，确保均在 PATH
+3. 安装 Python 依赖：`pip install pandas openpyxl requests python-dotenv questionary`
+4. `cp .env.example .env`，根据实际情况修改 `.env` 中的路径、代理端口和字段映射
+5. 用 Firefox 登录 YouTube，设置 `YOUTUBE_COOKIES_FROM_BROWSER=firefox`
+6. B站 cookie 仍需手动导出 `cookies/bilibili.txt`
+7. 启动代理（Clash Verge 等），确认端口匹配 `YOUTUBE_PROXY`
+8. `python process_videos.py --dry-run` 验证
+---
 ## 适配其他 Excel
 如果需要用这套脚本处理**其他项目的 Excel**（列名不同、平台不同）：
+**方法一：修改 .env 文件**
 1. 复制 `.env.example` 为新 `.env`（或修改现有 `.env`）
 2. 修改 `EXCEL_FILE` 指向新 Excel
 3. 修改列映射（`COL_ID`、`COL_TITLE`、`COL_CONTENT` 及各平台列名）
 4. 修改 `VIDEO_SHEETS` 为新的 sheet 名称
 5. 如需新平台，在 `PLATFORM_PRIORITY` 中添加 key，并配置对应的 `{KEY}_URL_TPL`
-6. `python process_videos.py --dry-run` 验证配置
+6. `node process_videos.js --dry-run` 验证配置
 7. 跑全量
+**方法二：使用 --file 选项（推荐）**
+```bash
+# 直接指定 Excel 文件，无需修改 .env
+node process_videos.js --file "data/其他项目.xlsx" --dry-run
+# 配合 --env-file 使用自定义环境变量
+node process_videos.js --file "data/其他项目.xlsx" --env-file ".env.其他项目" --dry-run
+```
+**优点：**
+- 无需修改 `.env` 文件
+- 可以为不同项目创建不同的 `.env` 配置文件
+- 命令行优先级高于环境变量

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "video-pipeline",
-  "version": "1.0.4",
+  "version": "1.1.0",
   "description": "视频下载、转码、文本识别、AI 关键词分析一体化流程 CLI 工具",
   "keywords": [
     "video",

package/process_videos.js CHANGED Viewed

@@ -80,6 +80,21 @@ const PLATFORM_COL_MAP = {
   youkuId: COL_YOUKUID,
 };
+// ============================== 工具函数 ==============================
+/**
+ * Wrap `text` in an ANSI escape sequence so it is rendered in color on a terminal.
+ *
+ * @param {string} color - One of: dim, yellow, cyan, green, red, blue, magenta, reset.
+ *   Any other name applies no color.
+ * @param {string} text - Text to colorize.
+ * @returns {string} The color code (empty for unknown names), then `text`, then the
+ *   ANSI reset sequence. The reset sequence is always appended.
+ */
+function c(color, text) {
+  const colors = {
+    dim: '\x1b[2m',
+    yellow: '\x1b[33m',
+    cyan: '\x1b[36m',
+    green: '\x1b[32m',
+    red: '\x1b[31m',
+    blue: '\x1b[34m',
+    magenta: '\x1b[35m',
+    reset: '\x1b[0m',
+  };
+  // Own-property check: a name inherited from Object.prototype (e.g. "toString")
+  // must count as "unknown" instead of leaking a function into the output string.
+  const prefix = Object.hasOwn(colors, color) ? colors[color] : '';
+  return prefix + text + colors.reset;
+}
 const PLATFORM_PRIORITY = (process.env.PLATFORM_PRIORITY || 'bilibiliBvid,youtubeId,tencentVid,youkuId')
   .split(',').map(s => s.trim()).filter(Boolean);
@@ -1033,11 +1048,9 @@ async function transcribeService(audioFile, stem, maxRetries, retryDelay, timeou
       }
       // Run inference
-      const fileStream = fs.createReadStream(audioFile);
-      const fileStat = fs.statSync(audioFile);
+      const fileBlob = await fs.openAsBlob(audioFile);
       const form = new FormData();
-      // Use ReadStream directly - Node.js fetch supports it natively for FormData
-      form.append('file', fileStream, path.basename(audioFile));
+      form.append('file', fileBlob, path.basename(audioFile));
       form.append('temperature', '0.0');
       form.append('temperature_inc', '0.2');
       form.append('response_format', 'json');
@@ -1359,6 +1372,258 @@ async function processOneTask(row, sheetName, steps, maxRetries, retryDelay, for
 }
 // ============================== 主控流程 ==============================
+// ═══════════════════════════════════════════════════════════════════
+// 本地文件流水线（--input 模式）
+// ═══════════════════════════════════════════════════════════════════
+/**
+ * Validate a local media file and work out which pipeline steps can run on it.
+ *
+ * Checks, in order: the path exists, is a regular file, is non-empty and is readable.
+ * It then probes container and stream information with ffprobe. When ffprobe is missing
+ * or fails, the file is still reported as valid and every step is optimistically assumed
+ * feasible, leaving the final verdict to the later steps.
+ *
+ * @param {string} filePath - Path to the media file (relative or absolute).
+ * @returns {{valid: boolean, format: string, hasVideo: boolean, hasAudio: boolean,
+ *   videoCodec: string, audioCodec: string, duration: number, width: number,
+ *   height: number, errors: string[], feasibleSteps: string[]}}
+ *   `errors` lists fatal problems when `valid` is false and warnings otherwise.
+ *   `duration` is in seconds and is 0 when unknown.
+ */
+function validateInputFile(filePath) {
+  const result = {
+    valid: false, format: '', hasVideo: false, hasAudio: false,
+    videoCodec: '', audioCodec: '', duration: 0, width: 0, height: 0,
+    errors: [], feasibleSteps: [],
+  };
+  // 1. The path must point to an existing, non-empty, readable regular file.
+  const absPath = path.resolve(filePath);
+  if (!fs.existsSync(absPath)) {
+    result.errors.push('文件不存在');
+    return result;
+  }
+  const stat = fs.statSync(absPath);
+  if (!stat.isFile()) {
+    result.errors.push('不是一个文件');
+    return result;
+  }
+  if (stat.size === 0) {
+    result.errors.push('文件大小为 0');
+    return result;
+  }
+  try {
+    // Without this check an unreadable file would only surface as an ffprobe failure,
+    // which is treated as non-fatal below.
+    fs.accessSync(absPath, fs.constants.R_OK);
+  } catch {
+    result.errors.push('文件无法读取（权限不足）');
+    return result;
+  }
+  // 2. Probe with ffprobe.
+  if (!which(FFPROBE)) {
+    result.errors.push(`ffprobe 不可用 (${FFPROBE})`);
+    result.valid = true; // The file itself is fine; stream info just cannot be probed.
+    result.feasibleSteps = ['transcode', 'transcribe', 'analyze']; // Optimistic guess.
+    return result;
+  }
+  // execSync runs the command through a shell, so the path must be quoted for it.
+  // POSIX shells expand `$`, backticks and `"` inside double quotes; single quotes
+  // (with embedded single quotes escaped) keep the file name literal.
+  const quotedPath = process.platform === 'win32'
+    ? `"${absPath}"`
+    : `'${absPath.replace(/'/g, "'\\''")}'`;
+  try {
+    const probeRaw = execSync(
+      `${FFPROBE} -v error -show_entries stream=codec_type,codec_name,width,height -show_entries format=format_name,duration -of json ${quotedPath}`,
+      { encoding: 'utf-8', timeout: 30000 }
+    );
+    const info = JSON.parse(probeRaw);
+    // Container-level information.
+    if (info.format) {
+      // format_name may be a comma-separated list of aliases; keep the first one.
+      result.format = (info.format.format_name || '').split(',')[0];
+      // ffprobe may report a non-numeric duration (e.g. "N/A"); normalize that to 0.
+      const duration = Number.parseFloat(info.format.duration || '0');
+      result.duration = Number.isFinite(duration) ? duration : 0;
+    }
+    // Stream-level information. With several streams of one type, the last one wins.
+    if (info.streams) {
+      for (const s of info.streams) {
+        if (s.codec_type === 'video') {
+          result.hasVideo = true;
+          result.videoCodec = s.codec_name || '';
+          result.width = s.width || 0;
+          result.height = s.height || 0;
+        }
+        if (s.codec_type === 'audio') {
+          result.hasAudio = true;
+          result.audioCodec = s.codec_name || '';
+        }
+      }
+    }
+    result.valid = true;
+    // 3. Decide which steps are feasible.
+    // NOTE(review): 'transcode' is gated on a video stream, so audio-only inputs reach
+    // transcription without being transcoded first — confirm this is intended.
+    if (result.hasVideo) {
+      result.feasibleSteps.push('transcode');
+    }
+    if (result.hasAudio) {
+      result.feasibleSteps.push('transcribe', 'analyze');
+    }
+    // Neither video nor audio: nothing can be done with this file.
+    if (!result.hasVideo && !result.hasAudio) {
+      result.feasibleSteps = [];
+      result.errors.push('文件不包含视频或音频流，无法处理');
+    }
+    // Video without audio: only transcoding is offered.
+    if (result.hasVideo && !result.hasAudio) {
+      result.errors.push('文件不含音频轨道，将跳过语音识别和 AI 分析');
+    }
+  } catch (e) {
+    result.errors.push(`ffprobe 解析失败: ${(e.stderr || e.message || '').slice(0, 200)}`);
+    result.valid = true; // The file exists and is non-empty; let ffmpeg be the judge.
+    result.feasibleSteps = ['transcode', 'transcribe', 'analyze'];
+  }
+  return result;
+}
+/**
+ * Decide what to do when an --input mode output file (transcode / transcription result)
+ * is already on disk.
+ *
+ * If nothing exists at `proposedPath` the answer is 'overwrite' and no prompt is shown.
+ * Otherwise the existing file's size is printed and the user picks interactively.
+ * This function only reports the decision; it never modifies or deletes the file.
+ *
+ * @param {string} proposedPath - Path of the output file about to be generated.
+ * @returns {Promise<{action: 'overwrite'|'skip', path: string}>}
+ */
+async function resolveInputConflict(proposedPath) {
+  const outcome = { action: 'overwrite', path: proposedPath };
+  if (fs.existsSync(proposedPath)) {
+    const bytes = fs.statSync(proposedPath).size;
+    const sizeMb = (bytes / 1024 / 1024).toFixed(1);
+    console.log(`\n⚠️  文件已存在: ${proposedPath} (${sizeMb} MB)`);
+    const options = [
+      { name: '覆盖已有文件 (overwrite)', value: 'overwrite', description: '删除现有文件，重新生成' },
+      { name: '跳过此步骤 (skip)', value: 'skip', description: '保留现有文件，不重新处理' },
+    ];
+    outcome.action = await select({ message: '如何处理已有文件?', choices: options });
+  }
+  return outcome;
+}
+/**
+ * Standalone pipeline for --input mode: transcode → transcribe → analyze for one local file.
+ *
+ * The step functions are chained directly rather than going through processOneTask
+ * (which relies on findDownloadedFile to locate files under DOWNLOADS_DIR), so the
+ * input path is handed to each step exactly as given.
+ *
+ * Step failures and exceptions are logged and do not abort the run. Text results are
+ * written to REPORTS_DIR/input-tasks/<stem>.txt, and a summary of steps that did not
+ * succeed is printed at the end.
+ *
+ * @param {object} opts
+ * @param {string} opts.inputPath - Path of the local media file.
+ * @param {string} opts.stem - Base name used for log prefixes and output files.
+ * @param {string} opts.sheetName - Sub-directory name under TRANSCODED_DIR.
+ * @param {string[]} opts.steps - Steps to run ('transcode' / 'transcribe' / 'analyze').
+ * @param {number} opts.maxRetries - Retry count forwarded to every step.
+ * @param {number} opts.retryDelay - Retry delay forwarded to every step.
+ * @param {boolean} opts.force - Forwarded to stepTranscode.
+ * @param {number} opts.transcodeTimeout - Timeout forwarded to stepTranscode.
+ * @param {number} opts.transcribeTimeout - Timeout forwarded to stepTranscribe.
+ * @param {number} opts.analyzeTimeout - Timeout forwarded to stepAnalyze.
+ * @param {boolean} opts.whisperAvailable - Whether the whisper backend passed its pre-check.
+ * @param {object} opts.fileInfo - Result of validateInputFile (format and duration are used).
+ * @returns {Promise<void>}
+ */
+async function runInputTask(opts) {
+  const {
+    inputPath, stem, sheetName, steps,
+    maxRetries, retryDelay, force,
+    transcodeTimeout, transcribeTimeout, analyzeTimeout,
+    whisperAvailable, fileInfo,
+  } = opts;
+  // Keep exception text short enough for a single console line.
+  const brief = (e) => (e.message || '').slice(0, 200);
+  console.log(c('dim', '\n── 开始执行 ──\n'));
+  // ── download: skipped (local file) ──
+  console.log(`  [${stem}] 📥 下载: ${c('yellow', '已跳过 (本地文件)')}`);
+  // ── transcode ──
+  let tcFile = null;
+  if (steps.includes('transcode')) {
+    console.log(`  [${stem}] 🎵 开始转码...`);
+    try {
+      const { file, error } = await stepTranscode(inputPath, sheetName, maxRetries, retryDelay, force, transcodeTimeout);
+      tcFile = file;
+      if (file && fs.existsSync(file)) {
+        const size = (fs.statSync(file).size / 1024 / 1024).toFixed(1);
+        console.log(`  [${stem}] 🎵 转码完成: ${file} (${size} MB)`);
+      } else {
+        console.log(`  [${stem}] 🎵 转码: ${c(file ? 'yellow' : 'red', file ? '已跳过 (文件已存在)' : '失败 — ' + (error || ''))}`);
+      }
+    } catch (e) {
+      console.log(`  [${stem}] 🎵 转码: ${c('red', '异常 — ' + brief(e))}`);
+    }
+    if (!tcFile) {
+      console.log(c('yellow', '\n⚠️  转码未产出文件，后续步骤将跳过\n'));
+    }
+  } else if (steps.includes('transcribe')) {
+    // Transcribe without transcode: prefer an already transcoded file if one exists.
+    const tcDir = path.join(TRANSCODED_DIR, sheetName);
+    const expectedTc = path.join(tcDir, stem + TRANSCODE_EXT);
+    if (fs.existsSync(expectedTc)) {
+      tcFile = expectedTc;
+      console.log(`  [${stem}] 🎵 转码: ${c('yellow', '使用已有文件 ' + path.basename(expectedTc))}`);
+    } else {
+      console.log(`  [${stem}] 🎵 转码: ${c('red', '未找到转码文件，将尝试用原始文件识别（可能失败）')}`);
+      tcFile = inputPath;
+    }
+  } else {
+    tcFile = inputPath;
+  }
+  // ── transcribe ──
+  let transcribeText = '';
+  if (steps.includes('transcribe') && tcFile) {
+    if (!whisperAvailable) {
+      console.log(`  [${stem}] 📝 识别: ${c('red', 'whisper 不可用')}`);
+    } else {
+      console.log(`  [${stem}] 📝 开始语音识别...`);
+      try {
+        const { text, error } = await stepTranscribe(tcFile, maxRetries, retryDelay, transcribeTimeout);
+        if (text && typeof text === 'string') {
+          transcribeText = text;
+          console.log(`  [${stem}] 📝 识别完成: ${text.length} 字符`);
+        } else {
+          console.log(`  [${stem}] 📝 识别: ${c('red', '失败 — ' + (error || ''))}`);
+        }
+      } catch (e) {
+        console.log(`  [${stem}] 📝 识别: ${c('red', '异常 — ' + brief(e))}`);
+      }
+    }
+  }
+  // ── AI analyze ──
+  let analyzeText = '';
+  if (steps.includes('analyze') && transcribeText) {
+    const aiEnabled = (process.env.AI_ENABLED || 'true').toLowerCase() === 'true';
+    if (aiEnabled) {
+      console.log(`  [${stem}] 🤖 开始 AI 分析...`);
+      try {
+        const { text: kw, error } = await stepAnalyze(transcribeText, maxRetries, retryDelay, analyzeTimeout);
+        if (kw && typeof kw === 'string') {
+          analyzeText = kw;
+          console.log(`  [${stem}] 🤖 AI分析完成: ${kw.length} 字符`);
+        } else {
+          console.log(`  [${stem}] 🤖 AI分析: ${c('red', '失败 — ' + (error || ''))}`);
+        }
+      } catch (e) {
+        console.log(`  [${stem}] 🤖 AI分析: ${c('red', '异常 — ' + brief(e))}`);
+      }
+    } else {
+      console.log(`  [${stem}] 🤖 AI分析: ${c('yellow', '已禁用 (AI_ENABLED=false)')}`);
+    }
+  } else if (steps.includes('analyze')) {
+    // Analysis only works on a transcript produced earlier in this same run. Say so
+    // explicitly instead of skipping silently (e.g. when only --step analyze was given).
+    console.log(`  [${stem}] 🤖 AI分析: ${c('yellow', '已跳过 (本次运行没有可用的识别文本)')}`);
+  }
+  // ── save text results ──
+  if (transcribeText || analyzeText) {
+    const outDir = path.join(REPORTS_DIR, 'input-tasks');
+    fs.mkdirSync(outDir, { recursive: true });
+    const outFile = path.join(outDir, `${stem}.txt`);
+    const lines = [
+      `文件: ${inputPath}`,
+      `平台: local`,
+      `文件格式: ${fileInfo.format || 'unknown'}`,
+      `时长: ${fileInfo.duration ? fileInfo.duration.toFixed(1) + 's' : 'unknown'}`,
+      '', '='.repeat(60), '',
+    ];
+    if (transcribeText) {
+      lines.push('【语音识别内容】', '', transcribeText, '');
+    }
+    if (analyzeText) {
+      lines.push('【AI 分析关键词】', '', analyzeText);
+    }
+    fs.writeFileSync(outFile, lines.join('\n'), 'utf-8');
+    console.log(`\n  📄 报告已保存: ${outFile}`);
+  }
+  // ── summary ──
+  console.log('');
+  const success = [];
+  if (tcFile) success.push('transcode');
+  if (transcribeText) success.push('transcribe');
+  if (analyzeText) success.push('analyze');
+  // Only requested steps can count as failed; 'download' never runs in this mode.
+  const failed = steps.filter(s => s !== 'download' && !success.includes(s));
+  if (failed.length === 0) {
+    console.log(c('green', '✅ 全部步骤执行成功'));
+  } else {
+    console.log(c('yellow', `⚠️  ${failed.length} 个步骤未成功: ${failed.join(', ')}`));
+  }
+  console.log('');
+}
 // ═══════════════════════════════════════════════════════════════════
 // URL 直链流水线（--url 模式）
 // ═══════════════════════════════════════════════════════════════════
@@ -1823,13 +2088,13 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
     .description('视频下载、转码、文本识别、AI分析一体化流程')
     .option('--sheet <name>', '指定 sheet 名称')
     .option('--id <id>', '指定 extra.id 或 title（单条测试）')
-    .option('--step <step>', '只执行某一步：download / transcode / transcribe / analyze', (val) => {
+    .option('--step <step>', '指定执行步骤（可多次指定），如 --step transcode --step transcribe', (val, prev) => {
       const allowed = ['download', 'transcode', 'transcribe', 'analyze'];
       if (!allowed.includes(val)) {
         console.error(`Invalid step: ${val}. Must be one of: ${allowed.join(', ')}`);
         process.exit(1);
       }
-      return val;
+      return [...(prev || []), val];
     })
     .option('--force', '强制重做下载+转码（忽略已有文件）')
     .option('--concurrency <n>', '并发数，默认 1', parseInt, 1)
@@ -1843,8 +2108,9 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
     .option('--retry-failed <path>', '从报告 JSON 重跑失败项')
     .option('--init', '复制 .env.example 到当前目录并重命名为 .env')
     .option('--file <path>', '指定 Excel 文件路径（优先级高于 EXCEL_FILE 环境变量）')
+    .option('--input <path>', '指定本地视频文件路径（跳过下载，直接转码→识别→分析）')
     .option('--url <url>', '直接指定视频下载链接（跳过 Excel），支持标准链接和内嵌链接')
-    .option('--name <name>', '指定下载文件名，不含扩展名（与 --url 配合使用）')
+    .option('--name <name>', '指定输出文件名，不含扩展名（与 --url / --input 配合使用）')
     .option('--env-file <path>', '指定要加载的 .env 文件路径（默认: 当前目录 .env）');
   program.parse();
@@ -1906,7 +2172,7 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
     EXCEL_FILE = path.resolve(opts.file);
     logInfo(`Excel 文件覆盖为: ${EXCEL_FILE}`);
   }
-  const steps = opts.step ? [opts.step] : ['download', 'transcode', 'transcribe', 'analyze'];
+  const steps = opts.step?.length ? opts.step : ['download', 'transcode', 'transcribe', 'analyze'];
   // ── --url 模式：直接处理单个视频链接 ──
   if (opts.url) {
     const parsed = parseUrl(opts.url);
@@ -1982,6 +2248,112 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
     process.exit(0);
   }
+  // ── --input mode: process a local media file directly ──
+  // Validates the file, prints what was detected, settles the step list and any
+  // conflict with an existing transcode output, then runs runInputTask and exits.
+  if (opts.input) {
+    const inputPath = path.resolve(opts.input);
+    console.log(c('dim', '\n── 文件校验 ──'));
+    console.log(`  文件: ${c('cyan', inputPath)}`);
+    const fileInfo = validateInputFile(inputPath);
+    if (!fileInfo.valid) {
+      console.log(c('red', `\n❌ 无法处理该文件:`));
+      for (const e of fileInfo.errors) {
+        console.log(c('red', `   ${e}`));
+      }
+      process.exit(1);
+    }
+    // Show what was detected.
+    console.log(`  格式: ${c('cyan', fileInfo.format || 'unknown')}`);
+    if (fileInfo.hasVideo) {
+      console.log(`  视频: ${c('cyan', fileInfo.videoCodec)} ${fileInfo.width}x${fileInfo.height}`);
+    }
+    if (fileInfo.hasAudio) {
+      console.log(`  音频: ${c('cyan', fileInfo.audioCodec)}`);
+    }
+    if (fileInfo.duration > 0) {
+      const dur = fileInfo.duration;
+      const mm = Math.floor(dur / 60);
+      const ss = Math.floor(dur % 60);
+      console.log(`  时长: ${c('cyan', `${mm}:${String(ss).padStart(2, '0')}`)} (${dur.toFixed(1)}s)`);
+    }
+    // For a valid file, the remaining entries in `errors` are warnings.
+    if (fileInfo.errors.length > 0) {
+      console.log('');
+      for (const e of fileInfo.errors) {
+        console.log(c('yellow', `  ⚠️  ${e}`));
+      }
+    }
+    // Steps to run: everything feasible by default; an explicit --step list is
+    // narrowed down to the feasible steps.
+    const defaultSteps = fileInfo.feasibleSteps;
+    let steps;
+    if (opts.step?.length) {
+      steps = opts.step.filter(s => defaultSteps.includes(s));
+      if (steps.length === 0) {
+        console.log(c('yellow', `\n⚠️  --step ${opts.step.join(', ')} 不可行（文件不支持）\n`));
+        process.exit(1);
+      }
+    } else {
+      steps = defaultSteps;
+    }
+    console.log(`\n  可执行步骤: ${c('green', steps.join(' → '))}`);
+    // Output naming.
+    const sheetName = 'local';
+    const stem = opts.name || path.parse(inputPath).name;
+    // Check for an existing transcode output unless --force already settles it.
+    // NOTE(review): the path checked here is built from `stem` (which honors --name);
+    // confirm stepTranscode names its output the same way.
+    let forceTranscode = opts.force || false;
+    if (steps.includes('transcode') && !opts.force) {
+      const tcDir = path.join(TRANSCODED_DIR, sheetName);
+      const tcPath = path.join(tcDir, stem + TRANSCODE_EXT);
+      const tcExists = fs.existsSync(tcPath);
+      const conflict = await resolveInputConflict(tcPath);
+      if (conflict.action === 'skip') {
+        console.log(c('yellow', '\n⏭️  已跳过转码\n'));
+        steps = steps.filter(s => s !== 'transcode');
+      } else if (tcExists) {
+        // The user explicitly chose "overwrite". Without forcing, the transcode step
+        // would presumably keep the existing output (that is what --force exists to
+        // override), and the choice would be silently ignored.
+        forceTranscode = true;
+      }
+    }
+    if (steps.length === 0) {
+      console.log(c('yellow', '\n无剩余步骤可执行\n'));
+      process.exit(0);
+    }
+    // Make sure the output directory exists.
+    if (steps.includes('transcode')) {
+      fs.mkdirSync(path.join(TRANSCODED_DIR, sheetName), { recursive: true });
+    }
+    // Whisper availability pre-check.
+    let whisperAvailable = false;
+    if (steps.includes('transcribe')) {
+      whisperAvailable = await checkWhisperAvailable();
+      if (!whisperAvailable) {
+        const backend = WHISPER_BACKEND === 'local' ? 'local CLI' : WHISPER_SERVICE;
+        logWarn(`⚠️ whisper not available (${backend}), transcribe step will fail`);
+      }
+    }
+    // Run the pipeline.
+    await runInputTask({
+      inputPath,
+      stem,
+      sheetName,
+      steps,
+      maxRetries: opts.retry,
+      retryDelay: opts.retryDelay,
+      force: forceTranscode,
+      transcodeTimeout: opts.transcodeTimeout,
+      transcribeTimeout: opts.transcribeTimeout,
+      analyzeTimeout: opts.analyzeTimeout,
+      whisperAvailable,
+      fileInfo,
+    });
+    process.exit(0);
+  }
   run({
     targetSheet: opts.sheet || null,