video-pipeline 1.2.5 → 1.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +1 -1
- package/CHANGELOG.md +18 -0
- package/README.md +80 -9
- package/package.json +1 -1
- package/process_videos.js +134 -35
package/.env.example
CHANGED
|
@@ -170,4 +170,4 @@ AI_TIMEOUT=300
|
|
|
170
170
|
AI_TEMPERATURE=0.3
|
|
171
171
|
# 【关联】提示词模板,{content} 占位符会被识别文本替换
|
|
172
172
|
# 【自由】提示词内容可随意修改,但必须保留 {content} 占位符
|
|
173
|
-
AI_PROMPT_TPL
|
|
173
|
+
AI_PROMPT_TPL=帮我归纳总结一下提供内容的关键词,尽可能全面,无遗漏,无重复,无幻想,关键词之间用英文逗号分隔开。如果内容为英文,则关键词全部是英文,如果内容是中文,则关键词以中文为主,可以附带一些英文关键词。这是内容:{content}
|
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [1.2.6] - 2026-06-11
|
|
4
|
+
|
|
5
|
+
### Features
|
|
6
|
+
|
|
7
|
+
- 报告按 sheet/站点分目录存储 (`6610c57`)
|
|
8
|
+
- 统一三种来源报告格式 + 修复多处 bug (`2a6f606`)
|
|
9
|
+
|
|
10
|
+
### Bug Fixes
|
|
11
|
+
|
|
12
|
+
- groupBySheetMap 返回 Map 而非普通对象,修复 for...of 不可迭代错误 (`dfae532`)
|
|
13
|
+
|
|
14
|
+
### Documentation
|
|
15
|
+
|
|
16
|
+
- update (`6ddc48b`)
|
|
17
|
+
- 修正 --input 模式的 {sheet} 表述为固定 local (`be61e29`)
|
|
18
|
+
- 输出结构速查表 — 三来源×四环节对照 (`248168c`)
|
|
19
|
+
|
|
20
|
+
|
|
3
21
|
## [1.2.5] - 2026-06-11
|
|
4
22
|
|
|
5
23
|
### Features
|
package/README.md
CHANGED
|
@@ -158,8 +158,22 @@ WHISPER_LANGUAGE=zh # 空=多语言自动检测(默认),需要指
|
|
|
158
158
|
├── transcoded/ # ffmpeg 转码输出(wav 16kHz mono)
|
|
159
159
|
│ ├── YouTube视频/
|
|
160
160
|
│ └── 普诺赛中文站/
|
|
161
|
-
├── reports/ #
|
|
162
|
-
│
|
|
161
|
+
├── reports/ # 执行报告(按 sheet/站点分目录)
|
|
162
|
+
│ ├── YouTube视频/
|
|
163
|
+
│ │ ├── report_YYYYMMDD_HHMMSS.json # JSON 报告(机器可读,用于重跑)
|
|
164
|
+
│ │ └── tasks/ # 人类可读文本摘要
|
|
165
|
+
│ │ ├── 2143.txt
|
|
166
|
+
│ │ └── ...
|
|
167
|
+
│ ├── 普诺赛中文站/
|
|
168
|
+
│ │ ├── report_YYYYMMDD_HHMMSS.json
|
|
169
|
+
│ │ └── tasks/
|
|
170
|
+
│ │ └── ...
|
|
171
|
+
│ ├── youtube/ # --url 模式按平台名分目录
|
|
172
|
+
│ │ ├── report_YYYYMMDD_HHMMSS.json
|
|
173
|
+
│ │ └── tasks/
|
|
174
|
+
│ └── local/ # --input 模式默认目录
|
|
175
|
+
│ ├── report_YYYYMMDD_HHMMSS.json
|
|
176
|
+
│ └── tasks/
|
|
163
177
|
├── scripts/ # 辅助脚本
|
|
164
178
|
│ ├── release.js # 版本发布脚本
|
|
165
179
|
│ └── regenerate-changelog.js # CHANGELOG 重建脚本
|
|
@@ -253,12 +267,12 @@ node process_videos.js --limit 3 --concurrency 1 # 只处理前3条
|
|
|
253
267
|
### 重跑失败
|
|
254
268
|
|
|
255
269
|
```bash
|
|
256
|
-
# 第一次跑完后生成 reports/report_xxx.json
|
|
270
|
+
# 第一次跑完后生成 reports/{sheet名称}/report_xxx.json
|
|
257
271
|
# 查看失败项:
|
|
258
|
-
node process_videos.js --retry-failed reports/report_20260610_143000.json --dry-run
|
|
272
|
+
node process_videos.js --retry-failed reports/YouTube视频/report_20260610_143000.json --dry-run
|
|
259
273
|
|
|
260
274
|
# 重跑:
|
|
261
|
-
node process_videos.js --retry-failed reports/report_20260610_143000.json --concurrency 2 --retry 3
|
|
275
|
+
node process_videos.js --retry-failed reports/YouTube视频/report_20260610_143000.json --concurrency 2 --retry 3
|
|
262
276
|
```
|
|
263
277
|
|
|
264
278
|
### 超时控制(防止任务卡死)
|
|
@@ -360,7 +374,7 @@ node process_videos.js --input "downloads/产品介绍.mp4" --step analyze
|
|
|
360
374
|
| `--transcribe-timeout <n>` | int | 600 | 单个识别任务最长执行时间(秒) |
|
|
361
375
|
| `--analyze-timeout <n>` | int | 300 | 单个 AI 分析任务最长执行时间(秒) |
|
|
362
376
|
| `--dry-run` | flag | off | 干跑模式,只列任务不执行 |
|
|
363
|
-
| `--retry-failed <path>` | path | — | 从报告 JSON
|
|
377
|
+
| `--retry-failed <path>` | path | — | 从报告 JSON 重跑失败项(如 `reports/YouTube视频/report_xxx.json`) |
|
|
364
378
|
| `--init` | flag | off | 复制 .env.example 到当前目录并重命名为 .env |
|
|
365
379
|
| `--file <path>` | path | — | 指定 Excel 文件路径(优先级高于 EXCEL_FILE 环境变量) |
|
|
366
380
|
| `--input <path>` | path | — | 指定本地视频文件路径(跳过下载,直接转码→识别→分析) |
|
|
@@ -510,9 +524,64 @@ node process_videos.js --sheet "YouTube视频" --step analyze --concurrency 2
|
|
|
510
524
|
|
|
511
525
|
---
|
|
512
526
|
|
|
513
|
-
##
|
|
527
|
+
## 输出结构速查表
|
|
528
|
+
|
|
529
|
+
三种输入来源在不同处理环节的输出路径汇总如下。所有路径均以 `output/` 为根(可通过 `DOWNLOADS_DIR` / `TRANSCODED_DIR` / `REPORTS_DIR` 环境变量覆盖)。
|
|
530
|
+
|
|
531
|
+
> `{sheet}` = Excel 工作表名(如 `YouTube视频`、`普诺赛中文站`)
|
|
532
|
+
> `{platform}` = 视频平台标识(如 `youtube`、`bilibili`、`tencentVid`、`youku`)
|
|
533
|
+
> `{stem}` = 去重后的安全文件名(不含扩展名)
|
|
534
|
+
|
|
535
|
+
### ① Excel 批量模式(默认)
|
|
536
|
+
|
|
537
|
+
| 环节 | 输出路径 | 产物格式 | 说明 |
|
|
538
|
+
|------|---------|---------|------|
|
|
539
|
+
| 下载 | `output/downloads/{sheet}/{stem}.mp4` | 视频 | yt-dlp 下载原始视频 |
|
|
540
|
+
| 转码 | `output/transcoded/{sheet}/{stem}.wav` | 音频 | ffmpeg 转 16kHz mono WAV |
|
|
541
|
+
| JSON 报告 | `output/reports/{sheet}/report_YYYYMMDD_HHMMSS.json` | JSON | 机器可读,含 summary + failed_items,可供 --retry-failed 重跑 |
|
|
542
|
+
| 文本报告 | `output/reports/{sheet}/tasks/{stem}.txt` | 文本 | 人类可读,含语音识别原文 + AI 关键词分析 |
|
|
543
|
+
|
|
544
|
+
> 多 sheet 同时执行时,每个 sheet 独立一个子目录,互不干扰。
|
|
545
|
+
|
|
546
|
+
### ② --url 直链模式
|
|
547
|
+
|
|
548
|
+
| 环节 | 输出路径 | 产物格式 | 说明 |
|
|
549
|
+
|------|---------|---------|------|
|
|
550
|
+
| 下载 | `output/downloads/{platform}/{stem}.mp4` | 视频 | yt-dlp 下载单个视频 |
|
|
551
|
+
| 转码 | `output/transcoded/{platform}/{stem}.wav` | 音频 | ffmpeg 转 16kHz mono WAV |
|
|
552
|
+
| JSON 报告 | `output/reports/{platform}/report_YYYYMMDD_HHMMSS.json` | JSON | 格式与 Excel 模式一致 |
|
|
553
|
+
| 文本报告 | `output/reports/{platform}/tasks/{stem}.txt` | 文本 | 含识别原文 + AI 分析 |
|
|
554
|
+
|
|
555
|
+
> `{platform}` 由脚本自动从 URL 解析,如 `https://www.youtube.com/watch?v=xxx` → `youtube`。
|
|
556
|
+
|
|
557
|
+
### ③ --input 本地文件模式
|
|
514
558
|
|
|
515
|
-
|
|
559
|
+
| 环节 | 输出路径 | 产物格式 | 说明 |
|
|
560
|
+
|------|---------|---------|------|
|
|
561
|
+
| 下载 | —(跳过) | — | 本地文件无需下载 |
|
|
562
|
+
| 转码 | `output/transcoded/local/{stem}.wav` | 音频 | ffmpeg 转 16kHz mono WAV |
|
|
563
|
+
| JSON 报告 | `output/reports/local/report_YYYYMMDD_HHMMSS.json` | JSON | 格式与 Excel 模式一致 |
|
|
564
|
+
| 文本报告 | `output/reports/local/tasks/{stem}.txt` | 文本 | 含识别原文 + AI 分析 |
|
|
565
|
+
|
|
566
|
+
> `local` 是 `--input` 模式的固定目录名(与 Excel 模式的 sheet 名无关),所有本地文件处理结果统一归入此目录。
|
|
567
|
+
|
|
568
|
+
---
|
|
569
|
+
|
|
570
|
+
### 三种来源对比一览
|
|
571
|
+
|
|
572
|
+
| 维度 | Excel 批量 | --url 直链 | --input 本地文件 |
|
|
573
|
+
|------|-----------|-----------|-----------------|
|
|
574
|
+
| 输入 | Excel 行(多视频批量) | 单个视频 URL | 本地视频/音频文件 |
|
|
575
|
+
| 下载目录 | `downloads/{sheet}/` | `downloads/{platform}/` | 无 |
|
|
576
|
+
| 转码目录 | `transcoded/{sheet}/` | `transcoded/{platform}/` | `transcoded/local/` |
|
|
577
|
+
| 报告目录 | `reports/{sheet}/` | `reports/{platform}/` | `reports/local/` |
|
|
578
|
+
| 分组依据 | Excel sheet 名 | URL 解析的平台名 | 固定 `local` |
|
|
579
|
+
| 并发支持 | ✅ 多线程 | ❌ 单任务 | ❌ 单任务 |
|
|
580
|
+
| 支持 --retry-failed | ✅ | ❌ | ❌ |
|
|
581
|
+
|
|
582
|
+
---
|
|
583
|
+
|
|
584
|
+
### JSON 报告结构
|
|
516
585
|
|
|
517
586
|
```json
|
|
518
587
|
{
|
|
@@ -538,6 +607,8 @@ node process_videos.js --sheet "YouTube视频" --step analyze --concurrency 2
|
|
|
538
607
|
}
|
|
539
608
|
```
|
|
540
609
|
|
|
610
|
+
### 状态含义
|
|
611
|
+
|
|
541
612
|
- **success**:下载 + 转码 + 识别全部成功,且 AI 分析未失败(成功或已跳过)
|
|
542
613
|
- **partial**:下载 + 转码成功,识别或 AI 分析失败
|
|
543
614
|
- **failed**:下载或转码失败
|
|
@@ -560,7 +631,7 @@ node process_videos.js --sheet "YouTube视频" --id 2143 --retry 2
|
|
|
560
631
|
node process_videos.js --concurrency 3 --retry 3
|
|
561
632
|
|
|
562
633
|
# 4. 查看报告,重跑失败项
|
|
563
|
-
node process_videos.js --retry-failed reports/report_xxx.json --concurrency 2 --retry 3
|
|
634
|
+
node process_videos.js --retry-failed reports/YouTube视频/report_xxx.json --concurrency 2 --retry 3
|
|
564
635
|
```
|
|
565
636
|
|
|
566
637
|
---
|
package/package.json
CHANGED
package/process_videos.js
CHANGED
|
@@ -1157,11 +1157,11 @@ function writeAllContentsToExcel(results, keywordsDict = null) {
|
|
|
1157
1157
|
}
|
|
1158
1158
|
|
|
1159
1159
|
function groupBySheetMap(updates) {
|
|
1160
|
-
const result =
|
|
1160
|
+
const result = new Map();
|
|
1161
1161
|
for (const [compositeKey, text] of updates) {
|
|
1162
1162
|
const [sheetName, key] = compositeKey.split('|');
|
|
1163
|
-
if (!result
|
|
1164
|
-
result
|
|
1163
|
+
if (!result.has(sheetName)) result.set(sheetName, {});
|
|
1164
|
+
result.get(sheetName)[key] = text;
|
|
1165
1165
|
}
|
|
1166
1166
|
return result;
|
|
1167
1167
|
}
|
|
@@ -1178,10 +1178,31 @@ function computeSummary(results) {
|
|
|
1178
1178
|
return { total: results.length, success, partial, failed, no_video: noVideo };
|
|
1179
1179
|
}
|
|
1180
1180
|
|
|
1181
|
-
|
|
1182
|
-
|
|
1181
|
+
/**
|
|
1182
|
+
* 生成执行报告 JSON 文件。
|
|
1183
|
+
* - 提供 sheetName 时:报告存入 REPORTS_DIR/{sheetName}/report_{ts}.json
|
|
1184
|
+
* - 不提供时:按 r.sheet 分组,每 sheet 调用自身,返回路径数组
|
|
1185
|
+
*/
|
|
1186
|
+
function generateReport(results, config, sheetName) {
|
|
1187
|
+
if (!sheetName) {
|
|
1188
|
+
// ── 按 sheet 分组生成 ──
|
|
1189
|
+
const sheetGroups = new Map();
|
|
1190
|
+
for (const r of results) {
|
|
1191
|
+
if (!sheetGroups.has(r.sheet)) sheetGroups.set(r.sheet, []);
|
|
1192
|
+
sheetGroups.get(r.sheet).push(r);
|
|
1193
|
+
}
|
|
1194
|
+
const paths = [];
|
|
1195
|
+
for (const [sheet, items] of sheetGroups) {
|
|
1196
|
+
paths.push(generateReport(items, config, sheet));
|
|
1197
|
+
}
|
|
1198
|
+
return paths;
|
|
1199
|
+
}
|
|
1200
|
+
|
|
1201
|
+
// ── 单 sheet 报告 ──
|
|
1202
|
+
const dir = path.join(REPORTS_DIR, sheetName);
|
|
1203
|
+
fs.mkdirSync(dir, { recursive: true });
|
|
1183
1204
|
const ts = new Date().toISOString().replace(/[-:T]/g, '').slice(0, 15).replace(/(\d{8})(\d{6})/, '$1_$2');
|
|
1184
|
-
const reportFile = path.join(
|
|
1205
|
+
const reportFile = path.join(dir, `report_${ts}.json`);
|
|
1185
1206
|
|
|
1186
1207
|
const summary = computeSummary(results);
|
|
1187
1208
|
|
|
@@ -1528,62 +1549,106 @@ async function runInputTask(opts) {
|
|
|
1528
1549
|
|
|
1529
1550
|
console.log(c('dim', '\n── 开始执行 ──\n'));
|
|
1530
1551
|
|
|
1552
|
+
// ── 解决 stem 重名 ──
|
|
1553
|
+
let usedStem = stem;
|
|
1554
|
+
{
|
|
1555
|
+
let counter = 1;
|
|
1556
|
+
const tcDir = path.join(TRANSCODED_DIR, sheetName);
|
|
1557
|
+
fs.mkdirSync(tcDir, { recursive: true });
|
|
1558
|
+
let testPath = path.join(tcDir, usedStem + TRANSCODE_EXT);
|
|
1559
|
+
while (fs.existsSync(testPath) && !steps.includes('transcode')) {
|
|
1560
|
+
// 跳过转码但转码产物已存在 → 直接用
|
|
1561
|
+
break;
|
|
1562
|
+
}
|
|
1563
|
+
if (steps.includes('transcode') && !force) {
|
|
1564
|
+
while (fs.existsSync(testPath)) {
|
|
1565
|
+
usedStem = `${stem}_${counter}`;
|
|
1566
|
+
testPath = path.join(tcDir, usedStem + TRANSCODE_EXT);
|
|
1567
|
+
counter++;
|
|
1568
|
+
}
|
|
1569
|
+
}
|
|
1570
|
+
}
|
|
1571
|
+
if (usedStem !== stem) {
|
|
1572
|
+
console.log(` ⚠️ stem "${stem}" 已存在 → 使用 "${usedStem}"`);
|
|
1573
|
+
}
|
|
1574
|
+
|
|
1575
|
+
// ── 构建 TaskResult ──
|
|
1576
|
+
const result = new TaskResult(sheetName, usedStem, path.basename(inputPath), 'local', null, usedStem);
|
|
1577
|
+
result.download = new StepResult('skipped');
|
|
1578
|
+
|
|
1531
1579
|
// ── download: 跳过(本地文件)──
|
|
1532
|
-
console.log(` [${
|
|
1580
|
+
console.log(` [${usedStem}] 📥 下载: ${c('yellow', '已跳过 (本地文件)')}`);
|
|
1533
1581
|
|
|
1534
1582
|
// ── transcode ──
|
|
1535
1583
|
let tcFile = null;
|
|
1536
1584
|
if (steps.includes('transcode')) {
|
|
1537
|
-
console.log(` [${
|
|
1585
|
+
console.log(` [${usedStem}] 🎵 开始转码...`);
|
|
1538
1586
|
try {
|
|
1539
1587
|
const { file, error } = await stepTranscode(inputPath, sheetName, maxRetries, retryDelay, force, transcodeTimeout);
|
|
1540
1588
|
tcFile = file;
|
|
1541
1589
|
if (file && fs.existsSync(file)) {
|
|
1542
1590
|
const size = (fs.statSync(file).size / 1024 / 1024).toFixed(1);
|
|
1543
|
-
console.log(` [${
|
|
1591
|
+
console.log(` [${usedStem}] 🎵 转码完成: ${file} (${size} MB)`);
|
|
1592
|
+
result.transcode = new StepResult('success', file);
|
|
1544
1593
|
} else {
|
|
1545
|
-
console.log(` [${
|
|
1594
|
+
console.log(` [${usedStem}] 🎵 转码: ${c(file ? 'yellow' : 'red', file ? '已跳过 (文件已存在)' : '失败 — ' + (error || ''))}`);
|
|
1595
|
+
result.transcode = new StepResult(file ? 'skipped' : 'failed', file, error);
|
|
1546
1596
|
}
|
|
1547
1597
|
} catch (e) {
|
|
1548
|
-
console.log(` [${
|
|
1598
|
+
console.log(` [${usedStem}] 🎵 转码: ${c('red', '异常 — ' + (e.message || '').slice(0, 200))}`);
|
|
1599
|
+
result.transcode = new StepResult('failed', null, String(e.message).slice(0, 500));
|
|
1549
1600
|
}
|
|
1550
1601
|
if (!tcFile) {
|
|
1551
1602
|
console.log(c('yellow', '\n⚠️ 转码未产出文件,后续步骤将跳过\n'));
|
|
1603
|
+
result.overall_status = 'failed';
|
|
1604
|
+
result.error = 'transcode failed';
|
|
1605
|
+
return result;
|
|
1552
1606
|
}
|
|
1553
1607
|
} else if (steps.includes('transcribe')) {
|
|
1554
|
-
// 无 transcode 步骤但有 transcribe:优先使用已有转码文件
|
|
1555
1608
|
const tcDir = path.join(TRANSCODED_DIR, sheetName);
|
|
1556
|
-
const expectedTc = path.join(tcDir,
|
|
1609
|
+
const expectedTc = path.join(tcDir, usedStem + TRANSCODE_EXT);
|
|
1557
1610
|
if (fs.existsSync(expectedTc)) {
|
|
1558
1611
|
tcFile = expectedTc;
|
|
1559
|
-
|
|
1612
|
+
result.transcode = new StepResult('success', tcFile);
|
|
1613
|
+
console.log(` [${usedStem}] 🎵 转码: ${c('yellow', '使用已有文件 ' + path.basename(expectedTc))}`);
|
|
1560
1614
|
} else {
|
|
1561
|
-
console.log(` [${
|
|
1615
|
+
console.log(` [${usedStem}] 🎵 转码: ${c('red', '未找到转码文件,将尝试用原始文件识别(可能失败)')}`);
|
|
1562
1616
|
tcFile = inputPath;
|
|
1617
|
+
result.transcode = new StepResult('warning', inputPath, 'transcode file not found, using raw input');
|
|
1563
1618
|
}
|
|
1564
1619
|
} else {
|
|
1565
1620
|
tcFile = inputPath;
|
|
1621
|
+
result.transcode = new StepResult('success', inputPath);
|
|
1566
1622
|
}
|
|
1567
1623
|
|
|
1568
1624
|
// ── transcribe ──
|
|
1569
1625
|
let transcribeText = '';
|
|
1570
1626
|
if (steps.includes('transcribe') && tcFile) {
|
|
1571
1627
|
if (!whisperAvailable) {
|
|
1572
|
-
console.log(` [${
|
|
1628
|
+
console.log(` [${usedStem}] 📝 识别: ${c('red', 'whisper 不可用')}`);
|
|
1629
|
+
result.transcribe = new StepResult('failed', null, 'whisper unreachable');
|
|
1630
|
+
result.overall_status = 'failed';
|
|
1631
|
+
result.error = 'whisper unreachable';
|
|
1632
|
+
return result;
|
|
1573
1633
|
} else {
|
|
1574
|
-
console.log(` [${
|
|
1634
|
+
console.log(` [${usedStem}] 📝 开始语音识别...`);
|
|
1575
1635
|
try {
|
|
1576
1636
|
const { text, error } = await stepTranscribe(tcFile, maxRetries, retryDelay, transcribeTimeout);
|
|
1577
1637
|
if (text && typeof text === 'string') {
|
|
1578
1638
|
transcribeText = text;
|
|
1579
|
-
console.log(` [${
|
|
1639
|
+
console.log(` [${usedStem}] 📝 识别完成: ${text.length} 字符`);
|
|
1640
|
+
result.transcribe = new StepResult('success', text);
|
|
1580
1641
|
} else {
|
|
1581
|
-
console.log(` [${
|
|
1642
|
+
console.log(` [${usedStem}] 📝 识别: ${c('red', '失败 — ' + (error || ''))}`);
|
|
1643
|
+
result.transcribe = new StepResult('failed', null, error);
|
|
1582
1644
|
}
|
|
1583
1645
|
} catch (e) {
|
|
1584
|
-
console.log(` [${
|
|
1646
|
+
console.log(` [${usedStem}] 📝 识别: ${c('red', '异常 — ' + (e.message || '').slice(0, 200))}`);
|
|
1647
|
+
result.transcribe = new StepResult('failed', null, String(e.message).slice(0, 500));
|
|
1585
1648
|
}
|
|
1586
1649
|
}
|
|
1650
|
+
} else {
|
|
1651
|
+
result.transcribe = new StepResult('skipped');
|
|
1587
1652
|
}
|
|
1588
1653
|
|
|
1589
1654
|
// ── AI analyze ──
|
|
@@ -1591,28 +1656,45 @@ async function runInputTask(opts) {
|
|
|
1591
1656
|
if (steps.includes('analyze') && transcribeText) {
|
|
1592
1657
|
const aiEnabled = (process.env.AI_ENABLED || 'true').toLowerCase() === 'true';
|
|
1593
1658
|
if (aiEnabled) {
|
|
1594
|
-
console.log(` [${
|
|
1659
|
+
console.log(` [${usedStem}] 🤖 开始 AI 分析...`);
|
|
1595
1660
|
try {
|
|
1596
1661
|
const { text: kw, error } = await stepAnalyze(transcribeText, maxRetries, retryDelay, analyzeTimeout);
|
|
1597
1662
|
if (kw && typeof kw === 'string') {
|
|
1598
1663
|
analyzeText = kw;
|
|
1599
|
-
console.log(` [${
|
|
1664
|
+
console.log(` [${usedStem}] 🤖 AI分析完成: ${kw.length} 字符`);
|
|
1665
|
+
result.analyze = new StepResult('success', kw);
|
|
1600
1666
|
} else {
|
|
1601
|
-
console.log(` [${
|
|
1667
|
+
console.log(` [${usedStem}] 🤖 AI分析: ${c('red', '失败 — ' + (error || ''))}`);
|
|
1668
|
+
result.analyze = new StepResult('failed', null, error);
|
|
1602
1669
|
}
|
|
1603
1670
|
} catch (e) {
|
|
1604
|
-
console.log(` [${
|
|
1671
|
+
console.log(` [${usedStem}] 🤖 AI分析: ${c('red', '异常 — ' + (e.message || '').slice(0, 200))}`);
|
|
1672
|
+
result.analyze = new StepResult('failed', null, String(e.message).slice(0, 500));
|
|
1605
1673
|
}
|
|
1606
1674
|
} else {
|
|
1607
|
-
console.log(` [${
|
|
1675
|
+
console.log(` [${usedStem}] 🤖 AI分析: ${c('yellow', '已禁用 (AI_ENABLED=false)')}`);
|
|
1676
|
+
result.analyze = new StepResult('skipped');
|
|
1608
1677
|
}
|
|
1678
|
+
} else {
|
|
1679
|
+
result.analyze = new StepResult('skipped');
|
|
1680
|
+
}
|
|
1681
|
+
|
|
1682
|
+
// ── 判定整体状态 ──
|
|
1683
|
+
if (result.transcode.status === 'failed') {
|
|
1684
|
+
result.overall_status = 'failed';
|
|
1685
|
+
} else if (result.transcribe.status === 'failed' && steps.includes('transcribe')) {
|
|
1686
|
+
result.overall_status = 'partial';
|
|
1687
|
+
} else if (result.analyze.status === 'failed') {
|
|
1688
|
+
result.overall_status = 'partial';
|
|
1689
|
+
} else {
|
|
1690
|
+
result.overall_status = 'success';
|
|
1609
1691
|
}
|
|
1610
1692
|
|
|
1611
1693
|
// ── 保存文本结果 ──
|
|
1612
1694
|
if (transcribeText || analyzeText) {
|
|
1613
|
-
const outDir = path.join(REPORTS_DIR, '
|
|
1695
|
+
const outDir = path.join(REPORTS_DIR, sheetName, 'tasks');
|
|
1614
1696
|
fs.mkdirSync(outDir, { recursive: true });
|
|
1615
|
-
const outFile = path.join(outDir, `${
|
|
1697
|
+
const outFile = path.join(outDir, `${usedStem}.txt`);
|
|
1616
1698
|
const lines = [
|
|
1617
1699
|
`文件: ${inputPath}`,
|
|
1618
1700
|
`平台: local`,
|
|
@@ -1643,6 +1725,8 @@ async function runInputTask(opts) {
|
|
|
1643
1725
|
console.log(c('yellow', `⚠️ ${failed.length} 个步骤未成功: ${failed.join(', ')}`));
|
|
1644
1726
|
}
|
|
1645
1727
|
console.log('');
|
|
1728
|
+
|
|
1729
|
+
return result;
|
|
1646
1730
|
}
|
|
1647
1731
|
|
|
1648
1732
|
|
|
@@ -1738,7 +1822,7 @@ async function runUrlTask(opts) {
|
|
|
1738
1822
|
const analyzeText = (result.analyze && typeof result.analyze.file === 'string') ? result.analyze.file : '';
|
|
1739
1823
|
|
|
1740
1824
|
if (transcribeText || analyzeText) {
|
|
1741
|
-
const outDir = path.join(REPORTS_DIR, '
|
|
1825
|
+
const outDir = path.join(REPORTS_DIR, platform, 'tasks');
|
|
1742
1826
|
fs.mkdirSync(outDir, { recursive: true });
|
|
1743
1827
|
const outFile = path.join(outDir, `${stem}.txt`);
|
|
1744
1828
|
const lines = [
|
|
@@ -1758,6 +1842,7 @@ async function runUrlTask(opts) {
|
|
|
1758
1842
|
}
|
|
1759
1843
|
|
|
1760
1844
|
console.log(c('bold', c('green', `\n\uD83C\uDF89 \u5168\u90E8\u5B8C\u6210! (${successes.length}/${steps.length} \u6B65\u6210\u529F)\n`)));
|
|
1845
|
+
return result;
|
|
1761
1846
|
}
|
|
1762
1847
|
|
|
1763
1848
|
async function run({
|
|
@@ -1872,10 +1957,10 @@ async function run({
|
|
|
1872
1957
|
sheets, target_id: targetId, steps, max_retries: maxRetries,
|
|
1873
1958
|
retry_delay: retryDelay, concurrency, force,
|
|
1874
1959
|
};
|
|
1875
|
-
const
|
|
1960
|
+
const reportPaths = generateReport(results, config);
|
|
1876
1961
|
printReportSummary(results);
|
|
1877
1962
|
|
|
1878
|
-
logInfo(`all done!
|
|
1963
|
+
logInfo(`all done! reports: ${Array.isArray(reportPaths) ? reportPaths.join(', ') : reportPaths}`);
|
|
1879
1964
|
}
|
|
1880
1965
|
|
|
1881
1966
|
function printDryRun(tasks, steps, env) {
|
|
@@ -2068,9 +2153,9 @@ async function runFromReport(reportPath, steps, maxRetries, retryDelay, concurre
|
|
|
2068
2153
|
|
|
2069
2154
|
const config = { retry_from: reportPath, steps, max_retries: maxRetries,
|
|
2070
2155
|
retry_delay: retryDelay, concurrency, force };
|
|
2071
|
-
const
|
|
2156
|
+
const reportPaths = generateReport(results, config);
|
|
2072
2157
|
printReportSummary(results);
|
|
2073
|
-
logInfo(`all done!
|
|
2158
|
+
logInfo(`all done! reports: ${Array.isArray(reportPaths) ? reportPaths.join(', ') : reportPaths}`);
|
|
2074
2159
|
}
|
|
2075
2160
|
|
|
2076
2161
|
// ============================== CLI ==============================
|
|
@@ -2099,7 +2184,7 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
|
|
|
2099
2184
|
.option('--transcribe-timeout <n>', '识别超时(秒),默认 600', parseInt, 600)
|
|
2100
2185
|
.option('--analyze-timeout <n>', 'AI 分析超时(秒),默认 300', parseInt, 300)
|
|
2101
2186
|
.option('--dry-run', '干跑模式,只列任务不执行')
|
|
2102
|
-
.option('--retry-failed <path>', '从报告 JSON
|
|
2187
|
+
.option('--retry-failed <path>', '从报告 JSON 重跑失败项(output/reports/{sheet}/report_xxx.json)')
|
|
2103
2188
|
.option('--init', '复制 .env.example 到当前目录并重命名为 .env')
|
|
2104
2189
|
.option('--file <path>', '指定 Excel 文件路径(优先级高于 EXCEL_FILE 环境变量)')
|
|
2105
2190
|
.option('--input <path>', '指定本地视频文件路径(跳过下载,直接转码→识别→分析)')
|
|
@@ -2229,7 +2314,7 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
|
|
|
2229
2314
|
}
|
|
2230
2315
|
|
|
2231
2316
|
// 执行流水线
|
|
2232
|
-
await runUrlTask({
|
|
2317
|
+
const urlResult = await runUrlTask({
|
|
2233
2318
|
watchUrl: parsed.watchUrl,
|
|
2234
2319
|
platform: parsed.platform,
|
|
2235
2320
|
pkey: parsed.pkey,
|
|
@@ -2247,6 +2332,13 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
|
|
|
2247
2332
|
whisperAvailable,
|
|
2248
2333
|
});
|
|
2249
2334
|
|
|
2335
|
+
// 生成标准报告 JSON(与 Excel 模式格式一致)
|
|
2336
|
+
if (urlResult) {
|
|
2337
|
+
const config = { steps, max_retries: opts.retry, retry_delay: opts.retryDelay, concurrency: 1, force: opts.force || false };
|
|
2338
|
+
generateReport([urlResult], config, parsed.platform);
|
|
2339
|
+
printReportSummary([urlResult]);
|
|
2340
|
+
}
|
|
2341
|
+
|
|
2250
2342
|
process.exit(0);
|
|
2251
2343
|
}
|
|
2252
2344
|
|
|
@@ -2349,7 +2441,7 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
|
|
|
2349
2441
|
}
|
|
2350
2442
|
|
|
2351
2443
|
// 执行流水线
|
|
2352
|
-
await runInputTask({
|
|
2444
|
+
const inputResult = await runInputTask({
|
|
2353
2445
|
inputPath,
|
|
2354
2446
|
stem,
|
|
2355
2447
|
sheetName,
|
|
@@ -2364,6 +2456,13 @@ if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.
|
|
|
2364
2456
|
fileInfo,
|
|
2365
2457
|
});
|
|
2366
2458
|
|
|
2459
|
+
// 生成标准报告 JSON(与 Excel 模式格式一致)
|
|
2460
|
+
if (inputResult) {
|
|
2461
|
+
const config = { steps, max_retries: opts.retry, retry_delay: opts.retryDelay, concurrency: 1, force: opts.force || false };
|
|
2462
|
+
generateReport([inputResult], config, sheetName);
|
|
2463
|
+
printReportSummary([inputResult]);
|
|
2464
|
+
}
|
|
2465
|
+
|
|
2367
2466
|
process.exit(0);
|
|
2368
2467
|
}
|
|
2369
2468
|
|