video-pipeline 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +168 -0
- package/CHANGELOG.md +98 -0
- package/LICENSE +21 -0
- package/README.md +589 -0
- package/package.json +67 -0
- package/process_videos.js +2010 -0
|
@@ -0,0 +1,2010 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* 视频下载、转码、文本识别、AI分析一体化流程脚本 (Node.js 版)
|
|
4
|
+
*
|
|
5
|
+
* 用法:
|
|
6
|
+
* node process_videos.js --concurrency 3 --retry 3
|
|
7
|
+
* node process_videos.js --sheet "YouTube视频" --concurrency 2
|
|
8
|
+
* node process_videos.js --sheet "普诺赛中文站" --id 427
|
|
9
|
+
* node process_videos.js --step download
|
|
10
|
+
* node process_videos.js --dry-run
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
// ============================== 依赖 ==============================
|
|
14
|
+
import { spawn, execSync, execFile, execFileSync } from 'child_process';
import fs from 'fs';
import os from 'os';
import path from 'path';
import readline from 'readline';
import { fileURLToPath } from 'url';

import { select, input } from '@inquirer/prompts';
import { program } from 'commander';
import dotenv from 'dotenv';
import pLimit from 'p-limit';
import XLSX from 'xlsx';
|
|
25
|
+
|
|
26
|
+
// --env-file has to be read straight from argv because the dotenv file
// must be loaded before anything below reads process.env.
const _envFileIdx = process.argv.indexOf('--env-file');
// Falls back to ".env" when the flag is absent or is the last argument.
let _dotenvPath =
  _envFileIdx !== -1 && _envFileIdx + 1 < process.argv.length
    ? process.argv[_envFileIdx + 1]
    : '.env';
dotenv.config({ path: _dotenvPath });
|
|
33
|
+
|
|
34
|
+
// ============================== 路径配置 ==============================
|
|
35
|
+
// ES modules have no built-in __filename/__dirname; derive both from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Directory of this script; envPath() resolves relative settings against it.
const BASE_DIR = path.resolve(__dirname);
|
|
38
|
+
|
|
39
|
+
/**
 * Resolve a path-valued setting to an absolute path.
 *
 * @param {string} key - Name of the environment variable to read.
 * @param {string} defaultValue - Used when the variable is unset or empty.
 * @returns {string} Absolute path; a relative value is anchored at BASE_DIR.
 */
function envPath(key, defaultValue) {
  const configured = process.env[key] || defaultValue;
  if (path.isAbsolute(configured)) {
    return path.resolve(configured);
  }
  return path.resolve(BASE_DIR, configured);
}
|
|
44
|
+
|
|
45
|
+
// Input workbook. Declared with `let`, so it can be reassigned after startup.
let EXCEL_FILE = envPath('EXCEL_FILE', 'data/export_2026-06-10_split.xlsx');

// Working directories: each one is an env override or a folder resolved by envPath().
const [DOWNLOADS_DIR, TRANSCODED_DIR, COOKIES_DIR, REPORTS_DIR] = [
  ['DOWNLOADS_DIR', 'downloads'],
  ['TRANSCODED_DIR', 'transcoded'],
  ['COOKIES_DIR', 'cookies'],
  ['REPORTS_DIR', 'reports'],
].map(([key, fallback]) => envPath(key, fallback));
|
|
50
|
+
|
|
51
|
+
// External executables; each can be overridden with an env var of the same name.
const YTDLP = process.env.YTDLP || 'yt-dlp';
const FFMPEG = process.env.FFMPEG || 'ffmpeg';
const FFPROBE = process.env.FFPROBE || 'ffprobe';

// Whisper settings. An empty env value counts as "not set" for every entry.
const WHISPER_BACKEND = process.env.WHISPER_BACKEND || 'service';
const WHISPER_SERVICE = process.env.WHISPER_SERVICE || 'http://localhost:9588';
const WHISPER_MODEL = process.env.WHISPER_MODEL || 'base';
const WHISPER_DEVICE = process.env.WHISPER_DEVICE || 'cpu';
const WHISPER_LANGUAGE = process.env.WHISPER_LANGUAGE || '';
const WHISPER_SERVICE_MODEL = process.env.WHISPER_SERVICE_MODEL || '';
// Mutable module state; starts out as null.
let _SERVICE_MODEL_LOADED = null;

// Transcode output extension and the ffmpeg arguments, split on whitespace.
const TRANSCODE_EXT = process.env.TRANSCODE_EXT || '.wav';
const FFMPEG_TRANSCODE_ARGS = (process.env.TRANSCODE_ARGS || '-ar 16000 -ac 1 -c:a pcm_s16le')
  .split(/\s+/)
  .filter((arg) => arg !== '');
|
|
64
|
+
|
|
65
|
+
// ============================== Excel 字段映射 ==============================
|
|
66
|
+
// Spreadsheet column headers. Every name can be overridden through the environment.
const COL_ID = process.env.COL_ID || 'extra.id';
const COL_TITLE = process.env.COL_TITLE || 'title';
const COL_CONTENT = process.env.COL_CONTENT || 'content';
const COL_KEYWORDS = process.env.COL_KEYWORDS || 'keywords';

// Columns holding the per-platform video identifiers.
const COL_TENCENTVID = process.env.COL_TENCENTVID || 'extra.tencentVid';
const COL_BILIBILIBVID = process.env.COL_BILIBILIBVID || 'extra.bilibiliBvid';
const COL_YOUTUBEID = process.env.COL_YOUTUBEID || 'extra.youtubeId';
const COL_YOUKUID = process.env.COL_YOUKUID || 'extra.youkuId';
|
|
74
|
+
|
|
75
|
+
// ============================== 平台配置 ==============================
|
|
76
|
+
// Platform key -> spreadsheet column that carries that platform's video id.
const PLATFORM_COL_MAP = {
  tencentVid: COL_TENCENTVID,
  bilibiliBvid: COL_BILIBILIBVID,
  youtubeId: COL_YOUTUBEID,
  youkuId: COL_YOUKUID,
};
|
|
82
|
+
|
|
83
|
+
// Order in which a row's platform columns are probed for a video id.
const PLATFORM_PRIORITY = (process.env.PLATFORM_PRIORITY || 'bilibiliBvid,youtubeId,tencentVid,youkuId')
  .split(',')
  .map((name) => name.trim())
  .filter((name) => name !== '');

// Optional comma-separated list of sheet names; empty when VIDEO_SHEETS is unset.
const _VIDEO_SHEETS_RAW = process.env.VIDEO_SHEETS || '';
const VIDEO_SHEETS = _VIDEO_SHEETS_RAW === ''
  ? []
  : _VIDEO_SHEETS_RAW
    .split(',')
    .map((name) => name.trim())
    .filter((name) => name !== '');

// Platform key -> prefix of that platform's environment variables (e.g. YOUTUBE_PROXY).
const _PKEY_ENV_PREFIX = {
  tencentVid: 'TENCENT',
  bilibiliBvid: 'BILIBILI',
  youtubeId: 'YOUTUBE',
  youkuId: 'YOUKU',
};
|
|
97
|
+
|
|
98
|
+
/**
 * Build the per-platform download settings from environment variables.
 *
 * For every key in PLATFORM_PRIORITY the variables `<PREFIX>_URL_TPL`,
 * `_COOKIES_FROM_BROWSER`, `_COOKIE_FILE`, `_PROXY`, `_USER_AGENT`,
 * `_CONCURRENT_FRAGMENTS` and `_FORMAT` are read; `_REFERER` is read only
 * for bilibiliBvid, `_JS_RUNTIMES` / `_REMOTE_COMPONENTS` only for youtubeId.
 * Optional keys are added only when their variable is set.
 *
 * @returns {Object<string, object>} Map of platform key to its settings.
 */
function buildPlatformConfig() {
  const config = {};
  for (const pkey of PLATFORM_PRIORITY) {
    // Unknown platform keys fall back to their upper-cased name as env prefix.
    const prefix = _PKEY_ENV_PREFIX[pkey] || pkey.toUpperCase();
    const readEnv = (suffix) => process.env[`${prefix}_${suffix}`] || '';

    const cookieFile = readEnv('COOKIE_FILE');
    const cfg = {
      field: PLATFORM_COL_MAP[pkey] || `extra.${pkey}`,
      url_tpl: readEnv('URL_TPL'),
      cookies_from_browser: readEnv('COOKIES_FROM_BROWSER'),
      cookie_file: cookieFile ? path.resolve(BASE_DIR, cookieFile) : null,
    };

    const proxy = readEnv('PROXY');
    if (proxy) cfg.proxy = proxy;

    // Extra request headers, already in yt-dlp argument form.
    const userAgent = readEnv('USER_AGENT');
    const referer = pkey === 'bilibiliBvid' ? readEnv('REFERER') : '';
    const extraHeaders = [];
    if (userAgent) extraHeaders.push('--user-agent', userAgent);
    if (referer) extraHeaders.push('--add-header', `Referer:${referer}`);
    // Accept-Language is added only when another header was customised.
    if (userAgent || referer) {
      extraHeaders.push('--add-header', 'Accept-Language:zh,en;q=0.9');
    }
    if (extraHeaders.length) cfg.extra_headers = extraHeaders;

    // Only a positive integer is accepted; a malformed value is dropped
    // instead of being stored as NaN or a non-positive number.
    const fragments = Number.parseInt(readEnv('CONCURRENT_FRAGMENTS'), 10);
    if (Number.isInteger(fragments) && fragments > 0) {
      cfg.concurrent_fragments = fragments;
    }

    if (pkey === 'youtubeId') {
      const jsRuntimes = readEnv('JS_RUNTIMES');
      const remoteComponents = readEnv('REMOTE_COMPONENTS');
      const extraArgs = [];
      if (jsRuntimes) extraArgs.push('--js-runtimes', jsRuntimes);
      if (remoteComponents) extraArgs.push('--remote-components', remoteComponents);
      if (extraArgs.length) cfg.extra_args = extraArgs;
    }

    const format = readEnv('FORMAT');
    if (format) cfg.format = format;

    config[pkey] = cfg;
  }
  return config;
}
|
|
152
|
+
|
|
153
|
+
// Per-platform settings, built once at module load from the environment.
const PLATFORM_CONFIG = buildPlatformConfig();
|
|
154
|
+
|
|
155
|
+
// ============================== 日志 ==============================
|
|
156
|
+
/** Current local time formatted as "HH:MM:SS". */
function timestamp() {
  const [clock] = new Date().toTimeString().split(' ');
  return clock;
}

/** Write an INFO line, prefixed with the current time, to stdout. */
function logInfo(msg) {
  const line = `${timestamp()} [INFO] ${msg}`;
  console.log(line);
}

/** Write a WARN line, prefixed with the current time, to stdout. */
function logWarn(msg) {
  const line = `${timestamp()} [WARN] ${msg}`;
  console.log(line);
}

/** Write an ERROR line, prefixed with the current time, to stdout. */
function logError(msg) {
  const line = `${timestamp()} [ERROR] ${msg}`;
  console.log(line);
}
|
|
168
|
+
|
|
169
|
+
// ============================== 锁 / 并发控制 ==============================
|
|
170
|
+
// True while a queued print callback is executing.
let _printLock = false;
// FIFO of pending print jobs; the head entry is the one currently running.
const _printQueue = [];

/**
 * Run `fn` after every previously queued callback has finished.
 *
 * @param {Function} fn - Synchronous callback to execute.
 * @returns {Promise<void>} Resolves once `fn` has run; rejects with the
 *   error `fn` threw. A failing callback no longer blocks the queue.
 */
function printLock(fn) {
  return new Promise((resolve, reject) => {
    _printQueue.push(() => {
      _printLock = true;
      try {
        fn();
        resolve();
      } catch (err) {
        // Hand the failure to the caller instead of killing the drain loop.
        reject(err);
      } finally {
        _printLock = false;
      }
    });
    // Only the call that finds the queue idle starts the drain loop; the job
    // never throws, so the floating promise cannot reject.
    if (_printQueue.length === 1) void processQueue();
  });
}

/**
 * Drain the print queue in order. The running job stays at the head until it
 * is finished, so re-entrant printLock() calls do not start a second loop.
 */
async function processQueue() {
  while (_printQueue.length) {
    const job = _printQueue[0];
    try {
      await job();
    } finally {
      // Always drop the finished job, even if it failed unexpectedly.
      _printQueue.shift();
    }
  }
}
|
|
188
|
+
|
|
189
|
+
// Simplified: Node.js is single-threaded, so simple cases need no lock.
/** Print `s` to stdout with console.log. */
function lockedPrint(s) {
  console.log(s);
}
|
|
193
|
+
|
|
194
|
+
// ============================== 进度追踪 ==============================
|
|
195
|
+
/** Running tally of finished tasks, grouped by outcome. */
class OverallProgress {
  /** @param {number} total - Number of tasks expected in this run. */
  constructor(total) {
    Object.assign(this, {
      total,
      completed: 0,
      success: 0,
      failed: 0,
      partial: 0,
      noVideo: 0,
    });
  }

  /**
   * Record one finished task.
   * @param {string} status - 'success' | 'failed' | 'partial' | 'no_video';
   *   any other value only increases the completed counter.
   */
  addResult(status) {
    this.completed += 1;
    switch (status) {
      case 'success':
        this.success += 1;
        break;
      case 'failed':
        this.failed += 1;
        break;
      case 'partial':
        this.partial += 1;
        break;
      case 'no_video':
        this.noVideo += 1;
        break;
      default:
        break;
    }
  }

  /** @returns {string} One-line summary with the percentage and per-outcome counts. */
  summaryLine() {
    const pct = this.total ? ((this.completed / this.total) * 100).toFixed(1) : '0.0';
    return [
      `[总进度 ${this.completed}/${this.total} (${pct}%)]`,
      `成功:${this.success}`,
      `失败:${this.failed}`,
      `部分:${this.partial}`,
      `无视频:${this.noVideo}`,
    ].join(' ');
  }
}
|
|
216
|
+
|
|
217
|
+
// ============================== 数据结构 ==============================
|
|
218
|
+
/** Outcome of a single pipeline step. */
class StepResult {
  /**
   * @param {string} [status='skipped'] - Step status label.
   * @param {?string} [file=null] - File associated with the step, if any.
   * @param {?string} [error=null] - Error text, if any.
   * @param {number} [retriesUsed=0] - Stored as `retries_used`.
   */
  constructor(status = 'skipped', file = null, error = null, retriesUsed = 0) {
    Object.assign(this, { status, file, error, retries_used: retriesUsed });
  }
}
|
|
226
|
+
|
|
227
|
+
/** Everything recorded about one row's trip through the pipeline. */
class TaskResult {
  /**
   * @param {string} sheet - Sheet the row came from.
   * @param {*} idVal - Row identifier.
   * @param {string} title - Row title.
   * @param {?string} [platform=null] - Platform key, when known.
   * @param {?string} [videoUrl=null] - Stored as `video_url`.
   * @param {string} [stem=''] - File-name stem used for this row.
   */
  constructor(sheet, idVal, title, platform = null, videoUrl = null, stem = '') {
    Object.assign(this, {
      sheet,
      id_val: idVal,
      title,
      platform,
      video_url: videoUrl,
      stem,
    });
    // Every step starts as "skipped" until it actually runs.
    for (const step of ['download', 'transcode', 'transcribe', 'analyze']) {
      this[step] = new StepResult('skipped');
    }
    this.overall_status = 'pending';
    this.error = null;
  }

  /** @returns {object} Plain snapshot with shallow copies of the four step results. */
  toJSON() {
    const snapshot = {
      sheet: this.sheet,
      id_val: this.id_val,
      title: this.title,
      platform: this.platform,
      video_url: this.video_url,
      stem: this.stem,
    };
    for (const step of ['download', 'transcode', 'transcribe', 'analyze']) {
      snapshot[step] = { ...this[step] };
    }
    snapshot.overall_status = this.overall_status;
    snapshot.error = this.error;
    return snapshot;
  }
}
|
|
259
|
+
|
|
260
|
+
// ============================== 工具函数 ==============================
|
|
261
|
+
/**
 * Make a value usable as a file name.
 * @param {*} name - Value to convert; it is stringified first.
 * @returns {string} The text with each of \ / : * ? " < > | replaced by "_",
 *   then trimmed.
 */
function safeFilename(name) {
  const forbidden = /[\\/:*?"<>|]/g;
  const text = String(name);
  return text.replace(forbidden, '_').trim();
}
|
|
264
|
+
|
|
265
|
+
/**
 * Load one worksheet of EXCEL_FILE as an array of row objects.
 * @param {string} sheetName - Name of the worksheet to read.
 * @returns {object[]} Rows as produced by XLSX.utils.sheet_to_json.
 * @throws {Error} When the workbook has no sheet with that name.
 */
function readExcelSheet(sheetName) {
  const workbook = XLSX.readFile(EXCEL_FILE);
  const known = workbook.SheetNames.includes(sheetName);
  if (!known) {
    throw new Error(`Sheet "${sheetName}" not found in ${EXCEL_FILE}`);
  }
  return XLSX.utils.sheet_to_json(workbook.Sheets[sheetName]);
}
|
|
273
|
+
|
|
274
|
+
/**
 * Write one value into EXCEL_FILE and save the workbook.
 *
 * The sheet is read as an array of arrays, the cell is set, and the sheet is
 * rebuilt from that array before the file is written back.
 * NOTE(review): rebuilding through aoa_to_sheet keeps cell values only —
 * confirm that losing other sheet metadata is acceptable.
 *
 * @param {string} sheetName - Worksheet to modify.
 * @param {number} rowIndex - 0-based index of the data row (header excluded).
 * @param {string} colName - Header text of the target column.
 * @param {*} value - Value to store.
 * @returns {boolean} true when the file was written; false (after a warning)
 *   when the row index, sheet or column is invalid.
 */
function writeExcelCell(sheetName, rowIndex, colName, value) {
  // A negative or fractional index would overwrite the header or crash below.
  if (!Number.isInteger(rowIndex) || rowIndex < 0) {
    logWarn(`[${sheetName}] invalid row index ${rowIndex}, skip write`);
    return false;
  }

  const wb = XLSX.readFile(EXCEL_FILE);
  if (!wb.SheetNames.includes(sheetName)) {
    logWarn(`Sheet [${sheetName}] not found, skip write`);
    return false;
  }
  const ws = wb.Sheets[sheetName];

  // Array-of-arrays view; the first row holds the column headers.
  const aoa = XLSX.utils.sheet_to_json(ws, { header: 1 });
  // An empty sheet has no header row: treat it as "column not found".
  const headers = aoa[0] || [];
  const colIdx = headers.indexOf(colName);
  if (colIdx === -1) {
    logWarn(`[${sheetName}] column "${colName}" not found, skip write`);
    return false;
  }

  // +1 skips the header row; pad with empty rows when the target is past the end.
  const target = rowIndex + 1;
  while (aoa.length <= target) {
    aoa.push([]);
  }
  aoa[target][colIdx] = value;

  wb.Sheets[sheetName] = XLSX.utils.aoa_to_sheet(aoa);
  XLSX.writeFile(wb, EXCEL_FILE);
  return true;
}
|
|
304
|
+
|
|
305
|
+
/**
 * Find the first platform, in PLATFORM_PRIORITY order, that has a video id in `row`.
 * @param {object} row - Row object keyed by column header.
 * @returns {{pkey: ?string, vid: ?string}} Platform key and trimmed id, or
 *   both null when every platform column is null/undefined or blank.
 */
function getVideoId(row) {
  for (const pkey of PLATFORM_PRIORITY) {
    const raw = row[PLATFORM_CONFIG[pkey].field];
    if (raw == null) continue;
    const vid = String(raw).trim();
    if (vid !== '') {
      return { pkey, vid };
    }
  }
  return { pkey: null, vid: null };
}
|
|
315
|
+
|
|
316
|
+
/**
 * Fill a platform's URL template with a video id.
 *
 * @param {string} pkey - Platform key; the placeholder in the template is `{<pkey>}`.
 * @param {string} vid - Video id to insert.
 * @returns {string} The template with every placeholder replaced, or '' when
 *   the platform has no template.
 */
function buildUrl(pkey, vid) {
  const tpl = PLATFORM_CONFIG[pkey]?.url_tpl || '';
  // A replacer function keeps "$&", "$1", "$$" in the id literal; a string
  // replacement would expand them as substitution patterns.
  return tpl.replaceAll(`{${pkey}}`, () => String(vid));
}
|
|
320
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
321
|
+
// URL 解析(--url 模式)
|
|
322
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
323
|
+
|
|
324
|
+
// URL recognisers, tried top to bottom; the first pattern whose capture group
// matches decides the platform and the video id.
const URL_PLATFORM_MAP = [
  {
    platform: 'bilibili',
    pkey: 'bilibiliBvid',
    patterns: [
      /bilibili\.com\/video\/(BV[a-zA-Z0-9]{10})/,
      /b23\.tv\/([a-zA-Z0-9]+)/,
      /player\.bilibili\.com\/player\.html\?[^"'\s]*\baid=(\d+)/,
    ],
  },
  {
    platform: 'youtube',
    pkey: 'youtubeId',
    patterns: [
      // "v" may follow other query parameters (watch?app=desktop&v=...).
      /(?:youtube\.com\/(?:watch\?(?:[^#\s"']*&)?v=|embed\/|shorts\/|live\/)|youtu\.be\/)([a-zA-Z0-9_-]{11})/,
    ],
  },
  {
    // Listed before tencent: the tencent entry ends with a host-agnostic
    // "vid=" fallback that would otherwise claim Youku URLs carrying a vid
    // query parameter.
    platform: 'youku',
    pkey: 'youkuId',
    patterns: [
      /v\.youku\.com\/v_show\/id_([a-zA-Z0-9=]+)\.html/,
    ],
  },
  {
    platform: 'tencent',
    pkey: 'tencentVid',
    patterns: [
      /v\.qq\.com\/x\/page\/([a-zA-Z0-9]+)\.html/,
      /v\.qq\.com\/x\/cover\/[^/]+\/([a-zA-Z0-9]+)\.html/,
      /[?&]vid=([a-zA-Z0-9]+)/,
    ],
  },
];
|
|
358
|
+
|
|
359
|
+
/**
 * Recognise a video URL (or an iframe snippet containing one).
 *
 * @param {*} url - Candidate input; anything but a non-empty string yields null.
 * @returns {?{platform: string, pkey: string, videoId: string, watchUrl: string}}
 *   Details of the first matching URL_PLATFORM_MAP pattern, or null.
 */
function parseUrl(url) {
  if (typeof url !== 'string' || url === '') return null;

  let target = url.trim();
  // For an <iframe> snippet, continue with the value of its src attribute.
  const embedded = target.match(/src=["']([^"']+)["']/);
  if (embedded) {
    target = embedded[1];
  }

  for (const { platform, pkey, patterns } of URL_PLATFORM_MAP) {
    for (const pattern of patterns) {
      const found = target.match(pattern);
      if (!found || !found[1]) continue;
      return { platform, pkey, videoId: found[1], watchUrl: target };
    }
  }
  return null;
}
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
// ============================== stem 去重 ==============================
|
|
385
|
+
/**
 * Turn a row id into a file-name-safe string.
 *
 * Numeric ids are floored to an integer ("427.0" becomes "427"). Ids that are
 * not finite numbers are kept as text and sanitised; previously they all
 * collapsed to the literal "NaN", because Number() returns NaN instead of throwing.
 *
 * @param {*} val - Raw id value from the sheet.
 * @returns {string} Sanitised id.
 */
function safeId(val) {
  let num = NaN;
  try {
    num = Number(val);
  } catch {
    // Number() throws only for values such as Symbols; use the text fallback.
  }
  if (Number.isFinite(num)) {
    return safeFilename(String(Math.floor(num)));
  }
  return safeFilename(String(val));
}
|
|
392
|
+
|
|
393
|
+
/**
 * Compute a file-name stem for every row and cache it on the row.
 *
 * The base stem is the sanitised id, or the sanitised title when the id is
 * blank. Stems shared by several rows get the title appended; stems that
 * still collide afterwards get the row's video id appended. The result is
 * stored in `row._stemCache[sheetName]`, so the rows are mutated.
 *
 * @param {object[]} rows - Row objects of one sheet.
 * @param {string} sheetName - Key under which each stem is cached.
 * @returns {void}
 */
function precomputeStems(rows, sheetName) {
  if (!rows.length) return;

  // A Map is used instead of a plain object so stems such as "constructor"
  // are counted like any other key.
  const tally = (names) => {
    const counts = new Map();
    for (const name of names) counts.set(name, (counts.get(name) ?? 0) + 1);
    return counts;
  };

  const baseStems = rows.map((row) => {
    const eid = row[COL_ID];
    if (eid != null && String(eid).trim() !== '') {
      return safeId(eid);
    }
    return safeFilename(String(row[COL_TITLE] || 'unknown'));
  });

  // Pass 1: duplicated stems get the row title appended.
  const baseCounts = tally(baseStems);
  const withTitle = baseStems.map((stem, i) => {
    if (baseCounts.get(stem) <= 1) return stem;
    const title = safeFilename(String(rows[i][COL_TITLE] || ''));
    return title ? `${stem}_${title}` : stem;
  });

  // Pass 2: stems that are still duplicated get the video id appended.
  const titleCounts = tally(withTitle);
  const finalStems = withTitle.map((stem, i) => {
    if (titleCounts.get(stem) <= 1) return stem;
    const { pkey, vid } = getVideoId(rows[i]);
    return pkey && vid ? `${stem}_${safeFilename(vid)}` : stem;
  });

  rows.forEach((row, i) => {
    if (!row._stemCache) row._stemCache = {};
    row._stemCache[sheetName] = finalStems[i];
  });
}
|
|
434
|
+
/**
 * Ask the user what to do when the download target already exists.
 *
 * @param {string} proposedPath - Path the download would be written to.
 * @returns {Promise<{action: string, path: ?string}>}
 *   `{action: 'proceed', path}` when the path is free, the user chose to
 *   overwrite, or a free custom name was entered; `{action: 'skip', path: null}`
 *   when the user keeps the existing file.
 */
async function resolveUrlConflict(proposedPath) {
  if (!fs.existsSync(proposedPath)) return { action: 'proceed', path: proposedPath };

  const ext = path.extname(proposedPath);
  const stem = path.basename(proposedPath, ext);
  const dir = path.dirname(proposedPath);

  console.log(`\n⚠️  文件已存在: ${c('yellow', proposedPath)}`);

  const action = await select({
    message: '如何处理?',
    choices: [
      { name: '覆盖 (重新下载替换)', value: 'overwrite' },
      { name: '跳过 (保留已有文件)', value: 'skip' },
      { name: '自定义文件名', value: 'custom' },
    ],
  });

  if (action === 'skip') return { action: 'skip', path: null };
  if (action === 'overwrite') return { action: 'proceed', path: proposedPath };

  // Custom name: sanitised and trimmed like every other stem in this file, so
  // path separators or surrounding blanks cannot end up in the file name.
  const customName = await input({
    message: '输入自定义文件名 (不含扩展名):',
    default: stem,
  });
  const cleanName = safeFilename(customName);
  if (!cleanName) {
    console.log(c('yellow', '文件名不能为空,使用默认名称'));
    return { action: 'proceed', path: proposedPath };
  }
  // The new name may collide as well, so run the same check again.
  return resolveUrlConflict(path.join(dir, `${cleanName}${ext}`));
}
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
/**
 * File-name stem for a row.
 *
 * @param {object} row - Row object.
 * @param {string} [sheetName=''] - When given, a stem cached for this sheet wins.
 * @returns {string} Cached stem, else the sanitised id, else the sanitised
 *   title ('unknown' when the title is missing).
 */
function stemName(row, sheetName = '') {
  const cached = sheetName && row._stemCache ? row._stemCache[sheetName] : undefined;
  if (cached) return cached;

  const eid = row[COL_ID];
  const hasId = eid != null && String(eid).trim() !== '';
  return hasId ? safeId(eid) : safeFilename(String(row[COL_TITLE] || 'unknown'));
}
|
|
479
|
+
|
|
480
|
+
/**
 * Identify a row by its sanitised id, or by its raw title when the id is blank.
 *
 * @param {object} row - Row object.
 * @returns {string} safeId(id) when an id is present; otherwise the title as
 *   is (not sanitised), or 'unknown' when the title is missing.
 */
function rowKey(row) {
  const eid = row[COL_ID];
  const idMissing = eid == null || String(eid).trim() === '';
  if (idMissing) {
    return String(row[COL_TITLE] || 'unknown');
  }
  return safeId(eid);
}
|
|
487
|
+
|
|
488
|
+
/**
 * Look for an already downloaded file named `<stem>.<ext>`.
 *
 * Leftover `<stem>.part` / `<stem>.ytdl` files are ignored, so an aborted
 * download is not mistaken for a finished one.
 *
 * @param {string} dlDir - Directory to search (not recursive).
 * @param {string} stem - File name without extension.
 * @returns {?string} Full path of the first matching entry, or null.
 */
function findDownloadedFile(dlDir, stem) {
  if (!fs.existsSync(dlDir)) return null;

  const leftoverExts = new Set(['.part', '.ytdl']);
  const match = fs.readdirSync(dlDir).find((entry) => {
    const { name, ext } = path.parse(entry);
    return name === stem && !leftoverExts.has(ext.toLowerCase());
  });
  return match === undefined ? null : path.join(dlDir, match);
}
|
|
499
|
+
|
|
500
|
+
// ============================== 重试机制 ==============================
|
|
501
|
+
/**
 * Decide whether a failed step is worth retrying.
 *
 * With stderr output: not retryable when it contains the HTTP status
 * 401/403/404 as a stand-alone number or one of the "gone for good" phrases;
 * retryable otherwise. Without stderr: retryable only for timeout and
 * connection errors.
 *
 * @param {object} err - Error; `stderr`, `code`, `name`, `message` are inspected.
 * @returns {boolean} true when the caller should retry.
 */
function isRetryable(err) {
  if (err.stderr) {
    // String() also accepts a Buffer, which has no toLowerCase().
    const lower = String(err.stderr).toLowerCase();
    // Match the codes only when not embedded in a longer number, so byte
    // counts like "14031" are not taken for an HTTP 403.
    if (/(?<!\d)(?:401|403|404)(?!\d)/.test(lower)) return false;
    const permanent = [
      'unavailable', 'private video', 'video is not available',
      'this video is no longer', 'removed', 'deleted',
      'invalid url', 'unsupported url',
    ];
    return !permanent.some((phrase) => lower.includes(phrase));
  }
  // Timeout / connection errors are retryable.
  if (err.code === 'ETIMEDOUT' || err.code === 'ECONNRESET' || err.code === 'ECONNREFUSED') return true;
  if (err.name === 'TimeoutError' || err.message?.includes('timeout')) return true;
  return false;
}
|
|
518
|
+
|
|
519
|
+
/**
 * Pause for `ms` milliseconds.
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<void>} Resolves once the timer fires.
 */
async function sleep(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
522
|
+
|
|
523
|
+
/**
 * Call `fn`, retrying retryable failures with exponential back-off.
 *
 * `fn` is always attempted at least once: a negative, non-numeric or NaN
 * `maxRetries` is treated as 0 instead of skipping the call altogether.
 *
 * @param {Function} fn - Async operation to run.
 * @param {number} [maxRetries=0] - Extra attempts after the first one.
 * @param {number} [baseDelay=5] - Seconds before the first retry; doubles each time.
 * @param {string} [taskLabel=''] - Label used in the warning log line.
 * @returns {Promise<{result: *, retriesUsed: number, error: null}>}
 * @throws The original error when it is not retryable or retries are exhausted.
 */
async function retryCall(fn, maxRetries = 0, baseDelay = 5, taskLabel = '') {
  const requested = Number(maxRetries);
  const retries = Number.isFinite(requested) ? Math.max(0, Math.floor(requested)) : 0;

  for (let attempt = 0; ; attempt++) {
    try {
      const result = await fn();
      return { result, retriesUsed: attempt, error: null };
    } catch (err) {
      // Permanent failures and the last allowed attempt surface unchanged.
      if (!isRetryable(err) || attempt >= retries) throw err;
      const delay = baseDelay * Math.pow(2, attempt);
      logWarn(`[${taskLabel}] 第 ${attempt + 1}/${retries + 1} 次尝试失败,${delay}s 后重试: ${err.message}`);
      await sleep(delay * 1000);
    }
  }
}
|
|
543
|
+
|
|
544
|
+
// ============================== 工具存在性检查 ==============================
|
|
545
|
+
/**
 * Check whether an executable can be located by the platform's lookup tool
 * (`where` on Windows, `which` elsewhere).
 *
 * The name is passed as a single argument without going through a shell, so
 * spaces or shell metacharacters in a configured path are neither split nor
 * interpreted.
 *
 * @param {string} cmd - Command name or path.
 * @returns {boolean} true when the lookup tool exits successfully.
 */
function which(cmd) {
  if (typeof cmd !== 'string' || cmd.trim() === '') return false;
  const locator = process.platform === 'win32' ? 'where' : 'which';
  try {
    execFileSync(locator, [cmd], { stdio: 'pipe' });
    return true;
  } catch {
    // Non-zero exit (not found) or a missing lookup tool: report "not available".
    return false;
  }
}
|
|
557
|
+
|
|
558
|
+
/**
 * Probe the configured whisper backend.
 *
 * - 'local' backend: runs `whisper --help` (5 s limit) and logs an error when
 *   it fails.
 * - any other backend: sends a GET to WHISPER_SERVICE (3 s limit). Any HTTP
 *   response, whatever its status, counts as reachable.
 *
 * @returns {Promise<boolean>} true when the backend responded.
 */
async function checkWhisperAvailable() {
  if (WHISPER_BACKEND === 'local') {
    try {
      execSync('whisper --help', { stdio: 'pipe', timeout: 5000 });
      return true;
    } catch {
      logError('本地 whisper CLI 不可用,请确认: pip install openai-whisper');
      return false;
    }
  }

  try {
    const resp = await fetch(WHISPER_SERVICE, { signal: AbortSignal.timeout(3000) });
    try {
      // The body is not needed; release it instead of leaving it unread.
      await resp.body?.cancel();
    } catch {
      // Releasing is best-effort: the service already answered.
    }
    return true;
  } catch {
    return false;
  }
}
|
|
576
|
+
|
|
577
|
+
/**
 * Synchronous pre-flight check of what the requested steps need.
 *
 * - download: the yt-dlp executable
 * - transcode: the ffmpeg and ffprobe executables
 * - analyze: AI_ENABLED not 'false', plus AI_API_KEY, AI_BASE_URL and
 *   AI_MODEL (the analyze step refuses to run without any of the three)
 *
 * Whisper is not checked here because that probe is asynchronous.
 *
 * @param {string[]} steps - Names of the steps that will run.
 * @returns {{ytdlp: boolean, ffmpeg: boolean, ffprobe: boolean, whisper: boolean,
 *   ai: boolean, allOk: boolean, issues: string[]}} One flag per requirement
 *   and a human-readable issue list.
 */
function checkEnvironment(steps) {
  const result = {
    ytdlp: true, ffmpeg: true, ffprobe: true,
    whisper: true, ai: true, allOk: true, issues: [],
  };
  const fail = (flag, issue) => {
    result[flag] = false;
    result.allOk = false;
    result.issues.push(issue);
  };

  if (steps.includes('download') && !which(YTDLP)) {
    fail('ytdlp', `yt-dlp not available (${YTDLP})`);
  }
  if (steps.includes('transcode')) {
    if (!which(FFMPEG)) fail('ffmpeg', `ffmpeg not available (${FFMPEG})`);
    if (!which(FFPROBE)) fail('ffprobe', `ffprobe not available (${FFPROBE})`);
  }
  if (steps.includes('analyze')) {
    const aiEnabled = (process.env.AI_ENABLED || 'true').toLowerCase() === 'true';
    const aiKey = process.env.AI_API_KEY || '';
    const aiUrl = process.env.AI_BASE_URL || '';
    const aiModel = process.env.AI_MODEL || '';
    if (!aiEnabled) {
      fail('ai', 'AI analysis disabled (AI_ENABLED=false)');
    } else if (!aiKey || !aiUrl || !aiModel) {
      // AI_MODEL is checked too, so a setup that would fail on every row is
      // reported before the run starts.
      fail('ai', 'AI config incomplete (missing AI_API_KEY / AI_BASE_URL / AI_MODEL)');
    }
  }
  return result;
}
|
|
618
|
+
|
|
619
|
+
/**
 * Full pre-flight check: the synchronous checks plus the whisper probe.
 *
 * @param {string[]} steps - Names of the steps that will run.
 * @returns {Promise<object>} The checkEnvironment() result, with `whisper`,
 *   `allOk` and `issues` updated when the 'transcribe' step is requested and
 *   whisper is unreachable.
 */
async function checkEnvironmentAsync(steps) {
  const result = checkEnvironment(steps);
  if (!steps.includes('transcribe')) return result;

  const reachable = await checkWhisperAvailable();
  if (reachable) return result;

  const backend = WHISPER_BACKEND === 'local' ? 'local CLI' : `service ${WHISPER_SERVICE}`;
  result.whisper = false;
  result.allOk = false;
  result.issues.push(`whisper not available (${backend})`);
  return result;
}
|
|
632
|
+
|
|
633
|
+
// ============================== 进度显示辅助 ==============================
|
|
634
|
+
/**
 * Run a child process with a time limit and collect its output.
 *
 * stdout is always collected. stderr is collected too; when `onProgress` is
 * given it is read line by line and every line is also passed to the callback
 * (errors thrown by the callback are ignored).
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} args - Arguments.
 * @param {number} timeout - Time limit in seconds; on expiry the child is
 *   killed and the promise rejects.
 * @param {object} [options] - spawn() options plus an optional
 *   `onProgress(line)` callback; `stdio` is always overridden.
 * @returns {Promise<{stdout: string, stderr: string}>} Resolves on exit code 0.
 *   Rejects with an Error carrying `code` and `stderr` on a non-zero exit,
 *   with a 'TimeoutError' (code 'ETIMEDOUT') on timeout, or with the spawn error.
 */
function spawnWithTimeout(cmd, args, timeout, options = {}) {
  const { onProgress, ...spawnOpts } = options;
  return new Promise((resolve, reject) => {
    const child = spawn(cmd, args, {
      ...spawnOpts,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    let stdout = '';
    let stderr = '';
    const timer = setTimeout(() => {
      child.kill();
      reject(Object.assign(new Error(`Timeout after ${timeout}s`), { name: 'TimeoutError', code: 'ETIMEDOUT' }));
    }, timeout * 1000);

    // Always drain stdout: an unread pipe fills up and blocks a child that
    // writes a lot, which used to happen whenever onProgress was set.
    child.stdout.on('data', (chunk) => { stdout += chunk.toString(); });

    if (onProgress && child.stderr) {
      const rl = readline.createInterface({ input: child.stderr, crlfDelay: Infinity });
      rl.on('line', (line) => {
        stderr += line + '\n';
        try { onProgress(line); } catch {}
      });
      child.stderr.on('end', () => rl.close());
    } else {
      child.stderr.on('data', (chunk) => { stderr += chunk.toString(); });
    }

    child.on('close', (code) => {
      clearTimeout(timer);
      if (code === 0) resolve({ stdout, stderr });
      else reject(Object.assign(new Error(`Exit code ${code}`), { code, stderr }));
    });
    child.on('error', (err) => {
      clearTimeout(timer);
      reject(err);
    });
  });
}
|
|
674
|
+
|
|
675
|
+
// ============================== AI 分析 ==============================
|
|
676
|
+
/**
 * Send text to a chat-completions endpoint and return the model's answer.
 *
 * Configuration comes from the environment: AI_API_KEY, AI_BASE_URL, AI_MODEL,
 * AI_PROMPT_TPL (must contain `{content}`) and AI_TIMEOUT (seconds).
 *
 * @param {string} text - Content inserted into the prompt template.
 * @param {number} maxRetries - Extra attempts after the first failure.
 * @param {number} retryDelay - Base back-off in seconds (doubles, capped at 30).
 * @param {number} [timeout=300] - Per-request limit, used when AI_TIMEOUT is
 *   unset or not a positive number.
 * @returns {Promise<{text: ?string, retries: number, error: ?string}>}
 *   Never rejects; failures are reported through `error`.
 */
async function stepAnalyze(text, maxRetries, retryDelay, timeout = 300) {
  if (!text || !text.trim()) {
    return { text: null, retries: 0, error: 'content empty, skip AI analysis' };
  }

  const apiKey = process.env.AI_API_KEY || '';
  const baseUrl = (process.env.AI_BASE_URL || '').replace(/\/$/, '');
  const model = process.env.AI_MODEL || '';
  const promptTpl = process.env.AI_PROMPT_TPL || '帮我归纳总结一下Keywords,尽可能全一点,这是内容:{content}';
  // A non-numeric AI_TIMEOUT would give a NaN timer that fires immediately.
  const parsedTimeout = Number.parseInt(process.env.AI_TIMEOUT || '', 10);
  const aiTimeout = Number.isFinite(parsedTimeout) && parsedTimeout > 0 ? parsedTimeout : timeout;

  if (!apiKey || !baseUrl || !model) {
    return { text: null, retries: 0, error: 'AI config incomplete' };
  }

  // A replacer function inserts the text literally; with a string replacement,
  // "$&", "$1" or "$$" inside the transcript would be expanded.
  const prompt = promptTpl.replace('{content}', () => text);
  const apiUrl = `${baseUrl}/chat/completions`;
  const payload = JSON.stringify({
    model,
    messages: [{ role: 'user', content: prompt }],
    temperature: 0.3,
  });

  // One request, aborted after aiTimeout seconds. The timer covers reading the
  // body as well and is cleared on every exit path.
  const requestOnce = async () => {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), aiTimeout * 1000);
    try {
      const resp = await fetch(apiUrl, {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json',
        },
        body: payload,
        signal: controller.signal,
      });
      if (!resp.ok) {
        throw new Error(`HTTP ${resp.status}: ${await resp.text().catch(() => '')}`);
      }
      const body = await resp.json();
      return body.choices?.[0]?.message?.content || '';
    } finally {
      clearTimeout(timer);
    }
  };

  let lastErr = null;
  const maxAttempts = maxRetries + 1;
  for (let attempt = 0; attempt < maxAttempts; attempt++) {
    try {
      const content = await requestOnce();
      return { text: content.trim(), retries: attempt, error: null };
    } catch (e) {
      lastErr = String(e.message).slice(0, 500);
      if (attempt < maxAttempts - 1) {
        const delay = Math.min(retryDelay * Math.pow(2, attempt), 30);
        lockedPrint(`    [analyze] attempt ${attempt + 1} failed: ${lastErr.slice(0, 100)}, retrying in ${delay}s...`);
        await sleep(delay * 1000);
      }
    }
  }
  return { text: null, retries: maxAttempts, error: `AI analysis failed after ${maxAttempts} retries: ${lastErr}` };
}
|
|
732
|
+
|
|
733
|
+
// ============================== 清理残留文件 ==============================
|
|
734
|
+
/**
 * Delete leftover partial-download files for one stem.
 *
 * Removes regular files in `dlDir` named `<stem>.part`, `<stem>.ytdl`,
 * `<stem>.<anything>.part` or `<stem>.<anything>.ytdl`. Best effort: every
 * file-system error is ignored.
 *
 * @param {string} dlDir - Directory to clean (not recursive).
 * @param {string} stem - File-name stem of the download.
 * @returns {void}
 */
function cleanupPartials(dlDir, stem) {
  const suffixes = ['part', 'ytdl'];
  // Escape the stem once so it is matched literally inside the patterns.
  const escapedStem = stem.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const nested = suffixes.map((suffix) => new RegExp(`^${escapedStem}\\..*\\.${suffix}$`));
  const exact = suffixes.map((suffix) => `${stem}.${suffix}`);
  const isLeftover = (file) => exact.includes(file) || nested.some((re) => re.test(file));

  let entries;
  try {
    entries = fs.readdirSync(dlDir);
  } catch {
    return;
  }

  for (const file of entries) {
    const fullPath = path.join(dlDir, file);
    let isFile = false;
    try {
      isFile = fs.statSync(fullPath).isFile();
    } catch {
      continue;
    }
    if (!isFile || !isLeftover(file)) continue;
    try {
      fs.unlinkSync(fullPath);
      lockedPrint(`    [${stem}] cleaned partial: ${file}`);
    } catch {}
  }
}
|
|
753
|
+
|
|
754
|
+
// ============================== 下载 ==============================
|
|
755
|
+
/**
 * Condense a yt-dlp "[download]" progress line into a short status string.
 *
 * Accepts lines such as
 *   "[download]  12.3% of ~50.00MiB at 2.5MiB/s ETA 00:15"
 *   "[download]   1.2% of ~  50.00MiB at  512.00KiB/s ETA 01:40"
 *   "[download] 100% of 50.00MiB"
 * Sizes may be in B, KiB, MiB, GiB or TiB, and the "~" estimate marker may be
 * followed by padding.
 *
 * @param {string} line - One line of yt-dlp output.
 * @returns {?string} "DL <pct> of <size> @ <speed> ETA <eta>", or
 *   "DL <pct> of <size>" when speed/ETA are not parseable, or null when the
 *   line is not a progress line.
 */
function parseYtdlpProgress(line) {
  const full = line.match(
    /\[download\]\s+([\d.]+%)\s+of\s+~?\s*([\d.]+(?:[KMGT]i)?B)\s+at\s+([\d.]+(?:[KMGT]i)?B\/s)\s+ETA\s+([\d:]+)/,
  );
  if (full) {
    const [, pct, size, speed, eta] = full;
    return `DL ${pct} of ${size} @ ${speed} ETA ${eta}`;
  }
  // Fallback when speed or ETA is missing or "Unknown" (e.g. the final 100% line).
  const short = line.match(/\[download\]\s+([\d.]+%)\s+of\s+~?\s*([\d.]+(?:[KMGT]i)?B)/);
  if (short) {
    const [, pct, size] = short;
    return `DL ${pct} of ${size}`;
  }
  return null;
}
|
|
764
|
+
|
|
765
|
+
/**
 * Download the video referenced by one row with yt-dlp.
 *
 * An already-downloaded file is reused unless `force` is set. The yt-dlp
 * invocation is wrapped in `retryCall`; a failure is honoured whether
 * `retryCall` throws or returns it in its `{ error, retriesUsed }` result.
 *
 * @param {object} row - Row object understood by `getVideoId` / `stemName`.
 * @param {string} sheetName - Sheet name; used as the download sub-directory.
 * @param {number} maxRetries - Passed to `retryCall`.
 * @param {number} retryDelay - Passed to `retryCall`.
 * @param {boolean} force - Download even when a file for this stem exists.
 * @param {number} [timeout=600] - Passed to `spawnWithTimeout`.
 * @returns {Promise<{file: string|null, retries: number, error: string|null}>}
 */
async function stepDownload(row, sheetName, maxRetries, retryDelay, force, timeout = 600) {
  const { pkey, vid } = getVideoId(row);
  const stem = stemName(row, sheetName);

  if (!pkey) {
    return { file: null, retries: 0, error: 'no video ID' };
  }

  const dlDir = path.join(DOWNLOADS_DIR, sheetName);
  fs.mkdirSync(dlDir, { recursive: true });

  if (!force) {
    const existing = findDownloadedFile(dlDir, stem);
    if (existing) {
      lockedPrint(` [${stem}] exists ${path.basename(existing)}, skip download`);
      return { file: existing, retries: 0, error: null };
    }
  }

  const videoUrl = buildUrl(pkey, vid);
  lockedPrint(` [${stem}] start download (platform=${pkey})`);
  lockedPrint(` [${stem}] ${videoUrl}`);

  const cfg = PLATFORM_CONFIG[pkey];
  const args = [
    videoUrl,
    '-o', path.join(dlDir, `${stem}.%(ext)s`),
    '--no-playlist',
    '--newline',
    '--merge-output-format', 'mp4',
    '-f', cfg.format || 'bestvideo+bestaudio/best',
  ];

  if (cfg.concurrent_fragments) {
    args.push('--concurrent-fragments', String(cfg.concurrent_fragments));
  }

  // Cookies: a browser profile takes precedence over a cookie file.
  if (cfg.cookies_from_browser) {
    args.push('--cookies-from-browser', cfg.cookies_from_browser);
  } else if (cfg.cookie_file && fs.existsSync(cfg.cookie_file)) {
    args.push('--cookies', cfg.cookie_file);
  }

  // Proxy: passed both as a yt-dlp flag and through the child's environment.
  const extraEnv = {};
  if (cfg.proxy) {
    args.push('--proxy', cfg.proxy);
    extraEnv.HTTPS_PROXY = cfg.proxy;
    extraEnv.HTTP_PROXY = cfg.proxy;
  }

  if (cfg.extra_headers) args.push(...cfg.extra_headers);
  if (cfg.extra_args) args.push(...cfg.extra_args);

  const env = { ...process.env, ...extraEnv };

  let lastProgress = '';
  async function doDownload() {
    await spawnWithTimeout(YTDLP, args, timeout, {
      env,
      onProgress: line => {
        const prog = parseYtdlpProgress(line);
        // Print only when the rendered progress text actually changes.
        if (prog && prog !== lastProgress) {
          lastProgress = prog;
          lockedPrint(` [${stem}] ${prog}`);
        }
      },
    });
  }

  // Shared failure path: long tail for the log, short head for the result.
  const fail = (detail, retries) => {
    const text = String(detail);
    logError(`[${stem}] yt-dlp download failed: ${text.slice(-2000)}`);
    return { file: null, retries, error: text.slice(0, 500) };
  };

  let outcome;
  try {
    outcome = await retryCall(doDownload, maxRetries, retryDelay, stem);
  } catch (e) {
    return fail(e?.stderr || e?.message || e, maxRetries);
  }
  // retryCall may report failure in its result instead of throwing; do not
  // let that degrade into a generic "file not found" message.
  if (outcome?.error) {
    return fail(outcome.error, outcome.retriesUsed ?? maxRetries);
  }

  const downloaded = findDownloadedFile(dlDir, stem);
  if (downloaded) {
    lockedPrint(` [${stem}] download done -> ${path.basename(downloaded)}`);
    return { file: downloaded, retries: 0, error: null };
  }
  logError(`[${stem}] file not found after download`);
  return { file: null, retries: 0, error: 'file not found after download' };
}
|
|
852
|
+
|
|
853
|
+
// ============================== 转码 ==============================
|
|
854
|
+
/**
 * Read a media file's duration in seconds by running ffprobe.
 *
 * The command goes through a shell (`execSync`), so the path is quoted:
 * single-quoted on POSIX, where `"`, `$` and backticks inside a file name are
 * then taken literally, and double-quoted on Windows as before.
 *
 * @param {string} filepath - Media file to probe.
 * @returns {number|null} Duration in seconds, or null when ffprobe fails,
 *   times out (30 s) or prints something that is not a number.
 */
function getDuration(filepath) {
  try {
    const rawPath = String(filepath);
    const quotedPath = process.platform === 'win32'
      ? `"${rawPath}"`
      // Close the quote, emit an escaped quote, reopen: ' -> '\''
      : `'${rawPath.replace(/'/g, "'\\''")}'`;
    const output = execSync(
      `${FFPROBE} -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 ${quotedPath}`,
      { encoding: 'utf-8', timeout: 30000 }
    );
    const seconds = Number.parseFloat(output.trim());
    return Number.isNaN(seconds) ? null : seconds;
  } catch {
    // Best-effort: the caller treats null as "duration unknown".
    return null;
  }
}
|
|
866
|
+
|
|
867
|
+
/**
 * Transcode a downloaded file with ffmpeg into TRANSCODED_DIR/<sheetName>/.
 *
 * An existing non-empty output is reused unless `force` is set or the source
 * is newer than it. ffmpeg writes to a sibling "<stem>.partial<ext>" file that
 * is renamed onto the final name only after a clean exit, so a failed or
 * interrupted run never leaves a truncated file that a later run would accept.
 *
 * @param {string} srcFile - Path of the file to transcode.
 * @param {string} sheetName - Sheet name; used as the output sub-directory.
 * @param {number} maxRetries - Passed to `retryCall`.
 * @param {number} retryDelay - Passed to `retryCall`.
 * @param {boolean} force - Re-transcode even if an up-to-date output exists.
 * @param {number} [timeout=600] - Seconds before a single ffmpeg run is killed.
 * @returns {Promise<{file: string|null, retries: number, error: string|null}>}
 */
async function stepTranscode(srcFile, sheetName, maxRetries, retryDelay, force, timeout = 600) {
  const tcDir = path.join(TRANSCODED_DIR, sheetName);
  fs.mkdirSync(tcDir, { recursive: true });
  const stem = path.parse(srcFile).name;
  const outFile = path.join(tcDir, stem + TRANSCODE_EXT);
  // Same extension as the final file so ffmpeg selects the same output format.
  const tmpFile = path.join(tcDir, `${stem}.partial${TRANSCODE_EXT}`);

  if (!force && fs.existsSync(outFile) && fs.statSync(outFile).size > 0) {
    // A source newer than the output means it was re-downloaded.
    if (fs.statSync(srcFile).mtimeMs > fs.statSync(outFile).mtimeMs) {
      lockedPrint(` [${stem}] source updated (re-downloaded), re-transcoding`);
    } else {
      lockedPrint(` [${stem}] transcode file exists, skip`);
      return { file: outFile, retries: 0, error: null };
    }
  }

  lockedPrint(` [${stem}] start transcode -> ${path.basename(outFile)}`);

  const totalDur = getDuration(srcFile);
  const timeToken = /time=(\d+):(\d+):(\d+\.?\d*)/g;

  // One ffmpeg run; resolves on exit code 0, rejects otherwise or on timeout.
  function runFfmpeg() {
    return new Promise((resolve, reject) => {
      const args = ['-y', '-i', srcFile, ...FFMPEG_TRANSCODE_ARGS, tmpFile];
      // stdout is never consumed, so do not let it fill a pipe.
      const child = spawn(FFMPEG, args, { stdio: ['ignore', 'ignore', 'pipe'] });
      let stderr = '';
      let lastProgress = '';

      child.stderr.on('data', chunk => {
        const text = chunk.toString();
        stderr += text;
        // Scan only the new data plus a small overlap (covers a "time=" token
        // split across two chunks) instead of the whole log every time.
        const tail = stderr.slice(-(text.length + 32));
        let latest = null;
        for (const m of tail.matchAll(timeToken)) latest = m;
        if (!latest) return;

        const elapsed = Number(latest[1]) * 3600 + Number(latest[2]) * 60 + Number.parseFloat(latest[3]);
        let progress;
        if (totalDur && totalDur > 0) {
          const pct = Math.min(100, (elapsed / totalDur * 100)).toFixed(1);
          progress = `${pct}% (${Math.floor(elapsed)}s/${Math.floor(totalDur)}s)`;
        } else {
          progress = `${elapsed.toFixed(1)}s`;
        }
        if (progress !== lastProgress) {
          lockedPrint(` [${stem}] transcode: ${progress}`);
          lastProgress = progress;
        }
      });

      const timer = setTimeout(() => {
        child.kill();
        reject(Object.assign(new Error(`Transcode timeout after ${timeout}s`), { name: 'TimeoutError' }));
      }, timeout * 1000);

      child.on('close', code => {
        clearTimeout(timer);
        if (code === 0) resolve();
        else reject(Object.assign(new Error(`ffmpeg exit code ${code}`), { stderr }));
      });
      child.on('error', err => {
        clearTimeout(timer);
        reject(err);
      });
    });
  }

  async function doTranscode() {
    await runFfmpeg();
    // Publish the result only after ffmpeg finished cleanly.
    fs.renameSync(tmpFile, outFile);
  }

  // Shared failure path: drop the partial file, log the tail, return the head.
  const fail = (detail, retries) => {
    try {
      fs.rmSync(tmpFile, { force: true });
    } catch {
      // Best-effort cleanup; the original failure is what gets reported.
    }
    const text = String(detail);
    logError(`[${stem}] ffmpeg transcode failed: ${text.slice(-2000)}`);
    return { file: null, retries, error: text.slice(0, 500) };
  };

  let outcome;
  try {
    outcome = await retryCall(doTranscode, maxRetries, retryDelay, stem);
  } catch (e) {
    return fail(e?.stderr || e?.message || e, maxRetries);
  }
  // retryCall may report failure in its result instead of throwing.
  if (outcome?.error) {
    return fail(outcome.error, outcome.retriesUsed ?? maxRetries);
  }

  lockedPrint(` [${stem}] transcode done`);
  return { file: outFile, retries: 0, error: null };
}
|
|
947
|
+
|
|
948
|
+
// ============================== 识别 ==============================
|
|
949
|
+
/**
 * Transcribe an audio file with the configured whisper backend.
 *
 * Checks availability first, prints a start line describing the backend, then
 * delegates to `transcribeLocal` or `transcribeService`. The timeout is
 * forwarded to both backends.
 *
 * @param {string} audioFile - Path of the audio file to transcribe.
 * @param {number} maxRetries - Forwarded to the backend.
 * @param {number} retryDelay - Forwarded to the backend.
 * @param {number} [timeout=600] - Forwarded to the backend.
 * @returns {Promise<{text: string|null, retries: number, error: string|null}>}
 */
async function stepTranscribe(audioFile, maxRetries, retryDelay, timeout = 600) {
  const stem = path.parse(audioFile).name;
  const useLocal = WHISPER_BACKEND === 'local';

  const whisperOk = await checkWhisperAvailable();
  if (!whisperOk) {
    const backend = useLocal ? 'local CLI' : WHISPER_SERVICE;
    logWarn(`[${stem}] whisper not available (${backend})`);
    return { text: null, retries: 0, error: `whisper not available (${backend})` };
  }

  const fileSizeMB = (fs.statSync(audioFile).size / (1024 * 1024)).toFixed(1);

  if (useLocal) {
    const langLabel = WHISPER_LANGUAGE || 'auto';
    lockedPrint(` [${stem}] start transcribe [local(${WHISPER_MODEL}/${langLabel})] (${fileSizeMB}MB)...`);
    return transcribeLocal(audioFile, stem, maxRetries, retryDelay, timeout);
  }

  const modelLabel = WHISPER_SERVICE_MODEL || WHISPER_MODEL || '(server default)';
  lockedPrint(` [${stem}] start transcribe [service(${modelLabel})] (${fileSizeMB}MB)...`);
  return transcribeService(audioFile, stem, maxRetries, retryDelay, timeout);
}
|
|
974
|
+
|
|
975
|
+
/**
 * Transcribe with the local `whisper` CLI.
 *
 * whisper is asked to write "<stem>.txt" next to the audio file; that file is
 * read back and trimmed. An empty transcript is reported as an explicit
 * error, matching the service backend, and is not retried.
 *
 * @param {string} audioFile - Path of the audio file.
 * @param {string} stem - Base name of the audio file without extension.
 * @param {number} maxRetries - Passed to `retryCall`.
 * @param {number} retryDelay - Passed to `retryCall`.
 * @param {number} [timeout=600] - Passed to `spawnWithTimeout`.
 * @returns {Promise<{text: string|null, retries: number, error: string|null}>}
 */
async function transcribeLocal(audioFile, stem, maxRetries, retryDelay, timeout = 600) {
  const startTime = Date.now();
  const outDir = path.dirname(audioFile);

  async function doTranscribe() {
    const args = [
      audioFile,
      '--model', WHISPER_MODEL,
      '--device', WHISPER_DEVICE,
    ];
    if (WHISPER_LANGUAGE) args.push('--language', WHISPER_LANGUAGE);
    args.push('--output_format', 'txt', '--output_dir', outDir);

    await spawnWithTimeout('whisper', args, timeout);

    const outTxt = path.join(outDir, `${stem}.txt`);
    if (!fs.existsSync(outTxt)) {
      throw new Error('whisper output file not generated');
    }
    return fs.readFileSync(outTxt, 'utf-8').trim();
  }

  try {
    const { result: text, retriesUsed, error } = await retryCall(doTranscribe, maxRetries, retryDelay, stem);
    const elapsed = ((Date.now() - startTime) / 1000).toFixed(0);
    if (error) return { text: null, retries: retriesUsed, error };
    if (!text) {
      // Without this the caller would record a failure with a null error.
      logError(`[${stem}] local whisper transcribe failed: whisper returned empty text`);
      return { text: null, retries: retriesUsed ?? 0, error: 'whisper returned empty text' };
    }
    lockedPrint(` [${stem}] transcribe done (${elapsed}s, ${text.length} chars)`);
    return { text, retries: 0, error: null };
  } catch (e) {
    logError(`[${stem}] local whisper transcribe failed: ${e.message}`);
    return { text: null, retries: maxRetries, error: String(e.message).slice(0, 500) };
  }
}
|
|
1008
|
+
|
|
1009
|
+
/**
 * Transcribe through the HTTP whisper service.
 *
 * Optionally switches the server model via POST /load, then uploads the audio
 * as multipart form data to POST /inference and returns the trimmed `text`
 * field of the JSON response. A heartbeat line is printed every 5 s while the
 * request is in flight.
 *
 * @param {string} audioFile - Path of the audio file to upload.
 * @param {string} stem - Label used in log lines.
 * @param {number} maxRetries - Passed to `retryCall`.
 * @param {number} retryDelay - Passed to `retryCall`.
 * @param {number} [timeout=600] - Seconds allowed for one /inference request.
 * @returns {Promise<{text: string|null, retries: number, error: string|null}>}
 */
async function transcribeService(audioFile, stem, maxRetries, retryDelay, timeout = 600) {
  const startTime = Date.now();
  const elapsedSeconds = () => ((Date.now() - startTime) / 1000).toFixed(0);

  const progressInterval = setInterval(() => {
    lockedPrint(` [${stem}] transcribing... ${elapsedSeconds()}s`);
  }, 5000);

  async function doTranscribe() {
    // Switch model if needed
    if (WHISPER_SERVICE_MODEL && WHISPER_SERVICE_MODEL !== _SERVICE_MODEL_LOADED) {
      lockedPrint(` [${stem}] switch model: ${WHISPER_SERVICE_MODEL}`);
      const loadForm = new FormData();
      loadForm.append('model', WHISPER_SERVICE_MODEL);
      const loadResp = await fetch(`${WHISPER_SERVICE}/load`, {
        method: 'POST',
        body: loadForm,
        signal: AbortSignal.timeout(30000),
      });
      if (!loadResp.ok) throw new Error(`/load failed: HTTP ${loadResp.status}`);
      _SERVICE_MODEL_LOADED = WHISPER_SERVICE_MODEL;
    }

    // Run inference. The built-in FormData only accepts a string or a Blob as
    // value (a ReadStream makes append() throw), so wrap the file in a Blob.
    const audioBlob = new Blob([await fs.promises.readFile(audioFile)]);
    const form = new FormData();
    form.append('file', audioBlob, path.basename(audioFile));
    form.append('temperature', '0.0');
    form.append('temperature_inc', '0.2');
    form.append('response_format', 'json');

    const resp = await fetch(`${WHISPER_SERVICE}/inference`, {
      method: 'POST',
      body: form,
      // Self-cleaning timeout signal; nothing to clear on the error path.
      signal: AbortSignal.timeout(timeout * 1000),
    });
    if (!resp.ok) throw new Error(`/inference failed: HTTP ${resp.status}`);
    const data = await resp.json();
    const text = (data.text || '').trim();
    if (!text) throw new Error('whisper returned empty text');
    return text;
  }

  try {
    const { result: text, retriesUsed, error } = await retryCall(doTranscribe, maxRetries, retryDelay, stem);
    if (error) return { text: null, retries: retriesUsed, error };
    lockedPrint(` [${stem}] transcribe done (${elapsedSeconds()}s, ${text.length} chars)`);
    return { text, retries: 0, error: null };
  } catch (e) {
    logError(`[${stem}] whisper transcribe failed: ${e.message}`);
    return { text: null, retries: maxRetries, error: String(e.message).slice(0, 500) };
  } finally {
    clearInterval(progressInterval);
  }
}
|
|
1077
|
+
|
|
1078
|
+
// ============================== Excel 批量写回 ==============================
|
|
1079
|
+
/**
 * Write transcripts (and optionally AI keywords) back into the Excel workbook.
 *
 * Transcripts come from every result whose transcribe step succeeded with
 * non-blank text. Rows are located per sheet by matching the key against the
 * ID column (compared as an integer string) or, failing that, the title
 * column. The workbook is read once and written once.
 *
 * @param {Array<object>} results - Task results; reads `sheet`, `id_val` and
 *   `transcribe.{status,file}` (`file` holds the transcript text).
 * @param {Map<string, string>|null} [keywordsDict=null] - Map of "sheet|key" to
 *   keyword text.
 * @returns {void}
 */
function writeAllContentsToExcel(results, keywordsDict = null) {
  if (!results.length) return;

  // Collect content updates, keyed "sheet|id".
  const updates = new Map();
  for (const r of results) {
    if (r.transcribe.status !== 'success' || !r.transcribe.file) continue;
    const text = r.transcribe.file;
    if (text.trim()) updates.set(`${r.sheet}|${r.id_val}`, text);
  }

  if (!updates.size && !keywordsDict?.size) return;

  logInfo(`write ${updates.size} content + ${keywordsDict?.size || 0} keywords to Excel...`);
  const wb = XLSX.readFile(EXCEL_FILE, { cellFormula: true, cellDates: true });

  /**
   * Write text values into one column of one sheet, matching rows by id or
   * title. Cells are assigned directly so the other cells keep their formatting.
   */
  function writeColumn(sheetName, colName, entries) {
    if (!wb.SheetNames.includes(sheetName)) return;
    const ws = wb.Sheets[sheetName];

    // The array-of-arrays view is used for lookup only; the sheet is not rebuilt.
    const aoa = XLSX.utils.sheet_to_json(ws, { header: 1 });
    const headers = aoa[0];
    if (!headers) return; // empty sheet

    const targetCol = headers.indexOf(colName);
    const idCol = headers.indexOf(COL_ID);
    const titleCol = headers.indexOf(COL_TITLE);
    if (targetCol === -1) return;

    // aoa indices are relative to the sheet's used range, which need not start at A1.
    const origin = ws['!ref'] ? XLSX.utils.decode_range(ws['!ref']).s : { r: 0, c: 0 };

    const rowMatches = (row, key) => {
      if (idCol !== -1 && row[idCol] != null &&
          String(Math.floor(Number(row[idCol]))) === String(key)) {
        return true;
      }
      return titleCol !== -1 && String(row[titleCol]) === String(key);
    };

    for (const [key, text] of entries) {
      for (let r = 1; r < aoa.length; r++) {
        if (!rowMatches(aoa[r], key)) continue;
        const cellRef = XLSX.utils.encode_cell({ r: origin.r + r, c: origin.c + targetCol });
        ws[cellRef] = { t: 's', v: text };
        logInfo(`[${sheetName}/${key}] ${colName} written (${text.length} chars)`);
        break; // first matching row wins
      }
    }
  }

  // Write content column. groupBySheetMap returns plain objects, which are not
  // directly iterable with for...of.
  for (const [sheetName, rowsObj] of Object.entries(groupBySheetMap(updates))) {
    writeColumn(sheetName, COL_CONTENT, Object.entries(rowsObj));
  }

  // Write keywords column
  if (keywordsDict && keywordsDict.size) {
    const kwBySheet = new Map(); // sheetName -> [[key, text], ...]
    for (const [compositeKey, kwText] of keywordsDict) {
      // Split at the first '|' only: the row key may be a title containing '|'.
      const sep = compositeKey.indexOf('|');
      const sheetName = sep === -1 ? compositeKey : compositeKey.slice(0, sep);
      const kwKey = sep === -1 ? undefined : compositeKey.slice(sep + 1);
      if (!kwBySheet.has(sheetName)) kwBySheet.set(sheetName, []);
      kwBySheet.get(sheetName).push([kwKey, kwText]);
    }
    for (const [sheetName, entries] of kwBySheet) {
      writeColumn(sheetName, COL_KEYWORDS, entries);
    }
  }

  XLSX.writeFile(wb, EXCEL_FILE, { cellDates: true });
  logInfo('Excel write done');
}
|
|
1157
|
+
|
|
1158
|
+
/**
 * Group a Map keyed "sheet|rowKey" into a plain object
 * `{ [sheet]: { [rowKey]: text } }`.
 *
 * The composite key is split at the FIRST '|' only, so a row key that itself
 * contains '|' (e.g. a title) is kept whole. A composite key without any '|'
 * is stored under the property name "undefined", as before.
 *
 * @param {Iterable<[string, string]>} updates - Entries of "sheet|rowKey" -> text.
 * @returns {Object<string, Object<string, string>>} Plain (non-iterable)
 *   object; walk it with `Object.entries`.
 */
function groupBySheetMap(updates) {
  const result = {};
  for (const [compositeKey, text] of updates) {
    const sep = compositeKey.indexOf('|');
    const sheetName = sep === -1 ? compositeKey : compositeKey.slice(0, sep);
    const key = sep === -1 ? undefined : compositeKey.slice(sep + 1);
    // Own-property check: a sheet called e.g. "constructor" must not resolve
    // to something inherited from Object.prototype.
    if (!Object.hasOwn(result, sheetName)) result[sheetName] = {};
    result[sheetName][key] = text;
  }
  return result;
}
|
|
1167
|
+
|
|
1168
|
+
// ============================== 报告 ==============================
|
|
1169
|
+
/**
 * Write a JSON run report to REPORTS_DIR/report_<YYYYMMDD>_<HHMMSS>.json.
 *
 * The timestamp in the file name is UTC, taken from `toISOString()`. The
 * report holds the run config, per-status counts, every item's `toJSON()`
 * output, and a condensed list of failed / partial items.
 *
 * @param {Array<object>} results - Task results; reads `overall_status`,
 *   `toJSON()`, and for failed/partial items `sheet`, `id_val`, `title`,
 *   `stem`, `error` and the `error` of `download`/`transcode`/`transcribe`.
 * @param {object} config - Run configuration, embedded verbatim.
 * @returns {string} Path of the written report file.
 */
function generateReport(results, config) {
  fs.mkdirSync(REPORTS_DIR, { recursive: true });

  const now = new Date();
  // "2024-01-02T03:04:05.678Z" -> "20240102_030405" (no trailing dot).
  const ts = now.toISOString().replace(/[-:]/g, '').replace('T', '_').slice(0, 15);
  const reportFile = path.join(REPORTS_DIR, `report_${ts}.json`);

  // Single pass over the results; unknown statuses only count towards `total`.
  const counts = { success: 0, partial: 0, failed: 0, no_video: 0 };
  for (const { overall_status: status } of results) {
    if (Object.hasOwn(counts, status)) counts[status] += 1;
  }

  const report = {
    timestamp: now.toISOString(),
    config,
    summary: { total: results.length, ...counts },
    items: results.map(r => r.toJSON()),
    failed_items: results
      .filter(r => r.overall_status === 'failed' || r.overall_status === 'partial')
      .map(r => ({
        sheet: r.sheet, id: r.id_val, title: r.title, stem: r.stem,
        error: r.error,
        download_error: r.download.error,
        transcode_error: r.transcode.error,
        transcribe_error: r.transcribe.error,
      })),
  };

  fs.writeFileSync(reportFile, JSON.stringify(report, null, 2), 'utf-8');
  logInfo(`report generated: ${reportFile}`);
  return reportFile;
}
|
|
1198
|
+
|
|
1199
|
+
/**
 * Print a run summary to stdout: per-status counts, then one entry for every
 * result that did not finish with status 'success', including the overall
 * error and the error of each failed step (truncated to 120 characters).
 *
 * @param {Array<object>} results - Task results; reads `overall_status` and,
 *   for non-success items, `sheet`, `id_val`, `title`, `error` and the
 *   `status`/`error` of `download`, `transcode` and `transcribe`.
 * @returns {void}
 */
function printReportSummary(results) {
  const countOf = status =>
    results.reduce((n, r) => (r.overall_status === status ? n + 1 : n), 0);
  const rule = '='.repeat(60);

  const summaryLines = [
    `\n${rule}`,
    ` 执行摘要`,
    `${rule}`,
    ` 总计: ${results.length}`,
    ` ✅ 成功: ${countOf('success')}`,
    ` ⚠️ 部分成功: ${countOf('partial')}`,
    ` ❌ 失败: ${countOf('failed')}`,
    ` ⏭️ 无视频ID: ${countOf('no_video')}`,
    `${rule}`,
  ];
  for (const line of summaryLines) console.log(line);

  const failures = results.filter(r => r.overall_status !== 'success');
  if (failures.length === 0) return;

  const statusIcons = { partial: '⚠️', failed: '❌', no_video: '⏭️' };
  const stepLabels = [
    ['download', '下载失败'],
    ['transcode', '转码失败'],
    ['transcribe', '识别失败'],
  ];

  console.log(`\n失败/异常详情:`);
  for (const r of failures) {
    const icon = statusIcons[r.overall_status] || '?';
    console.log(` ${icon} [${r.sheet}] ${r.id_val} (${(r.title || 'N/A').slice(0, 30)})`);
    if (r.error) console.log(` 错误: ${r.error.slice(0, 120)}`);
    for (const [step, label] of stepLabels) {
      if (r[step].status === 'failed') {
        console.log(` ${label}: ${(r[step].error || 'N/A').slice(0, 120)}`);
      }
    }
  }
}
|
|
1228
|
+
|
|
1229
|
+
// ============================== 单任务处理 ==============================
|
|
1230
|
+
/**
 * Run the requested pipeline steps (download -> transcode -> transcribe ->
 * analyze) for a single row and collect the outcome in a TaskResult.
 *
 * A step that is not in `steps` is not executed; for download and transcode
 * an already existing file is picked up instead so later steps can still run.
 * The function returns early as soon as a step needed by the following ones
 * fails. A failed AI analysis does not change `overall_status`.
 *
 * @param {object} row - Row object understood by `getVideoId`, `stemName`, `rowKey`.
 * @param {string} sheetName - Sheet the row belongs to.
 * @param {string[]} steps - Subset of 'download', 'transcode', 'transcribe', 'analyze'.
 * @param {number} maxRetries - Forwarded to every step.
 * @param {number} retryDelay - Forwarded to every step.
 * @param {boolean} force - Forwarded to download and transcode.
 * @param {boolean} whisperAvailable - Result of the caller's availability check.
 * @param {string} [positionLabel=''] - Optional prefix for the start line.
 * @param {number} [downloadTimeout=600]
 * @param {number} [transcodeTimeout=600]
 * @param {number} [transcribeTimeout=600]
 * @param {number} [analyzeTimeout=300]
 * @returns {Promise<TaskResult>}
 */
async function processOneTask(row, sheetName, steps, maxRetries, retryDelay, force,
  whisperAvailable, positionLabel = '', downloadTimeout = 600, transcodeTimeout = 600,
  transcribeTimeout = 600, analyzeTimeout = 300) {

  const { pkey, vid } = getVideoId(row);
  const stem = stemName(row, sheetName);
  const key = rowKey(row);
  const title = String(row[COL_TITLE] || '');
  const videoUrl = pkey ? buildUrl(pkey, vid) : null;

  const result = new TaskResult(sheetName, key, title, pkey, videoUrl, stem);
  const briefError = e => String(e.message).slice(0, 500);

  const tag = positionLabel ? `${positionLabel} ` : '';
  lockedPrint(`${tag}[${stem}] start (sheet=${sheetName}, platform=${pkey || 'N/A'}, title=${title.slice(0, 40)})`);
  logInfo(`[${stem}] start (sheet=${sheetName}, platform=${pkey || 'N/A'})`);

  // ── download ──
  let dlFile = null;
  if (steps.includes('download')) {
    if (!pkey) {
      result.download = new StepResult('skipped');
      result.overall_status = 'no_video';
      result.error = 'no video ID';
      return result;
    }
    try {
      const { file, retries, error } = await stepDownload(row, sheetName, maxRetries, retryDelay, force, downloadTimeout);
      dlFile = file;
      result.download = new StepResult(file ? 'success' : 'failed', file, error, retries);
    } catch (e) {
      result.download = new StepResult('failed', null, briefError(e), maxRetries);
    }
    if (!dlFile) {
      result.overall_status = 'failed';
      result.error = `download failed: ${result.download.error}`;
      return result;
    }
  } else {
    // Step not requested: reuse a previously downloaded file if there is one.
    const dlDir = path.join(DOWNLOADS_DIR, sheetName);
    dlFile = findDownloadedFile(dlDir, stem);
    result.download = dlFile ? new StepResult('success', dlFile) : new StepResult('skipped');
  }

  // ── transcode ──
  let tcFile = null;
  if (steps.includes('transcode') && dlFile) {
    try {
      const { file, retries, error } = await stepTranscode(dlFile, sheetName, maxRetries, retryDelay, force, transcodeTimeout);
      tcFile = file;
      result.transcode = new StepResult(file ? 'success' : 'failed', file, error, retries);
    } catch (e) {
      result.transcode = new StepResult('failed', null, briefError(e), maxRetries);
    }
    if (!tcFile) {
      result.overall_status = 'partial';
      result.error = `download success but transcode failed: ${result.transcode.error}`;
      return result;
    }
  } else {
    // Step not run: reuse a previously transcoded file if there is one.
    const tcPath = path.join(TRANSCODED_DIR, sheetName, stem + TRANSCODE_EXT);
    if (fs.existsSync(tcPath)) {
      result.transcode = new StepResult('success', tcPath);
      tcFile = tcPath;
    } else {
      result.transcode = new StepResult('skipped');
    }
  }

  // ── transcribe ──
  if (steps.includes('transcribe') && tcFile) {
    if (!whisperAvailable) {
      // Name the backend that is actually configured, as stepTranscribe does.
      const backend = WHISPER_BACKEND === 'local' ? 'local CLI' : WHISPER_SERVICE;
      result.transcribe = new StepResult('failed', null, `whisper unreachable (${backend})`);
      result.overall_status = 'partial';
      result.error = 'download+transcode success but whisper unreachable';
      return result;
    }
    try {
      const { text, retries, error } = await stepTranscribe(tcFile, maxRetries, retryDelay, transcribeTimeout);
      // For this step the StepResult `file` slot carries the transcript text.
      result.transcribe = new StepResult(text ? 'success' : 'failed', text, error, retries);
      if (!text) {
        result.overall_status = 'partial';
        result.error = `download+transcode success but transcribe failed: ${error}`;
      } else {
        result.overall_status = 'success';
      }
    } catch (e) {
      result.transcribe = new StepResult('failed', null, briefError(e), maxRetries);
      result.overall_status = 'partial';
      result.error = `download+transcode success but transcribe failed: ${e.message}`;
    }
  }

  // ── AI analyze ──
  if (steps.includes('analyze') && result.transcribe.status === 'success') {
    const aiEnabled = (process.env.AI_ENABLED || 'true').toLowerCase() === 'true';
    if (aiEnabled) {
      const txt = result.transcribe.file;
      if (txt) {
        try {
          const { text: kw, retries, error } = await stepAnalyze(txt, maxRetries, retryDelay, analyzeTimeout);
          result.analyze = new StepResult(kw ? 'success' : 'failed', kw, error, retries);
          if (kw) {
            lockedPrint(` [${result.stem}] AI analysis done (${kw.length} chars)`);
          } else {
            lockedPrint(` [${result.stem}] AI analysis failed: ${error}`);
          }
        } catch (e) {
          result.analyze = new StepResult('failed', null, briefError(e), maxRetries);
        }
      } else {
        result.analyze = new StepResult('skipped', null, 'content empty');
      }
    } else {
      result.analyze = new StepResult('skipped');
    }
  } else if (steps.includes('analyze') && result.transcribe.status !== 'success') {
    result.analyze = new StepResult('skipped', null, 'transcribe not successful, skip AI analysis');
  }

  // Nothing above reported a problem and no step set a final status.
  if (result.overall_status === 'pending') {
    result.overall_status = 'success';
  }

  return result;
}
|
|
1360
|
+
|
|
1361
|
+
// ============================== 主控流程 ==============================
|
|
1362
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
1363
|
+
// URL 直链流水线(--url 模式)
|
|
1364
|
+
// ═══════════════════════════════════════════════════════════════════
|
|
1365
|
+
|
|
1366
|
+
/**
 * Direct-URL pipeline (`--url` mode): run the requested steps for one video
 * through `processOneTask` using a synthetic row, print a per-step outcome,
 * and save any transcript / keyword text to REPORTS_DIR/url-tasks/<stem>.txt.
 *
 * @param {object} opts
 * @param {string} opts.watchUrl - Original URL; written into the result file.
 * @param {string} opts.platform - Platform label; also used as the sheet name.
 * @param {string} opts.pkey - Key into PLATFORM_CONFIG.
 * @param {string} opts.videoId - Video id; also used as the row's id and title.
 * @param {string} opts.stem - File-name stem, pre-seeded in the row's stem cache.
 * @param {string[]} opts.steps - Steps to run.
 * @returns {Promise<void>}
 */
async function runUrlTask(opts) {
  const {
    watchUrl, platform, pkey, videoId, stem, steps,
    maxRetries, retryDelay, force,
    downloadTimeout, transcodeTimeout, transcribeTimeout, analyzeTimeout,
    whisperAvailable,
  } = opts;

  const sheetName = platform;
  const platformField = PLATFORM_CONFIG[pkey]?.field || '';

  // Build a synthetic row that mimics the structure of an Excel row.
  const syntheticRow = { _stemCache: { [sheetName]: stem } };
  if (platformField) syntheticRow[platformField] = videoId;
  if (COL_ID) syntheticRow[COL_ID] = videoId;
  if (COL_TITLE) syntheticRow[COL_TITLE] = videoId;

  console.log(c('dim', '\n── 开始执行 ──\n'));

  const result = await processOneTask(
    syntheticRow, sheetName, steps, maxRetries, retryDelay, force,
    whisperAvailable, '', downloadTimeout, transcodeTimeout,
    transcribeTimeout, analyzeTimeout,
  );

  // ── show results ──
  console.log(c('dim', '\n── 结果 ──\n'));
  const successes = [];

  // Steps whose `file` field is a path on disk (download, transcode).
  const showFileStep = (stepName, icon, label) => {
    const step = result[stepName];
    if (!step) return;
    if (step.file && fs.existsSync(step.file)) {
      const size = (fs.statSync(step.file).size / 1024 / 1024).toFixed(1) + ' MB';
      console.log(` ${icon} ${label}: ${c('green', step.file)} (${size})`);
      successes.push(stepName);
    } else if (step.status === 'skipped') {
      console.log(` ${icon} ${label}: ${c('yellow', '已跳过 (文件已存在)')}`);
      successes.push(stepName);
    } else {
      console.log(` ${icon} ${label}: ${c('red', '失败')} — ${step.error || ''}`);
    }
  };

  // Steps whose `file` field carries text (transcript / keywords). Only some
  // of them count a skipped step as a success.
  const showTextStep = (stepName, icon, label, skippedCountsAsSuccess) => {
    const step = result[stepName];
    if (!step) return;
    const text = step.file;
    if (text && typeof text === 'string') {
      console.log(` ${icon} ${label}: ${c('green', text.length + ' \u5B57\u7B26')}`);
      successes.push(stepName);
    } else if (step.status === 'skipped') {
      console.log(` ${icon} ${label}: ${c('yellow', '已跳过')}`);
      if (skippedCountsAsSuccess) successes.push(stepName);
    } else {
      console.log(` ${icon} ${label}: ${c('red', '失败')} — ${step.error || ''}`);
    }
  };

  showFileStep('download', '\uD83D\uDCE5', '下载');
  showFileStep('transcode', '\uD83C\uDFB5', '转码');
  showTextStep('transcribe', '\uD83D\uDCDD', '识别', true);
  showTextStep('analyze', '\uD83E\uDD16', 'AI\u5206\u6790', false);

  // Save the text results.
  const textOf = step => (step && typeof step.file === 'string' ? step.file : '');
  const transcribeText = textOf(result.transcribe);
  const analyzeText = textOf(result.analyze);

  if (transcribeText || analyzeText) {
    const outDir = path.join(REPORTS_DIR, 'url-tasks');
    fs.mkdirSync(outDir, { recursive: true });
    const outFile = path.join(outDir, `${stem}.txt`);

    const lines = [
      `URL: ${watchUrl}`,
      `\u5E73\u53F0: ${platform}`,
      `\u89C6\u9891ID: ${videoId}`,
      '', '='.repeat(60), '',
    ];
    if (transcribeText) {
      lines.push('\u3010\u8BED\u97F3\u8BC6\u522B\u5185\u5BB9\u3011', '', transcribeText, '');
    }
    if (analyzeText) {
      lines.push('\u3010AI\u5173\u952E\u8BCD\u5206\u6790\u3011', '', analyzeText, '');
    }
    fs.writeFileSync(outFile, lines.join('\n'), 'utf-8');
    console.log(`\n \uD83D\uDCC4 \u7ED3\u679C\u5DF2\u4FDD\u5B58\u81F3: ${c('cyan', outFile)}`);
  }

  console.log(c('bold', c('green', `\n\uD83C\uDF89 \u5168\u90E8\u5B8C\u6210! (${successes.length}/${steps.length} \u6B65\u6210\u529F)\n`)));
}
|
|
1475
|
+
|
|
1476
|
+
/**
 * Run the Excel-driven batch pipeline.
 *
 * Builds one task per matching row of the selected sheet(s), pre-checks the
 * required tools/services, executes the tasks with bounded concurrency, writes
 * results back to Excel and generates a report. When `retryFailed` is set the
 * whole run is delegated to `runFromReport`.
 *
 * @param {object} options
 * @param {?string} options.targetSheet - Only process this sheet; falsy means every sheet in VIDEO_SHEETS.
 * @param {?string} options.targetId - Only process rows whose id (floored to an integer) or title equals this value.
 * @param {string[]} options.steps - Pipeline steps to execute.
 * @param {number} options.maxRetries - Forwarded to processOneTask.
 * @param {number} options.retryDelay - Forwarded to processOneTask.
 * @param {number} options.concurrency - Requested number of parallel tasks; sanitized to an integer >= 1.
 * @param {boolean} options.force - Forwarded to processOneTask.
 * @param {boolean} options.dryRun - Only print the task list; nothing is executed.
 * @param {?string} options.retryFailed - Path of a report JSON whose failed items should be re-run.
 * @param {number} options.downloadTimeout - Forwarded to processOneTask.
 * @param {number} options.transcodeTimeout - Forwarded to processOneTask.
 * @param {number} options.transcribeTimeout - Forwarded to processOneTask.
 * @param {number} options.analyzeTimeout - Forwarded to processOneTask.
 * @returns {Promise<void>}
 */
async function run({
  targetSheet, targetId, steps, maxRetries, retryDelay,
  concurrency, force, dryRun, retryFailed,
  downloadTimeout, transcodeTimeout, transcribeTimeout, analyzeTimeout,
}) {
  // ── Retry-failed mode ──
  if (retryFailed) {
    return runFromReport(retryFailed, steps, maxRetries, retryDelay, concurrency, force, dryRun,
      downloadTimeout, transcodeTimeout, transcribeTimeout, analyzeTimeout);
  }

  // p-limit throws a TypeError unless it receives an integer >= 1 (or Infinity),
  // and Math.max(1, NaN) is NaN, so sanitize the value instead of just clamping it.
  const requestedConcurrency = Number(concurrency);
  let workerCount = 1;
  if (requestedConcurrency === Infinity) {
    workerCount = Infinity;
  } else if (Number.isFinite(requestedConcurrency) && requestedConcurrency >= 1) {
    workerCount = Math.floor(requestedConcurrency);
  } else if (Number.isNaN(requestedConcurrency)) {
    logWarn(`invalid concurrency (${concurrency}), falling back to 1`);
  }

  // ── Build the task list ──
  const sheets = targetSheet ? [targetSheet] : VIDEO_SHEETS;
  const tasks = [];
  for (const sheetName of sheets) {
    let rows = readExcelSheet(sheetName);
    if (targetId) {
      rows = rows.filter(row => {
        if (row[COL_ID] != null) {
          try {
            // Ids are compared as floored integers rendered as strings.
            if (String(Math.floor(Number(row[COL_ID]))) === String(targetId)) return true;
          } catch { /* best effort: an unconvertible id simply falls through to the title match */ }
        }
        if (String(row[COL_TITLE]) === String(targetId)) return true;
        return false;
      });
      if (!rows.length) {
        logError(`Sheet [${sheetName}] no match for id/title = ${targetId}`);
        continue;
      }
    }
    precomputeStems(rows, sheetName);
    for (const row of rows) {
      tasks.push({ row, sheetName });
    }
  }

  logInfo(`tasks: ${tasks.length}, concurrency: ${workerCount}, max retries: ${maxRetries}`);

  // ── Tool/service pre-check ──
  const envCheck = await checkEnvironmentAsync(steps);
  if (!envCheck.allOk) {
    console.log(`\n${'='.repeat(60)}`);
    console.log(' ⚠️ 工具/服务预检:以下依赖不可用');
    console.log('='.repeat(60));
    for (const issue of envCheck.issues) {
      console.log(` • ${issue}`);
    }
    console.log('\n 涉及的步骤将失败。');
    // A dry run never executes anything, so it does not need a confirmation.
    if (!dryRun) {
      const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
      const answer = await new Promise(resolve => {
        rl.question('\n 是否继续执行?(输入 yes 继续,其他任意键取消): ', ans => {
          rl.close();
          resolve(ans.trim().toLowerCase());
        });
      });
      if (answer !== 'yes') {
        logInfo('用户取消执行(工具不可用)');
        return;
      }
    }
  }

  // ── Dry-run mode ──
  if (dryRun) {
    printDryRun(tasks, steps, envCheck);
    return;
  }

  // ── Whisper availability ──
  let whisperAvailable = false;
  if (steps.includes('transcribe')) {
    whisperAvailable = await checkWhisperAvailable();
    if (!whisperAvailable) {
      const backend = WHISPER_BACKEND === 'local' ? 'local CLI' : WHISPER_SERVICE;
      logWarn(`⚠️ whisper not available (${backend}), transcribe step will fail`);
    }
  }

  // ── Concurrent execution ──
  const results = [];
  const overall = new OverallProgress(tasks.length);
  const limit = pLimit(workerCount);

  const taskFns = tasks.map(({ row, sheetName }, idx) =>
    limit(async () => {
      const posLabel = `[${idx + 1}/${tasks.length}]`;
      let result;
      try {
        result = await processOneTask(row, sheetName, steps, maxRetries, retryDelay, force,
          whisperAvailable, posLabel, downloadTimeout, transcodeTimeout, transcribeTimeout, analyzeTimeout);
      } catch (e) {
        // Turn an unexpected exception into a failed result so that one task
        // cannot reject Promise.all and abort the rest of the batch.
        const stem = stemName(row, sheetName);
        logError(`[${stem}] unhandled error: ${e.message}`);
        result = new TaskResult(sheetName, rowKey(row), String(row[COL_TITLE] || ''), null, null, stem);
        result.overall_status = 'failed';
        result.error = `unhandled error: ${String(e.message).slice(0, 500)}`;
      }
      results.push(result);
      overall.addResult(result.overall_status);
      console.log(`\n${overall.summaryLine()}\n`);
      return result;
    })
  );

  await Promise.all(taskFns);

  // ── Batch write-back to Excel ──
  // NOTE(review): the write-back only happens when 'transcribe' is among the
  // steps, so an analyze-only run never reaches writeAllContentsToExcel here —
  // confirm that this is intended.
  if (steps.includes('transcribe')) {
    const kwMap = new Map();
    for (const r of results) {
      if (r.analyze.status === 'success' && r.analyze.file) {
        kwMap.set(`${r.sheet}|${r.id_val}`, r.analyze.file);
      }
    }
    writeAllContentsToExcel(results, kwMap.size ? kwMap : null);
  }

  // ── Report ──
  const config = {
    sheets, target_id: targetId, steps, max_retries: maxRetries,
    retry_delay: retryDelay, concurrency: workerCount, force,
  };
  const reportPath = generateReport(results, config);
  printReportSummary(results);

  logInfo(`all done! report: ${reportPath}`);
}
|
|
1605
|
+
|
|
1606
|
+
/**
 * Print the dry-run report to stdout without executing anything.
 *
 * The report has two parts: an environment section (one line per tool, marked
 * as available, unavailable, or not needed for the selected steps) and a
 * per-task section showing, for every selected step, whether it would be
 * skipped, is blocked, is waiting on an earlier step, or is pending.
 *
 * @param {Array<{row: object, sheetName: string}>} tasks - Tasks that would be processed.
 * @param {string[]} steps - Selected pipeline steps.
 * @param {object} env - Pre-check result; reads `ytdlp`, `ffmpeg`, `ffprobe`, `whisper`, `ai` and `issues`.
 * @returns {void}
 */
function printDryRun(tasks, steps, env) {
  const divider = '='.repeat(60);
  const tick = (ok) => (ok ? '✅' : '❌');
  const wantsDownload = steps.includes('download');
  const wantsTranscode = steps.includes('transcode');
  const wantsTranscribe = steps.includes('transcribe');
  const wantsAnalyze = steps.includes('analyze');

  console.log(`\n${divider}`);
  console.log(` 干跑模式 - 任务清单 (${tasks.length} 条)`);
  console.log(divider);

  // Environment section
  console.log('\n --- 环境检测 ---');

  if (!wantsDownload) {
    console.log(` ⏭ yt-dlp: 未启用(步骤不含 download)`);
  } else {
    console.log(` ${tick(env.ytdlp)} yt-dlp: ${YTDLP}`);
  }

  if (!wantsTranscode) {
    console.log(` ⏭ ffmpeg: 未启用(步骤不含 transcode)`);
    console.log(` ⏭ ffprobe: 未启用(步骤不含 transcode)`);
  } else {
    console.log(` ${tick(env.ffmpeg)} ffmpeg: ${FFMPEG}`);
    console.log(` ${tick(env.ffprobe)} ffprobe: ${FFPROBE}`);
  }

  if (!wantsTranscribe) {
    console.log(` ⏭ whisper: 未启用(步骤不含 transcribe)`);
  } else {
    const backend = WHISPER_BACKEND === 'local' ? 'local CLI' : `service ${WHISPER_SERVICE}`;
    console.log(` ${tick(env.whisper)} whisper (${backend})`);
  }

  if (!wantsAnalyze) {
    console.log(` ⏭ AI分析: 未启用(步骤不含 analyze)`);
  } else {
    const aiModel = process.env.AI_MODEL || '';
    let aiLine;
    if (env.ai) {
      aiLine = `✅ AI分析 (${aiModel}): 配置完整`;
    } else {
      // The last recorded issue is shown as the reason for the AI failure.
      aiLine = `❌ AI分析: ${env.issues[env.issues.length - 1] || ''}`;
    }
    console.log(` ${aiLine}`);
  }

  // Per-task section
  console.log('\n --- 任务步骤状态 ---');
  tasks.forEach(({ row, sheetName }, index) => {
    const { pkey, vid } = getVideoId(row);
    const stem = stemName(row, sheetName);
    const url = pkey ? buildUrl(pkey, vid) : 'N/A';

    const hasDownload = fs.existsSync(path.join(DOWNLOADS_DIR, sheetName, `${stem}.mp4`));
    const hasTranscode = fs.existsSync(path.join(TRANSCODED_DIR, sheetName, `${stem}${TRANSCODE_EXT}`));

    const content = row[COL_CONTENT];
    const hasContent = content != null && String(content).trim() !== '';
    const keywords = row[COL_KEYWORDS];
    const hasKeywords = keywords != null && String(keywords).trim() !== '';

    console.log(`\n ${index + 1}. [${sheetName}] ${stem}`);
    console.log(` platform=${pkey}, url=${url}`);

    // Without a platform key no step can run for this row.
    if (!pkey) {
      console.log(' ⚠️ 无可用视频 ID');
      return;
    }

    const downloadStatus = () => {
      if (hasDownload) return '[跳过-已有文件]';
      if (!env.ytdlp) return '[不可用-yt-dlp]';
      return '[待执行]';
    };
    const transcodeStatus = () => {
      if (hasTranscode) return '[跳过-已有文件]';
      if (!env.ffmpeg) return '[不可用-ffmpeg]';
      if (!hasDownload) return '[等待-需先下载]';
      return '[待执行]';
    };
    const transcribeStatus = () => {
      if (hasContent) return `[跳过-content已有${String(content).length}字符]`;
      if (!env.whisper) return '[不可用-whisper]';
      if (!hasTranscode) return '[等待-需先转码]';
      return '[待执行]';
    };
    const analyzeStatus = () => {
      if (hasKeywords) return `[跳过-keywords已有${String(keywords).length}字符]`;
      if (!env.ai) return '[不可用-AI未配置]';
      if (!hasContent && !hasTranscode) return '[等待-需先识别]';
      return '[待执行]';
    };

    if (wantsDownload) console.log(` download : ${downloadStatus()}`);
    if (wantsTranscode) console.log(` transcode: ${transcodeStatus()}`);
    if (wantsTranscribe) console.log(` transcribe: ${transcribeStatus()}`);
    if (wantsAnalyze) console.log(` analyze : ${analyzeStatus()}`);
  });
}
|
|
1697
|
+
|
|
1698
|
+
/**
 * Re-run the items listed under `failed_items` in a previously generated report.
 *
 * Every failed item is looked up again in its sheet (by id floored to an
 * integer, or by title); items that can no longer be found are skipped with a
 * warning. The remaining rows go through the same pre-check, concurrent
 * execution, Excel write-back and report generation as a normal run.
 *
 * @param {string} reportPath - Path of the report JSON to read.
 * @param {string[]} steps - Pipeline steps to execute.
 * @param {number} maxRetries - Forwarded to processOneTask.
 * @param {number} retryDelay - Forwarded to processOneTask.
 * @param {number} concurrency - Requested number of parallel tasks; sanitized to an integer >= 1.
 * @param {boolean} force - Forwarded to processOneTask.
 * @param {boolean} dryRun - Only list the items that would be retried.
 * @param {number} downloadTimeout - Forwarded to processOneTask.
 * @param {number} transcodeTimeout - Forwarded to processOneTask.
 * @param {number} transcribeTimeout - Forwarded to processOneTask.
 * @param {number} analyzeTimeout - Forwarded to processOneTask.
 * @returns {Promise<void>}
 * @throws {Error} If the report cannot be read or is not valid JSON.
 */
async function runFromReport(reportPath, steps, maxRetries, retryDelay, concurrency, force, dryRun,
  downloadTimeout, transcodeTimeout, transcribeTimeout, analyzeTimeout) {

  const report = JSON.parse(fs.readFileSync(reportPath, 'utf-8'));
  const failedItems = report.failed_items || [];
  if (!failedItems.length) {
    logInfo('no failed items in report');
    return;
  }
  logInfo(`loaded ${failedItems.length} failed items from report`);

  // Read each sheet and precompute its stems once, instead of once per failed item.
  const sheetCache = new Map();
  const tasks = [];
  for (const item of failedItems) {
    const sheetName = item.sheet;
    const key = String(item.id);

    let cached = sheetCache.get(sheetName);
    if (!cached) {
      cached = { rows: readExcelSheet(sheetName), stemsReady: false };
      sheetCache.set(sheetName, cached);
    }

    const match = cached.rows.find(row => {
      if (row[COL_ID] != null) {
        try {
          if (String(Math.floor(Number(row[COL_ID]))) === key) return true;
        } catch { /* best effort: an unconvertible id simply falls through to the title match */ }
      }
      return String(row[COL_TITLE]) === key;
    });
    if (!match) {
      logWarn(`[${sheetName}] not found ${key}, skip`);
      continue;
    }

    // Stems are computed over the whole sheet, as before, but only once per sheet.
    if (!cached.stemsReady) {
      precomputeStems(cached.rows, sheetName);
      cached.stemsReady = true;
    }
    tasks.push({ row: match, sheetName });
  }

  if (!tasks.length) {
    logInfo('no valid items to retry');
    return;
  }

  if (dryRun) {
    console.log(`\n 干跑模式 - 重跑 ${tasks.length} 条失败项`);
    for (let i = 0; i < tasks.length; i++) {
      const { row, sheetName } = tasks[i];
      const { pkey, vid } = getVideoId(row);
      const stem = stemName(row, sheetName);
      const url = pkey ? buildUrl(pkey, vid) : 'N/A';
      console.log(` ${i + 1}. [${sheetName}] ${stem} platform=${pkey} url=${url}`);
    }
    return;
  }

  // ── Tool/service pre-check ──
  const envRfr = await checkEnvironmentAsync(steps);
  if (!envRfr.allOk) {
    console.log(`\n${'='.repeat(60)}`);
    console.log(' ⚠️ 工具/服务预检:以下依赖不可用');
    console.log('='.repeat(60));
    for (const issue of envRfr.issues) console.log(` • ${issue}`);
    console.log('\n 涉及的步骤将失败。');
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    const answer = await new Promise(resolve => {
      rl.question('\n 是否继续重跑?(输入 yes 继续,其他任意键取消): ', ans => {
        rl.close();
        resolve(ans.trim().toLowerCase());
      });
    });
    if (answer !== 'yes') {
      logInfo('用户取消重跑(工具不可用)');
      return;
    }
  }

  let whisperAvailable = false;
  if (steps.includes('transcribe')) {
    whisperAvailable = await checkWhisperAvailable();
    if (!whisperAvailable) {
      // Same warning the normal run emits, so a retry fails just as visibly.
      const backend = WHISPER_BACKEND === 'local' ? 'local CLI' : WHISPER_SERVICE;
      logWarn(`⚠️ whisper not available (${backend}), transcribe step will fail`);
    }
  }

  // p-limit throws a TypeError unless it receives an integer >= 1 (or Infinity),
  // and Math.max(1, NaN) is NaN, so sanitize the value instead of just clamping it.
  const requestedConcurrency = Number(concurrency);
  let workerCount = 1;
  if (requestedConcurrency === Infinity) {
    workerCount = Infinity;
  } else if (Number.isFinite(requestedConcurrency) && requestedConcurrency >= 1) {
    workerCount = Math.floor(requestedConcurrency);
  } else if (Number.isNaN(requestedConcurrency)) {
    logWarn(`invalid concurrency (${concurrency}), falling back to 1`);
  }

  const results = [];
  const overall = new OverallProgress(tasks.length);
  const limit = pLimit(workerCount);

  const taskFns = tasks.map(({ row, sheetName }, idx) =>
    limit(async () => {
      const posLabel = `[${idx + 1}/${tasks.length}]`;
      let result;
      try {
        result = await processOneTask(row, sheetName, steps, maxRetries, retryDelay, force,
          whisperAvailable, posLabel, downloadTimeout, transcodeTimeout, transcribeTimeout, analyzeTimeout);
      } catch (e) {
        // Turn an unexpected exception into a failed result so that one task
        // cannot reject Promise.all and abort the rest of the batch.
        const stem = stemName(row, sheetName);
        logError(`[${stem}] unhandled error: ${e.message}`);
        result = new TaskResult(sheetName, rowKey(row), String(row[COL_TITLE] || ''), null, null, stem);
        result.overall_status = 'failed';
        result.error = `unhandled: ${String(e.message).slice(0, 500)}`;
      }
      results.push(result);
      overall.addResult(result.overall_status);
      console.log(`\n${overall.summaryLine()}\n`);
      return result;
    })
  );

  await Promise.all(taskFns);

  // Write-back only happens when 'transcribe' is among the steps.
  if (steps.includes('transcribe')) {
    const kwMap = new Map();
    for (const r of results) {
      if (r.analyze.status === 'success' && r.analyze.file) {
        kwMap.set(`${r.sheet}|${r.id_val}`, r.analyze.file);
      }
    }
    writeAllContentsToExcel(results, kwMap.size ? kwMap : null);
  }

  const config = {
    retry_from: reportPath, steps, max_retries: maxRetries,
    retry_delay: retryDelay, concurrency: workerCount, force,
  };
  const reportFilePath = generateReport(results, config);
  printReportSummary(results);
  logInfo(`all done! report: ${reportFilePath}`);
}
|
|
1818
|
+
|
|
1819
|
+
// ============================== CLI ==============================
|
|
1820
|
+
// Command-line entry point: only runs when this file is executed directly.
if (process.argv[1] === __filename || process.argv[1]?.endsWith('process_videos.js')) {
  // commander calls an option parser as (value, previousValue). Passing the
  // global parseInt directly therefore uses the option's default as the radix:
  // `--concurrency 3` became parseInt('3', 1) === NaN and every `--*-timeout`
  // value became NaN as well. Always parse base 10 and reject non-numeric input.
  const intOption = (flag) => (value) => {
    const parsed = Number.parseInt(value, 10);
    if (Number.isNaN(parsed)) {
      console.error(`Invalid value for ${flag}: ${value}. Must be an integer.`);
      process.exit(1);
    }
    return parsed;
  };

  program
    .name('process_videos')
    .description('视频下载、转码、文本识别、AI分析一体化流程')
    .option('--sheet <name>', '指定 sheet 名称')
    .option('--id <id>', '指定 extra.id 或 title(单条测试)')
    .option('--step <step>', '只执行某一步:download / transcode / transcribe / analyze', (val) => {
      const allowed = ['download', 'transcode', 'transcribe', 'analyze'];
      if (!allowed.includes(val)) {
        console.error(`Invalid step: ${val}. Must be one of: ${allowed.join(', ')}`);
        process.exit(1);
      }
      return val;
    })
    .option('--force', '强制重做下载+转码(忽略已有文件)')
    .option('--concurrency <n>', '并发数,默认 1', intOption('--concurrency'), 1)
    .option('--retry <n>', '每步失败最大重试次数,默认 0', intOption('--retry'), 0)
    // parseFloat ignores its second argument, so it is safe to pass directly.
    .option('--retry-delay <n>', '重试间隔基数(秒),默认 5', parseFloat, 5.0)
    .option('--download-timeout <n>', '下载超时(秒),默认 600', intOption('--download-timeout'), 600)
    .option('--transcode-timeout <n>', '转码超时(秒),默认 600', intOption('--transcode-timeout'), 600)
    .option('--transcribe-timeout <n>', '识别超时(秒),默认 600', intOption('--transcribe-timeout'), 600)
    .option('--analyze-timeout <n>', 'AI 分析超时(秒),默认 300', intOption('--analyze-timeout'), 300)
    .option('--dry-run', '干跑模式,只列任务不执行')
    .option('--retry-failed <path>', '从报告 JSON 重跑失败项')
    .option('--init', '复制 .env.example 到当前目录并重命名为 .env')
    .option('--file <path>', '指定 Excel 文件路径(优先级高于 EXCEL_FILE 环境变量)')
    .option('--url <url>', '直接指定视频下载链接(跳过 Excel),支持标准链接和内嵌链接')
    .option('--name <name>', '指定下载文件名,不含扩展名(与 --url 配合使用)')
    .option('--env-file <path>', '指定要加载的 .env 文件路径(默认: 当前目录 .env)');

  program.parse();

  const opts = program.opts();

  // ── init mode: copy .env.example next to the user and exit ──
  if (opts.init) {
    const src = path.resolve(__dirname, '.env.example');
    if (!fs.existsSync(src)) {
      console.error(`错误: 找不到 ${src}`);
      process.exit(1);
    }
    let dest = path.resolve(process.cwd(), '.env');
    if (fs.existsSync(dest)) {
      console.log(`\n⚠️ 目标文件已存在: ${dest}`);
      const choice = await select({
        message: '如何处理冲突?',
        choices: [
          { name: '覆盖 (overwrite)', value: 'overwrite', description: '用 .env.example 覆盖现有 .env 文件' },
          { name: '保留现有 (keep existing)', value: 'keep', description: '不做任何修改,保留当前 .env' },
          { name: '自定义文件名 (custom name)', value: 'custom', description: '使用自定义文件名创建 .env' },
        ],
      });
      if (choice === 'overwrite') {
        fs.copyFileSync(src, dest);
        console.log(`✅ .env 已覆盖: ${dest}`);
      } else if (choice === 'custom') {
        const customName = await input({
          message: '请输入新文件名',
          default: '.env.prod',
          validate(val) {
            return val ? true : '文件名不能为空';
          },
        });
        if (!customName) {
          console.log('未输入文件名,已取消。');
          process.exit(0);
        }
        dest = path.resolve(process.cwd(), customName);
        // Never overwrite a second existing file without an explicit choice.
        if (fs.existsSync(dest)) {
          console.log(`⚠️ 文件 "${customName}" 也已存在,保留现有文件。`);
        } else {
          fs.copyFileSync(src, dest);
          console.log(`✅ .env 已创建为: ${dest}`);
        }
      } else {
        console.log('保留现有 .env 文件,未做修改。');
      }
    } else {
      fs.copyFileSync(src, dest);
      console.log(`✅ .env 已从 .env.example 创建: ${dest}`);
    }
    process.exit(0);
  }

  // ── --file override of the Excel path ──
  if (opts.file) {
    EXCEL_FILE = path.resolve(opts.file);
    logInfo(`Excel 文件覆盖为: ${EXCEL_FILE}`);
  }
  const steps = opts.step ? [opts.step] : ['download', 'transcode', 'transcribe', 'analyze'];

  // ── --url mode: process a single video link, bypassing Excel ──
  if (opts.url) {
    const parsed = parseUrl(opts.url);
    if (!parsed) {
      console.error(c('red', `❌ 无法识别的 URL: ${opts.url}`));
      console.error(c('yellow', '支持的平台: YouTube, B站, 腾讯视频, 优酷'));
      console.error(c('dim', 'URL 格式示例:'));
      console.error(c('dim', ' https://www.bilibili.com/video/BV1xxxyyyzzz'));
      console.error(c('dim', ' https://www.youtube.com/watch?v=xxxxxxxxxxx'));
      console.error(c('dim', ' https://v.qq.com/x/page/x0000xxxxx.html'));
      console.error(c('dim', ' https://v.youku.com/v_show/id_XXXXXXX.html'));
      process.exit(1);
    }

    console.log(c('dim', '\n── URL 任务 ──'));
    console.log(` 平台: ${c('cyan', parsed.platform)}`);
    console.log(` 视频ID: ${c('cyan', parsed.videoId)}`);
    console.log(` 链接: ${c('cyan', parsed.watchUrl)}`);

    // Target path: downloads/<platform>/<name>.mp4
    fs.mkdirSync(DOWNLOADS_DIR, { recursive: true });
    const dlDir = path.join(DOWNLOADS_DIR, parsed.platform);
    fs.mkdirSync(dlDir, { recursive: true });
    const fileName = opts.name || parsed.videoId;
    const proposedPath = path.join(dlDir, `${fileName}.mp4`);

    // Conflict handling (with --force the proposed path is used as-is)
    let finalPath, finalStem;
    if (opts.force) {
      finalPath = proposedPath;
      finalStem = fileName;
    } else {
      const conflict = await resolveUrlConflict(proposedPath);
      if (conflict.action === 'skip') {
        console.log(c('yellow', '\n⏭️ 已跳过\n'));
        process.exit(0);
      }
      finalPath = conflict.path;
      finalStem = path.basename(finalPath, '.mp4');
    }

    console.log(` 文件: ${c('green', finalPath)}`);

    // Whisper availability
    let whisperAvailable = false;
    if (steps.includes('transcribe')) {
      whisperAvailable = await checkWhisperAvailable();
      if (!whisperAvailable) {
        const backend = WHISPER_BACKEND === 'local' ? 'local CLI' : WHISPER_SERVICE;
        logWarn(`⚠️ whisper not available (${backend}), transcribe step will fail`);
      }
    }

    // Run the pipeline for this single URL
    await runUrlTask({
      watchUrl: parsed.watchUrl,
      platform: parsed.platform,
      pkey: parsed.pkey,
      videoId: parsed.videoId,
      stem: finalStem,
      dlDir,
      steps,
      maxRetries: opts.retry,
      retryDelay: opts.retryDelay,
      force: opts.force || false,
      downloadTimeout: opts.downloadTimeout,
      transcodeTimeout: opts.transcodeTimeout,
      transcribeTimeout: opts.transcribeTimeout,
      analyzeTimeout: opts.analyzeTimeout,
      whisperAvailable,
    });

    process.exit(0);
  }

  // ── Default mode: Excel-driven batch run ──
  run({
    targetSheet: opts.sheet || null,
    targetId: opts.id || null,
    steps,
    maxRetries: opts.retry,
    retryDelay: opts.retryDelay,
    concurrency: opts.concurrency,
    force: opts.force || false,
    dryRun: opts.dryRun || false,
    retryFailed: opts.retryFailed || null,
    downloadTimeout: opts.downloadTimeout,
    transcodeTimeout: opts.transcodeTimeout,
    transcribeTimeout: opts.transcribeTimeout,
    analyzeTimeout: opts.analyzeTimeout,
  }).catch(err => {
    console.error('Fatal error:', err);
    process.exit(1);
  });

  // Handle unhandled promise rejections
  process.on('unhandledRejection', (reason) => {
    console.error('Unhandled rejection:', reason);
    process.exit(1);
  });
}
|