koishi-plugin-video-parser-all 0.5.8 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/index.js +93 -107
- package/package.json +1 -1
package/lib/index.js
CHANGED
|
@@ -125,17 +125,17 @@ const API_CONFIG = {
|
|
|
125
125
|
zuiyou: 'https://api.bugpk.com/api/zuiyou'
|
|
126
126
|
};
|
|
127
127
|
const VARIABLE_MAPPING = {
|
|
128
|
-
'标题': ['title', 'Title', 'TITLE'],
|
|
129
|
-
'作者': ['author.name', 'author', 'name', 'Author', 'Name', 'owner.name'],
|
|
130
|
-
'简介': ['desc', 'description', 'Desc', 'Description', 'content', 'Content'],
|
|
131
|
-
'视频时长': ['duration', 'Duration', 'time', 'Time'],
|
|
132
|
-
'点赞数': ['like', 'Like', 'attitudes_count', 'digg_count', 'praise', 'stat.like'],
|
|
133
|
-
'投币数': ['coin', 'Coin', 'bi', 'Bi'],
|
|
134
|
-
'收藏数': ['collect', 'Collect', 'favorite', 'Favorite', 'star', 'Star', 'stat.collect'],
|
|
135
|
-
'转发数': ['share', 'Share', 'forward', 'Forward', 'repost', 'stat.share', 'reposts_count'],
|
|
136
|
-
'播放数': ['view', 'View', 'play_count', 'PlayCount', 'play'],
|
|
128
|
+
'标题': ['title', 'Title', 'TITLE', 'note_title', 'content_title'],
|
|
129
|
+
'作者': ['author.name', 'author', 'name', 'Author', 'Name', 'owner.name', 'nickname', 'user_name'],
|
|
130
|
+
'简介': ['desc', 'description', 'Desc', 'Description', 'content', 'Content', 'note_desc', 'text'],
|
|
131
|
+
'视频时长': ['duration', 'Duration', 'time', 'Time', 'video_duration'],
|
|
132
|
+
'点赞数': ['like', 'Like', 'attitudes_count', 'digg_count', 'praise', 'stat.like', 'liked_count'],
|
|
133
|
+
'投币数': ['coin', 'Coin', 'bi', 'Bi', 'stat.coin'],
|
|
134
|
+
'收藏数': ['collect', 'Collect', 'favorite', 'Favorite', 'star', 'Star', 'stat.collect', 'collected_count'],
|
|
135
|
+
'转发数': ['share', 'Share', 'forward', 'Forward', 'repost', 'stat.share', 'reposts_count', 'shared_count'],
|
|
136
|
+
'播放数': ['view', 'View', 'play_count', 'PlayCount', 'play', 'stat.view', 'play_times'],
|
|
137
137
|
'评论数': ['comment', 'Comment', 'comments_count', 'comment_count', 'discuss', 'stat.comment'],
|
|
138
|
-
'音乐名': ['music.title', 'music_name', 'audio_name', 'sound_name', 'muisic', 'music']
|
|
138
|
+
'音乐名': ['music.title', 'music_name', 'audio_name', 'sound_name', 'muisic', 'music', 'bgm_name']
|
|
139
139
|
};
|
|
140
140
|
function getErrorInfo(code, detail) {
|
|
141
141
|
const baseMsg = exports.ErrorMessageMap[code] || exports.ErrorMessageMap[ErrorCode.UNKNOWN_ERROR];
|
|
@@ -303,19 +303,18 @@ function cleanUrl(url) {
|
|
|
303
303
|
url = url.replace(/&amp;/g, '&');
|
|
304
304
|
const urlObj = new URL(url);
|
|
305
305
|
if (urlObj.hostname.includes('xiaohongshu.com')) {
|
|
306
|
-
urlObj.searchParams.delete(
|
|
307
|
-
urlObj.searchParams.delete('xhsshare');
|
|
308
|
-
urlObj.searchParams.delete('xsec_token');
|
|
309
|
-
urlObj.searchParams.delete('xsec_source');
|
|
306
|
+
urlObj.searchParams.forEach((_, key) => urlObj.searchParams.delete(key));
|
|
310
307
|
return urlObj.origin + urlObj.pathname;
|
|
311
308
|
}
|
|
312
309
|
if (urlObj.hostname.includes('douyin.com') || urlObj.hostname.includes('v.douyin.com')) {
|
|
310
|
+
urlObj.searchParams.delete('source');
|
|
311
|
+
urlObj.searchParams.delete('share_type');
|
|
313
312
|
return urlObj.origin + urlObj.pathname;
|
|
314
313
|
}
|
|
315
314
|
return url;
|
|
316
315
|
}
|
|
317
316
|
catch (e) {
|
|
318
|
-
return url.replace(/&amp;/g, '&');
|
|
317
|
+
return url.replace(/&amp;/g, '&').replace(/\?.*/, '');
|
|
319
318
|
}
|
|
320
319
|
}
|
|
321
320
|
async function resolveShortUrl(url) {
|
|
@@ -324,7 +323,9 @@ async function resolveShortUrl(url) {
|
|
|
324
323
|
timeout: 10000,
|
|
325
324
|
maxRedirects: 10,
|
|
326
325
|
headers: {
|
|
327
|
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
|
|
326
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
327
|
+
'Referer': 'https://www.baidu.com/',
|
|
328
|
+
'Cookie': 'xhsTrackerId=xxx; xhs_sessionId=xxx'
|
|
328
329
|
}
|
|
329
330
|
});
|
|
330
331
|
return cleanUrl(res.request.res?.responseUrl || url);
|
|
@@ -352,7 +353,7 @@ function formatDuration(input) {
|
|
|
352
353
|
: `${minutes.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`;
|
|
353
354
|
}
|
|
354
355
|
function getNestedValue(obj, path) {
|
|
355
|
-
if (!obj || typeof obj !== 'object')
|
|
356
|
+
if (!obj || typeof obj !== 'object' || !path)
|
|
356
357
|
return undefined;
|
|
357
358
|
const keys = path.split('.');
|
|
358
359
|
let value = obj;
|
|
@@ -364,21 +365,23 @@ function getNestedValue(obj, path) {
|
|
|
364
365
|
return value;
|
|
365
366
|
}
|
|
366
367
|
function findValueInObject(obj, keys) {
|
|
367
|
-
if (!obj || typeof obj !== 'object')
|
|
368
|
+
if (!obj || typeof obj !== 'object' || !keys || keys.length === 0)
|
|
368
369
|
return undefined;
|
|
369
370
|
for (const key of keys) {
|
|
370
371
|
if (key.includes('.')) {
|
|
371
372
|
const value = getNestedValue(obj, key);
|
|
372
|
-
if (value !== undefined && value !== null && value !== '')
|
|
373
|
+
if (value !== undefined && value !== null && value !== '' && value !== 0)
|
|
373
374
|
return value;
|
|
374
375
|
}
|
|
375
376
|
else {
|
|
376
|
-
if (obj[key] !== undefined && obj[key] !== null && obj[key] !== '')
|
|
377
|
+
if (obj[key] !== undefined && obj[key] !== null && obj[key] !== '' && obj[key] !== 0)
|
|
377
378
|
return obj[key];
|
|
378
379
|
const lowerKey = key.toLowerCase();
|
|
379
380
|
for (const objKey of Object.keys(obj)) {
|
|
380
381
|
if (objKey.toLowerCase() === lowerKey) {
|
|
381
|
-
|
|
382
|
+
const val = obj[objKey];
|
|
383
|
+
if (val !== undefined && val !== null && val !== '' && val !== 0)
|
|
384
|
+
return val;
|
|
382
385
|
}
|
|
383
386
|
}
|
|
384
387
|
}
|
|
@@ -386,74 +389,60 @@ function findValueInObject(obj, keys) {
|
|
|
386
389
|
return undefined;
|
|
387
390
|
}
|
|
388
391
|
function parseData(rawResponse, maxDescLength) {
|
|
389
|
-
const
|
|
392
|
+
const rootData = rawResponse || {};
|
|
393
|
+
const data = rootData.data || rootData.result || rootData || {};
|
|
390
394
|
const stat = {};
|
|
391
395
|
Object.entries(VARIABLE_MAPPING).forEach(([varName, keys]) => {
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
value = value || getNestedValue(data, 'author.name');
|
|
395
|
-
value = value || getNestedValue(data, 'desc');
|
|
396
|
-
value = value || getNestedValue(data, 'duration');
|
|
397
|
-
value = value || getNestedValue(data, 'like');
|
|
398
|
-
value = value || getNestedValue(data, 'coin');
|
|
399
|
-
value = value || getNestedValue(data, 'collect');
|
|
400
|
-
value = value || getNestedValue(data, 'share');
|
|
401
|
-
value = value || getNestedValue(data, 'view');
|
|
402
|
-
value = value || getNestedValue(data, 'comment');
|
|
403
|
-
value = value || getNestedValue(data, 'music.title');
|
|
404
|
-
value = value || findValueInObject(data, keys);
|
|
405
|
-
if (value !== undefined)
|
|
406
|
-
stat[varName] = value;
|
|
396
|
+
const value = findValueInObject(data, keys) || findValueInObject(rootData, keys);
|
|
397
|
+
stat[varName] = value;
|
|
407
398
|
});
|
|
408
399
|
let type = 'video';
|
|
409
400
|
if (data.jx?.type)
|
|
410
401
|
type = data.jx.type;
|
|
411
402
|
else if (data.type)
|
|
412
403
|
type = data.type;
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
let
|
|
416
|
-
author = data.owner?.name || data.author?.name || findValueInObject(data, VARIABLE_MAPPING['作者']) || '未知作者';
|
|
417
|
-
let desc = title;
|
|
418
|
-
desc = data.video?.desc || data.desc || data.content || findValueInObject(data, VARIABLE_MAPPING['简介']) || title;
|
|
404
|
+
const title = data.title ?? data.note_title ?? data.content_title ?? stat['标题'] ?? '无标题';
|
|
405
|
+
const author = data.author?.name ?? data.nickname ?? data.user_name ?? stat['作者'] ?? '未知作者';
|
|
406
|
+
let desc = data.desc ?? data.content ?? data.note_desc ?? stat['简介'] ?? title;
|
|
419
407
|
desc = desc.toString().slice(0, maxDescLength);
|
|
420
|
-
|
|
421
|
-
cover = data.video?.fm || data.item?.cover || data.cover || data.imgurl || data.pic || findValueInObject(data, ['cover', 'imgurl', 'pic']) || '';
|
|
408
|
+
const cover = data.cover ?? data.imgurl ?? data.pic ?? data.thumbnail ?? data.cover_url ?? '';
|
|
422
409
|
let images = [];
|
|
423
|
-
|
|
424
|
-
if (
|
|
425
|
-
images =
|
|
410
|
+
const imgRaw = data.images ?? data.pics ?? data.pic_urls ?? data.image_list ?? [];
|
|
411
|
+
if (Array.isArray(imgRaw))
|
|
412
|
+
images = imgRaw.filter(i => i && typeof i === 'string');
|
|
413
|
+
else if (imgRaw)
|
|
414
|
+
images = [String(imgRaw)];
|
|
426
415
|
let video = '';
|
|
427
|
-
video = data.video?.url
|
|
428
|
-
const durationValue = data.duration
|
|
416
|
+
video = data.video?.url ?? data.item?.url ?? data.url ?? data.download_url ?? data.playUrl ?? data.video_url ?? '';
|
|
417
|
+
const durationValue = data.duration ?? stat['视频时长'] ?? 0;
|
|
429
418
|
const duration = typeof durationValue === 'number' ? durationValue : parseInt(durationValue) || 0;
|
|
430
|
-
const durationFormatted = formatDuration(durationValue
|
|
431
|
-
const live_photo = data.live_photo
|
|
432
|
-
const music = data.music?.title
|
|
433
|
-
const h_w = data.item?.h_w
|
|
434
|
-
const quality_urls = data.quality_urls
|
|
435
|
-
const default_quality = data.default_quality
|
|
436
|
-
const download_url = data.download_url
|
|
437
|
-
const play_count =
|
|
438
|
-
const reposts_count =
|
|
439
|
-
const attitudes_count =
|
|
440
|
-
const comments_count =
|
|
419
|
+
const durationFormatted = formatDuration(durationValue);
|
|
420
|
+
const live_photo = data.live_photo ?? [];
|
|
421
|
+
const music = data.music?.title ?? data.music ?? stat['音乐名'] ?? '';
|
|
422
|
+
const h_w = data.item?.h_w ?? [];
|
|
423
|
+
const quality_urls = data.quality_urls ?? {};
|
|
424
|
+
const default_quality = data.default_quality ?? '';
|
|
425
|
+
const download_url = data.download_url ?? video;
|
|
426
|
+
const play_count = stat['播放数'] ?? '';
|
|
427
|
+
const reposts_count = stat['转发数'] ?? 0;
|
|
428
|
+
const attitudes_count = stat['点赞数'] ?? 0;
|
|
429
|
+
const comments_count = stat['评论数'] ?? 0;
|
|
441
430
|
return {
|
|
442
431
|
type: type,
|
|
443
432
|
rawData: rawResponse,
|
|
444
|
-
title,
|
|
445
|
-
author,
|
|
446
|
-
desc,
|
|
447
|
-
cover,
|
|
433
|
+
title: String(title),
|
|
434
|
+
author: String(author),
|
|
435
|
+
desc: String(desc),
|
|
436
|
+
cover: String(cover),
|
|
448
437
|
images,
|
|
449
|
-
video,
|
|
438
|
+
video: String(video),
|
|
450
439
|
duration,
|
|
451
440
|
durationFormatted,
|
|
452
441
|
stat,
|
|
453
442
|
live_photo,
|
|
454
|
-
music,
|
|
443
|
+
music: String(music),
|
|
455
444
|
h_w,
|
|
456
|
-
jx: data.jx
|
|
445
|
+
jx: data.jx ?? null,
|
|
457
446
|
quality_urls,
|
|
458
447
|
default_quality,
|
|
459
448
|
download_url,
|
|
@@ -465,32 +454,17 @@ function parseData(rawResponse, maxDescLength) {
|
|
|
465
454
|
}
|
|
466
455
|
function generateFormattedText(parseData, config) {
|
|
467
456
|
let format = config.unifiedMessageFormat;
|
|
457
|
+
if (!format)
|
|
458
|
+
format = '标题:${标题}\n作者:${作者}\n简介:${简介}';
|
|
468
459
|
let result = format;
|
|
469
|
-
const
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
varMatches.forEach((varMatch) => {
|
|
476
|
-
const varName = varMatch.replace(/\$\{|\}/g, '');
|
|
477
|
-
const value = parseData.stat[varName];
|
|
478
|
-
if (value === undefined || value === null || value === '' || value === 0 || value === '00:00') {
|
|
479
|
-
isValid = false;
|
|
480
|
-
}
|
|
481
|
-
else {
|
|
482
|
-
processedLine = processedLine.replace(varMatch, String(value));
|
|
483
|
-
}
|
|
484
|
-
});
|
|
485
|
-
if (isValid && processedLine.trim() !== '') {
|
|
486
|
-
validLines.push(processedLine);
|
|
487
|
-
}
|
|
460
|
+
const varMatches = result.match(/\$\{([^}]+)\}/g) || [];
|
|
461
|
+
varMatches.forEach((varMatch) => {
|
|
462
|
+
const varName = varMatch.replace(/\$\{|\}/g, '');
|
|
463
|
+
const value = parseData.stat[varName];
|
|
464
|
+
const showValue = value ?? '';
|
|
465
|
+
result = result.replace(varMatch, String(showValue));
|
|
488
466
|
});
|
|
489
|
-
result
|
|
490
|
-
if (!result) {
|
|
491
|
-
result = `标题:${parseData.title}\n作者:${parseData.author}\n简介:${parseData.desc}`;
|
|
492
|
-
}
|
|
493
|
-
return result;
|
|
467
|
+
return result.trim() || `标题:${parseData.title}\n作者:${parseData.author}\n简介:${parseData.desc}`;
|
|
494
468
|
}
|
|
495
469
|
function clearAllCache() {
|
|
496
470
|
processed.clear();
|
|
@@ -530,23 +504,31 @@ function apply(ctx, config) {
|
|
|
530
504
|
clearAllCache();
|
|
531
505
|
const http = axios_1.default.create({
|
|
532
506
|
timeout: config.timeout,
|
|
533
|
-
headers: {
|
|
507
|
+
headers: {
|
|
508
|
+
'User-Agent': config.userAgent || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
|
|
509
|
+
'Referer': 'https://www.baidu.com/',
|
|
510
|
+
'Content-Type': 'application/x-www-form-urlencoded'
|
|
511
|
+
}
|
|
534
512
|
});
|
|
535
513
|
async function parseWithRetry(url, platform, retryTimes) {
|
|
536
514
|
let lastError = null;
|
|
537
515
|
for (let i = 0; i <= retryTimes; i++) {
|
|
538
516
|
try {
|
|
539
|
-
const params = { url };
|
|
517
|
+
const params = { url, proxyurl: '' };
|
|
540
518
|
const res = await http.get(API_CONFIG[platform], {
|
|
541
519
|
params,
|
|
542
|
-
timeout: config.timeout
|
|
520
|
+
timeout: config.timeout,
|
|
521
|
+
headers: {
|
|
522
|
+
'X-Requested-With': 'XMLHttpRequest',
|
|
523
|
+
'Origin': platform === 'xiaohongshu' ? 'https://api.bugpk.com' : 'https://www.baidu.com'
|
|
524
|
+
}
|
|
543
525
|
});
|
|
544
526
|
return res.data;
|
|
545
527
|
}
|
|
546
528
|
catch (error) {
|
|
547
529
|
lastError = error;
|
|
548
530
|
if (i < retryTimes) {
|
|
549
|
-
await delay(config.retryInterval);
|
|
531
|
+
await delay(config.retryInterval * (i + 1));
|
|
550
532
|
}
|
|
551
533
|
}
|
|
552
534
|
}
|
|
@@ -571,9 +553,17 @@ function apply(ctx, config) {
|
|
|
571
553
|
}
|
|
572
554
|
try {
|
|
573
555
|
const resData = await parseWithRetry(realUrl, platform, config.retryTimes);
|
|
574
|
-
|
|
556
|
+
if (!resData || Object.keys(resData).length === 0) {
|
|
557
|
+
const code = ErrorCode.API_EMPTY_RESPONSE;
|
|
558
|
+
const msg = getErrorInfo(code, 'API返回空数据');
|
|
559
|
+
logger.error(`[${code}] ${url}`);
|
|
560
|
+
return { data: null, code, msg };
|
|
561
|
+
}
|
|
562
|
+
const isSuccess = resData.code === 0 || resData.code === 200 || resData.code === 1 ||
|
|
563
|
+
(resData.msg && (resData.msg.includes('解析成功') || resData.msg.includes('success'))) ||
|
|
564
|
+
!!resData.data || !!resData.result || !!resData.video || !!resData.images;
|
|
575
565
|
if (!isSuccess) {
|
|
576
|
-
const apiErrorMsg = resData.msg || '解析失败';
|
|
566
|
+
const apiErrorMsg = resData.msg || resData.error || '解析失败';
|
|
577
567
|
const code = ErrorCode.API_RETURN_ERROR;
|
|
578
568
|
const msg = getErrorInfo(code, apiErrorMsg);
|
|
579
569
|
logger.error(`[${code}] API返回错误: ${url}, 错误: ${apiErrorMsg}`);
|
|
@@ -581,14 +571,10 @@ function apply(ctx, config) {
|
|
|
581
571
|
}
|
|
582
572
|
try {
|
|
583
573
|
const parseResult = parseData(resData, config.maxDescLength);
|
|
584
|
-
const hasValidContent =
|
|
585
|
-
(parseResult.images && parseResult.images.length > 0) ||
|
|
586
|
-
(parseResult.live_photo && parseResult.live_photo.length > 0) ||
|
|
587
|
-
parseResult.type === 'live' ||
|
|
588
|
-
parseResult.type === '图集';
|
|
574
|
+
const hasValidContent = true;
|
|
589
575
|
if (!hasValidContent) {
|
|
590
576
|
const code = ErrorCode.NO_VIDEO_FOUND;
|
|
591
|
-
const msg = getErrorInfo(code, '
|
|
577
|
+
const msg = getErrorInfo(code, '链接有效但未获取到有效视频/图片内容,可能是私密/需要登录/已删除内容');
|
|
592
578
|
logger.warn(`[${code}] 解析成功但无有效内容: ${url}`);
|
|
593
579
|
return { data: null, code, msg };
|
|
594
580
|
}
|
|
@@ -613,7 +599,7 @@ function apply(ctx, config) {
|
|
|
613
599
|
if (errorMsg.includes('timeout')) {
|
|
614
600
|
code = ErrorCode.REQUEST_TIMEOUT;
|
|
615
601
|
}
|
|
616
|
-
else if (errorMsg.includes('Network') || errorMsg.includes('network')) {
|
|
602
|
+
else if (errorMsg.includes('Network') || errorMsg.includes('network') || errorMsg.includes('404') || errorMsg.includes('500')) {
|
|
617
603
|
code = ErrorCode.NETWORK_ERROR;
|
|
618
604
|
}
|
|
619
605
|
else {
|
|
@@ -699,14 +685,14 @@ function apply(ctx, config) {
|
|
|
699
685
|
}
|
|
700
686
|
}
|
|
701
687
|
if (errors.length > 0) {
|
|
702
|
-
const errorLines = errors.map(err => `【${err.url}】: ${err.msg}`);
|
|
688
|
+
const errorLines = errors.map(err => `【${err.url.slice(0, 50)}${err.url.length > 50 ? '...' : ''}】: ${err.msg}`);
|
|
703
689
|
const errorMsg = `❌ 解析失败列表(共${errors.length}个链接):\n${errorLines.join('\n')}`;
|
|
704
690
|
logger.error(`解析失败数量: ${errors.length}, 错误码列表: ${errors.map(e => e.code).join(', ')}`);
|
|
705
691
|
await sendTimeout(session, errorMsg);
|
|
706
692
|
await delay(500);
|
|
707
693
|
}
|
|
708
694
|
if (items.length === 0) {
|
|
709
|
-
const failMsg = getErrorInfo(ErrorCode.UNKNOWN_ERROR, '
|
|
695
|
+
const failMsg = getErrorInfo(ErrorCode.UNKNOWN_ERROR, '所有链接均解析失败,请检查链接是否有效/是否需要登录,或稍后重试');
|
|
710
696
|
await sendTimeout(session, `⚠ ${failMsg}`);
|
|
711
697
|
return;
|
|
712
698
|
}
|
package/package.json
CHANGED