koishi-plugin-video-parser-all 0.5.8 → 0.5.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/index.js +102 -72
- package/package.json +1 -1
package/lib/index.js
CHANGED
|
@@ -125,17 +125,17 @@ const API_CONFIG = {
|
|
|
125
125
|
zuiyou: 'https://api.bugpk.com/api/zuiyou'
|
|
126
126
|
};
|
|
127
127
|
const VARIABLE_MAPPING = {
|
|
128
|
-
'标题': ['title', 'Title', 'TITLE'],
|
|
129
|
-
'作者': ['author.name', 'author', 'name', 'Author', 'Name', 'owner.name'],
|
|
130
|
-
'简介': ['desc', 'description', 'Desc', 'Description', 'content', 'Content'],
|
|
131
|
-
'视频时长': ['duration', 'Duration', 'time', 'Time'],
|
|
132
|
-
'点赞数': ['like', 'Like', 'attitudes_count', 'digg_count', 'praise', 'stat.like'],
|
|
133
|
-
'投币数': ['coin', 'Coin', 'bi', 'Bi'],
|
|
134
|
-
'收藏数': ['collect', 'Collect', 'favorite', 'Favorite', 'star', 'Star', 'stat.collect'],
|
|
135
|
-
'转发数': ['share', 'Share', 'forward', 'Forward', 'repost', 'stat.share', 'reposts_count'],
|
|
136
|
-
'播放数': ['view', 'View', 'play_count', 'PlayCount', 'play'],
|
|
128
|
+
'标题': ['title', 'Title', 'TITLE', 'note_title', 'content_title'],
|
|
129
|
+
'作者': ['author.name', 'author', 'name', 'Author', 'Name', 'owner.name', 'nickname', 'user_name'],
|
|
130
|
+
'简介': ['desc', 'description', 'Desc', 'Description', 'content', 'Content', 'note_desc', 'text'],
|
|
131
|
+
'视频时长': ['duration', 'Duration', 'time', 'Time', 'video_duration'],
|
|
132
|
+
'点赞数': ['like', 'Like', 'attitudes_count', 'digg_count', 'praise', 'stat.like', 'liked_count'],
|
|
133
|
+
'投币数': ['coin', 'Coin', 'bi', 'Bi', 'stat.coin'],
|
|
134
|
+
'收藏数': ['collect', 'Collect', 'favorite', 'Favorite', 'star', 'Star', 'stat.collect', 'collected_count'],
|
|
135
|
+
'转发数': ['share', 'Share', 'forward', 'Forward', 'repost', 'stat.share', 'reposts_count', 'shared_count'],
|
|
136
|
+
'播放数': ['view', 'View', 'play_count', 'PlayCount', 'play', 'stat.view', 'play_times'],
|
|
137
137
|
'评论数': ['comment', 'Comment', 'comments_count', 'comment_count', 'discuss', 'stat.comment'],
|
|
138
|
-
'音乐名': ['music.title', 'music_name', 'audio_name', 'sound_name', 'muisic', 'music']
|
|
138
|
+
'音乐名': ['music.title', 'music_name', 'audio_name', 'sound_name', 'muisic', 'music', 'bgm_name']
|
|
139
139
|
};
|
|
140
140
|
function getErrorInfo(code, detail) {
|
|
141
141
|
const baseMsg = exports.ErrorMessageMap[code] || exports.ErrorMessageMap[ErrorCode.UNKNOWN_ERROR];
|
|
@@ -303,19 +303,18 @@ function cleanUrl(url) {
|
|
|
303
303
|
url = url.replace(/&amp;/g, '&');
|
|
304
304
|
const urlObj = new URL(url);
|
|
305
305
|
if (urlObj.hostname.includes('xiaohongshu.com')) {
|
|
306
|
-
urlObj.searchParams.delete(
|
|
307
|
-
urlObj.searchParams.delete('xhsshare');
|
|
308
|
-
urlObj.searchParams.delete('xsec_token');
|
|
309
|
-
urlObj.searchParams.delete('xsec_source');
|
|
306
|
+
urlObj.searchParams.forEach((_, key) => urlObj.searchParams.delete(key));
|
|
310
307
|
return urlObj.origin + urlObj.pathname;
|
|
311
308
|
}
|
|
312
309
|
if (urlObj.hostname.includes('douyin.com') || urlObj.hostname.includes('v.douyin.com')) {
|
|
310
|
+
urlObj.searchParams.delete('source');
|
|
311
|
+
urlObj.searchParams.delete('share_type');
|
|
313
312
|
return urlObj.origin + urlObj.pathname;
|
|
314
313
|
}
|
|
315
314
|
return url;
|
|
316
315
|
}
|
|
317
316
|
catch (e) {
|
|
318
|
-
return url.replace(/&amp;/g, '&');
|
|
317
|
+
return url.replace(/&amp;/g, '&').replace(/\?.*/, '');
|
|
319
318
|
}
|
|
320
319
|
}
|
|
321
320
|
async function resolveShortUrl(url) {
|
|
@@ -324,7 +323,9 @@ async function resolveShortUrl(url) {
|
|
|
324
323
|
timeout: 10000,
|
|
325
324
|
maxRedirects: 10,
|
|
326
325
|
headers: {
|
|
327
|
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
|
|
326
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
|
|
327
|
+
'Referer': 'https://www.baidu.com/',
|
|
328
|
+
'Cookie': 'xhsTrackerId=xxx; xhs_sessionId=xxx'
|
|
328
329
|
}
|
|
329
330
|
});
|
|
330
331
|
return cleanUrl(res.request.res?.responseUrl || url);
|
|
@@ -352,7 +353,7 @@ function formatDuration(input) {
|
|
|
352
353
|
: `${minutes.toString().padStart(2, '0')}:${secs.toString().padStart(2, '0')}`;
|
|
353
354
|
}
|
|
354
355
|
function getNestedValue(obj, path) {
|
|
355
|
-
if (!obj || typeof obj !== 'object')
|
|
356
|
+
if (!obj || typeof obj !== 'object' || !path)
|
|
356
357
|
return undefined;
|
|
357
358
|
const keys = path.split('.');
|
|
358
359
|
let value = obj;
|
|
@@ -364,21 +365,23 @@ function getNestedValue(obj, path) {
|
|
|
364
365
|
return value;
|
|
365
366
|
}
|
|
366
367
|
function findValueInObject(obj, keys) {
|
|
367
|
-
if (!obj || typeof obj !== 'object')
|
|
368
|
+
if (!obj || typeof obj !== 'object' || !keys || keys.length === 0)
|
|
368
369
|
return undefined;
|
|
369
370
|
for (const key of keys) {
|
|
370
371
|
if (key.includes('.')) {
|
|
371
372
|
const value = getNestedValue(obj, key);
|
|
372
|
-
if (value !== undefined && value !== null && value !== '')
|
|
373
|
+
if (value !== undefined && value !== null && value !== '' && value !== 0)
|
|
373
374
|
return value;
|
|
374
375
|
}
|
|
375
376
|
else {
|
|
376
|
-
if (obj[key] !== undefined && obj[key] !== null && obj[key] !== '')
|
|
377
|
+
if (obj[key] !== undefined && obj[key] !== null && obj[key] !== '' && obj[key] !== 0)
|
|
377
378
|
return obj[key];
|
|
378
379
|
const lowerKey = key.toLowerCase();
|
|
379
380
|
for (const objKey of Object.keys(obj)) {
|
|
380
381
|
if (objKey.toLowerCase() === lowerKey) {
|
|
381
|
-
|
|
382
|
+
const val = obj[objKey];
|
|
383
|
+
if (val !== undefined && val !== null && val !== '' && val !== 0)
|
|
384
|
+
return val;
|
|
382
385
|
}
|
|
383
386
|
}
|
|
384
387
|
}
|
|
@@ -386,22 +389,11 @@ function findValueInObject(obj, keys) {
|
|
|
386
389
|
return undefined;
|
|
387
390
|
}
|
|
388
391
|
function parseData(rawResponse, maxDescLength) {
|
|
389
|
-
const
|
|
392
|
+
const rootData = rawResponse || {};
|
|
393
|
+
const data = rootData.data || rootData.result || rootData || {};
|
|
390
394
|
const stat = {};
|
|
391
395
|
Object.entries(VARIABLE_MAPPING).forEach(([varName, keys]) => {
|
|
392
|
-
|
|
393
|
-
value = value || getNestedValue(data, 'title');
|
|
394
|
-
value = value || getNestedValue(data, 'author.name');
|
|
395
|
-
value = value || getNestedValue(data, 'desc');
|
|
396
|
-
value = value || getNestedValue(data, 'duration');
|
|
397
|
-
value = value || getNestedValue(data, 'like');
|
|
398
|
-
value = value || getNestedValue(data, 'coin');
|
|
399
|
-
value = value || getNestedValue(data, 'collect');
|
|
400
|
-
value = value || getNestedValue(data, 'share');
|
|
401
|
-
value = value || getNestedValue(data, 'view');
|
|
402
|
-
value = value || getNestedValue(data, 'comment');
|
|
403
|
-
value = value || getNestedValue(data, 'music.title');
|
|
404
|
-
value = value || findValueInObject(data, keys);
|
|
396
|
+
const value = findValueInObject(data, keys) || findValueInObject(rootData, keys);
|
|
405
397
|
if (value !== undefined)
|
|
406
398
|
stat[varName] = value;
|
|
407
399
|
});
|
|
@@ -410,34 +402,49 @@ function parseData(rawResponse, maxDescLength) {
|
|
|
410
402
|
type = data.jx.type;
|
|
411
403
|
else if (data.type)
|
|
412
404
|
type = data.type;
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
405
|
+
else if (data.images && data.images.length > 0)
|
|
406
|
+
type = 'image';
|
|
407
|
+
else if (data.pics && data.pics.length > 0)
|
|
408
|
+
type = '图集';
|
|
409
|
+
const title = findValueInObject(data, VARIABLE_MAPPING['标题']) || findValueInObject(rootData, VARIABLE_MAPPING['标题']) || '无标题';
|
|
410
|
+
const author = findValueInObject(data, VARIABLE_MAPPING['作者']) || findValueInObject(rootData, VARIABLE_MAPPING['作者']) || '未知作者';
|
|
411
|
+
let desc = findValueInObject(data, VARIABLE_MAPPING['简介']) || findValueInObject(rootData, VARIABLE_MAPPING['简介']) || title;
|
|
419
412
|
desc = desc.toString().slice(0, maxDescLength);
|
|
420
|
-
|
|
421
|
-
cover = data.video?.fm || data.item?.cover || data.cover || data.imgurl || data.pic || findValueInObject(data, ['cover', 'imgurl', 'pic']) || '';
|
|
413
|
+
const cover = findValueInObject(data, ['cover', 'imgurl', 'pic', 'thumbnail', 'cover_url']) || findValueInObject(rootData, ['cover', 'imgurl', 'pic', 'thumbnail', 'cover_url']) || '';
|
|
422
414
|
let images = [];
|
|
423
|
-
|
|
424
|
-
if (
|
|
425
|
-
images =
|
|
415
|
+
const imgList = findValueInObject(data, ['images', 'pics', 'pic_urls', 'image_list']) || findValueInObject(rootData, ['images', 'pics', 'pic_urls', 'image_list']) || [];
|
|
416
|
+
if (Array.isArray(imgList))
|
|
417
|
+
images = imgList.filter(img => img && typeof img === 'string');
|
|
418
|
+
else if (imgList && typeof imgList === 'string')
|
|
419
|
+
images = [imgList];
|
|
426
420
|
let video = '';
|
|
427
|
-
|
|
428
|
-
|
|
421
|
+
const videoUrls = [
|
|
422
|
+
findValueInObject(data, ['url', 'video_url', 'download_url', 'playUrl', 'mp4_url']),
|
|
423
|
+
findValueInObject(rootData, ['url', 'video_url', 'download_url', 'playUrl', 'mp4_url']),
|
|
424
|
+
data.video?.url,
|
|
425
|
+
data.item?.url,
|
|
426
|
+
rootData.video?.url,
|
|
427
|
+
rootData.item?.url
|
|
428
|
+
];
|
|
429
|
+
for (const url of videoUrls) {
|
|
430
|
+
if (url && typeof url === 'string' && url.startsWith('http')) {
|
|
431
|
+
video = url;
|
|
432
|
+
break;
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
const durationValue = findValueInObject(data, VARIABLE_MAPPING['视频时长']) || findValueInObject(rootData, VARIABLE_MAPPING['视频时长']);
|
|
429
436
|
const duration = typeof durationValue === 'number' ? durationValue : parseInt(durationValue) || 0;
|
|
430
437
|
const durationFormatted = formatDuration(durationValue || 0);
|
|
431
|
-
const live_photo = data.live_photo || [];
|
|
432
|
-
const music = data
|
|
433
|
-
const h_w = data.item?.h_w || [];
|
|
434
|
-
const quality_urls = data.quality_urls || {};
|
|
435
|
-
const default_quality = data.default_quality || '';
|
|
436
|
-
const download_url = data.download_url || video;
|
|
437
|
-
const play_count =
|
|
438
|
-
const reposts_count =
|
|
439
|
-
const attitudes_count =
|
|
440
|
-
const comments_count =
|
|
438
|
+
const live_photo = data.live_photo || rootData.live_photo || [];
|
|
439
|
+
const music = findValueInObject(data, VARIABLE_MAPPING['音乐名']) || findValueInObject(rootData, VARIABLE_MAPPING['音乐名']) || '';
|
|
440
|
+
const h_w = data.item?.h_w || rootData.item?.h_w || [];
|
|
441
|
+
const quality_urls = data.quality_urls || rootData.quality_urls || {};
|
|
442
|
+
const default_quality = data.default_quality || rootData.default_quality || '';
|
|
443
|
+
const download_url = data.download_url || rootData.download_url || video;
|
|
444
|
+
const play_count = stat['播放数'] || '';
|
|
445
|
+
const reposts_count = stat['转发数'] || 0;
|
|
446
|
+
const attitudes_count = stat['点赞数'] || 0;
|
|
447
|
+
const comments_count = stat['评论数'] || 0;
|
|
441
448
|
return {
|
|
442
449
|
type: type,
|
|
443
450
|
rawData: rawResponse,
|
|
@@ -453,7 +460,7 @@ function parseData(rawResponse, maxDescLength) {
|
|
|
453
460
|
live_photo,
|
|
454
461
|
music,
|
|
455
462
|
h_w,
|
|
456
|
-
jx: data.jx || null,
|
|
463
|
+
jx: data.jx || rootData.jx || null,
|
|
457
464
|
quality_urls,
|
|
458
465
|
default_quality,
|
|
459
466
|
download_url,
|
|
@@ -465,13 +472,20 @@ function parseData(rawResponse, maxDescLength) {
|
|
|
465
472
|
}
|
|
466
473
|
function generateFormattedText(parseData, config) {
|
|
467
474
|
let format = config.unifiedMessageFormat;
|
|
468
|
-
|
|
469
|
-
|
|
475
|
+
if (!format)
|
|
476
|
+
format = '标题:${标题}\n作者:${作者}\n简介:${简介}';
|
|
477
|
+
const formatLines = format.split('\n');
|
|
470
478
|
const validLines = [];
|
|
471
479
|
formatLines.forEach((line) => {
|
|
480
|
+
if (!line.trim())
|
|
481
|
+
return;
|
|
472
482
|
let isValid = true;
|
|
473
483
|
let processedLine = line;
|
|
474
484
|
const varMatches = line.match(/\$\{([^}]+)\}/g) || [];
|
|
485
|
+
if (varMatches.length === 0) {
|
|
486
|
+
validLines.push(line);
|
|
487
|
+
return;
|
|
488
|
+
}
|
|
475
489
|
varMatches.forEach((varMatch) => {
|
|
476
490
|
const varName = varMatch.replace(/\$\{|\}/g, '');
|
|
477
491
|
const value = parseData.stat[varName];
|
|
@@ -486,7 +500,7 @@ function generateFormattedText(parseData, config) {
|
|
|
486
500
|
validLines.push(processedLine);
|
|
487
501
|
}
|
|
488
502
|
});
|
|
489
|
-
result = validLines.join('\n').trim();
|
|
503
|
+
let result = validLines.join('\n').trim();
|
|
490
504
|
if (!result) {
|
|
491
505
|
result = `标题:${parseData.title}\n作者:${parseData.author}\n简介:${parseData.desc}`;
|
|
492
506
|
}
|
|
@@ -530,23 +544,31 @@ function apply(ctx, config) {
|
|
|
530
544
|
clearAllCache();
|
|
531
545
|
const http = axios_1.default.create({
|
|
532
546
|
timeout: config.timeout,
|
|
533
|
-
headers: {
|
|
547
|
+
headers: {
|
|
548
|
+
'User-Agent': config.userAgent || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
|
|
549
|
+
'Referer': 'https://www.baidu.com/',
|
|
550
|
+
'Content-Type': 'application/x-www-form-urlencoded'
|
|
551
|
+
}
|
|
534
552
|
});
|
|
535
553
|
async function parseWithRetry(url, platform, retryTimes) {
|
|
536
554
|
let lastError = null;
|
|
537
555
|
for (let i = 0; i <= retryTimes; i++) {
|
|
538
556
|
try {
|
|
539
|
-
const params = { url };
|
|
557
|
+
const params = { url, proxyurl: '' };
|
|
540
558
|
const res = await http.get(API_CONFIG[platform], {
|
|
541
559
|
params,
|
|
542
|
-
timeout: config.timeout
|
|
560
|
+
timeout: config.timeout,
|
|
561
|
+
headers: {
|
|
562
|
+
'X-Requested-With': 'XMLHttpRequest',
|
|
563
|
+
'Origin': platform === 'xiaohongshu' ? 'https://api.bugpk.com' : 'https://www.baidu.com'
|
|
564
|
+
}
|
|
543
565
|
});
|
|
544
566
|
return res.data;
|
|
545
567
|
}
|
|
546
568
|
catch (error) {
|
|
547
569
|
lastError = error;
|
|
548
570
|
if (i < retryTimes) {
|
|
549
|
-
await delay(config.retryInterval);
|
|
571
|
+
await delay(config.retryInterval * (i + 1));
|
|
550
572
|
}
|
|
551
573
|
}
|
|
552
574
|
}
|
|
@@ -571,9 +593,17 @@ function apply(ctx, config) {
|
|
|
571
593
|
}
|
|
572
594
|
try {
|
|
573
595
|
const resData = await parseWithRetry(realUrl, platform, config.retryTimes);
|
|
574
|
-
|
|
596
|
+
if (!resData || Object.keys(resData).length === 0) {
|
|
597
|
+
const code = ErrorCode.API_EMPTY_RESPONSE;
|
|
598
|
+
const msg = getErrorInfo(code, 'API返回空数据');
|
|
599
|
+
logger.error(`[${code}] ${url}`);
|
|
600
|
+
return { data: null, code, msg };
|
|
601
|
+
}
|
|
602
|
+
const isSuccess = resData.code === 0 || resData.code === 200 || resData.code === 1 ||
|
|
603
|
+
(resData.msg && (resData.msg.includes('解析成功') || resData.msg.includes('success'))) ||
|
|
604
|
+
!!resData.data || !!resData.result || !!resData.video || !!resData.images;
|
|
575
605
|
if (!isSuccess) {
|
|
576
|
-
const apiErrorMsg = resData.msg || '解析失败';
|
|
606
|
+
const apiErrorMsg = resData.msg || resData.error || '解析失败';
|
|
577
607
|
const code = ErrorCode.API_RETURN_ERROR;
|
|
578
608
|
const msg = getErrorInfo(code, apiErrorMsg);
|
|
579
609
|
logger.error(`[${code}] API返回错误: ${url}, 错误: ${apiErrorMsg}`);
|
|
@@ -588,7 +618,7 @@ function apply(ctx, config) {
|
|
|
588
618
|
parseResult.type === '图集';
|
|
589
619
|
if (!hasValidContent) {
|
|
590
620
|
const code = ErrorCode.NO_VIDEO_FOUND;
|
|
591
|
-
const msg = getErrorInfo(code, '
|
|
621
|
+
const msg = getErrorInfo(code, '链接有效但未获取到有效视频/图片内容,可能是私密/需要登录/已删除内容');
|
|
592
622
|
logger.warn(`[${code}] 解析成功但无有效内容: ${url}`);
|
|
593
623
|
return { data: null, code, msg };
|
|
594
624
|
}
|
|
@@ -613,7 +643,7 @@ function apply(ctx, config) {
|
|
|
613
643
|
if (errorMsg.includes('timeout')) {
|
|
614
644
|
code = ErrorCode.REQUEST_TIMEOUT;
|
|
615
645
|
}
|
|
616
|
-
else if (errorMsg.includes('Network') || errorMsg.includes('network')) {
|
|
646
|
+
else if (errorMsg.includes('Network') || errorMsg.includes('network') || errorMsg.includes('404') || errorMsg.includes('500')) {
|
|
617
647
|
code = ErrorCode.NETWORK_ERROR;
|
|
618
648
|
}
|
|
619
649
|
else {
|
|
@@ -699,14 +729,14 @@ function apply(ctx, config) {
|
|
|
699
729
|
}
|
|
700
730
|
}
|
|
701
731
|
if (errors.length > 0) {
|
|
702
|
-
const errorLines = errors.map(err => `【${err.url}】: ${err.msg}`);
|
|
732
|
+
const errorLines = errors.map(err => `【${err.url.slice(0, 50)}${err.url.length > 50 ? '...' : ''}】: ${err.msg}`);
|
|
703
733
|
const errorMsg = `❌ 解析失败列表(共${errors.length}个链接):\n${errorLines.join('\n')}`;
|
|
704
734
|
logger.error(`解析失败数量: ${errors.length}, 错误码列表: ${errors.map(e => e.code).join(', ')}`);
|
|
705
735
|
await sendTimeout(session, errorMsg);
|
|
706
736
|
await delay(500);
|
|
707
737
|
}
|
|
708
738
|
if (items.length === 0) {
|
|
709
|
-
const failMsg = getErrorInfo(ErrorCode.UNKNOWN_ERROR, '
|
|
739
|
+
const failMsg = getErrorInfo(ErrorCode.UNKNOWN_ERROR, '所有链接均解析失败,请检查链接是否有效/是否需要登录,或稍后重试');
|
|
710
740
|
await sendTimeout(session, `⚠ ${failMsg}`);
|
|
711
741
|
return;
|
|
712
742
|
}
|
package/package.json
CHANGED