koishi-plugin-douyin-local-parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +41 -0
- package/lib/index.d.ts +27 -0
- package/lib/index.js +128 -0
- package/lib/parser.d.ts +45 -0
- package/lib/parser.js +335 -0
- package/package.json +34 -0
- package/src/index.ts +156 -0
- package/src/parser.ts +401 -0
package/README.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# koishi-plugin-douyin-local-parser
|
|
2
|
+
|
|
3
|
+
抖音链接解析 Koishi 插件,支持普通链接、短链和聊天平台卡片消息中的链接提取,可选合并转发。
|
|
4
|
+
|
|
5
|
+
解析逻辑参考了 [fllesser/nonebot-plugin-parser](https://github.com/fllesser/nonebot-plugin-parser) 的抖音解析实现,并按本地 `xhs-parser` 的 Koishi 插件结构改写。
|
|
6
|
+
|
|
7
|
+
## 功能
|
|
8
|
+
|
|
9
|
+
- 解析 `https://v.douyin.com/...`、`https://jx.douyin.com/...` 短链
|
|
10
|
+
- 解析 `https://www.douyin.com/video/...`
|
|
11
|
+
- 解析 `https://www.douyin.com/note/...`
|
|
12
|
+
- 解析 `https://www.iesdouyin.com/share/video|note|slides/...`
|
|
13
|
+
- 解析 `https://m.douyin.com/share/video|note|slides/...`
|
|
14
|
+
- 解析 `https://jingxuan.douyin.com/m/video|note|slides/...`
|
|
15
|
+
- 从 Koishi 卡片元素的 `data` 字段、转义 JSON、普通文本中提取抖音链接
|
|
16
|
+
- 支持图片、动图、视频、原文链接和作者信息返回
|
|
17
|
+
- 支持 OneBot / Red 适配器的合并转发元素
|
|
18
|
+
|
|
19
|
+
## 本地测试
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
npm install
|
|
23
|
+
npm test
|
|
24
|
+
npm run build
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
也可以直接跑一次真实解析:
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
npm run dev -- "https://v.douyin.com/_2ljF4AmKL8/"
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Koishi 使用
|
|
34
|
+
|
|
35
|
+
构建后在 Koishi 配置中加载本地插件:
|
|
36
|
+
|
|
37
|
+
```yaml
|
|
38
|
+
plugins:
|
|
39
|
+
/absolute/path/to/douyin-parser/lib:
|
|
40
|
+
enabled: true
|
|
41
|
+
```
|
package/lib/index.d.ts
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { Context, Schema } from 'koishi';
|
|
2
|
+
export declare const name = "douyin-parser";
|
|
3
|
+
export interface Config {
|
|
4
|
+
enabled: boolean;
|
|
5
|
+
parseMode: ('link' | 'card')[];
|
|
6
|
+
waitTip?: string | null;
|
|
7
|
+
useForward: boolean;
|
|
8
|
+
quote: boolean;
|
|
9
|
+
middleware: boolean;
|
|
10
|
+
parseLimit: number;
|
|
11
|
+
minimumInterval: number;
|
|
12
|
+
userAgent: string;
|
|
13
|
+
cookie?: string;
|
|
14
|
+
timeout: number;
|
|
15
|
+
showImages: boolean;
|
|
16
|
+
maxImages: number;
|
|
17
|
+
maxDescLength: number;
|
|
18
|
+
descTruncateSuffix: string;
|
|
19
|
+
showVideo: boolean;
|
|
20
|
+
showAuthor: boolean;
|
|
21
|
+
showLink: boolean;
|
|
22
|
+
showError: boolean;
|
|
23
|
+
loggerinfo: boolean;
|
|
24
|
+
}
|
|
25
|
+
export declare const Config: Schema<Config>;
|
|
26
|
+
export declare const usage = "\n\u53D1\u9001\u6296\u97F3\u94FE\u63A5\u6216\u5E73\u53F0\u5361\u7247\u5373\u53EF\u81EA\u52A8\u89E3\u6790\u3002\n\n\u652F\u6301\u793A\u4F8B\uFF1A\n\n- https://v.douyin.com/_2ljF4AmKL8/\n- https://www.douyin.com/video/7521023890996514083\n- https://www.douyin.com/note/7469411074119322899\n- https://www.iesdouyin.com/share/video/7521023890996514083\n";
|
|
27
|
+
export declare function apply(ctx: Context, config: Config): void;
|
package/lib/index.js
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.usage = exports.Config = exports.name = void 0;
|
|
4
|
+
exports.apply = apply;
|
|
5
|
+
const koishi_1 = require("koishi");
|
|
6
|
+
const parser_1 = require("./parser");
|
|
7
|
+
exports.name = 'douyin-parser';
|
|
8
|
+
const logger = new koishi_1.Logger(exports.name);
|
|
9
|
+
exports.Config = koishi_1.Schema.intersect([
|
|
10
|
+
koishi_1.Schema.object({
|
|
11
|
+
enabled: koishi_1.Schema.boolean().default(true).description('开启抖音链接/卡片解析。'),
|
|
12
|
+
parseMode: koishi_1.Schema.array(koishi_1.Schema.union([
|
|
13
|
+
koishi_1.Schema.const('link').description('普通链接'),
|
|
14
|
+
koishi_1.Schema.const('card').description('卡片消息'),
|
|
15
|
+
])).role('checkbox').default(['link', 'card']).description('选择解析来源。'),
|
|
16
|
+
waitTip: koishi_1.Schema.union([
|
|
17
|
+
koishi_1.Schema.const(null).description('不发送提示'),
|
|
18
|
+
koishi_1.Schema.string().description('解析前发送提示语').default('正在解析抖音链接...'),
|
|
19
|
+
]).default(null).description('等待提示。'),
|
|
20
|
+
}).description('基础设置'),
|
|
21
|
+
koishi_1.Schema.object({
|
|
22
|
+
useForward: koishi_1.Schema.boolean().default(false).description('开启合并转发。主要适用于 onebot / red 适配器。').experimental(),
|
|
23
|
+
quote: koishi_1.Schema.boolean().default(true).description('普通发送时引用原消息。'),
|
|
24
|
+
middleware: koishi_1.Schema.boolean().default(false).description('以前置中间件模式捕获消息。').experimental(),
|
|
25
|
+
parseLimit: koishi_1.Schema.number().min(1).max(10).step(1).default(3).description('单条消息最多解析的链接数量。'),
|
|
26
|
+
minimumInterval: koishi_1.Schema.number().min(0).max(3600).step(1).default(180).description('同频道同链接去重间隔,单位秒。0 表示不去重。'),
|
|
27
|
+
}).description('发送设置'),
|
|
28
|
+
koishi_1.Schema.object({
|
|
29
|
+
showImages: koishi_1.Schema.boolean().default(true).description('返回图片/动图。'),
|
|
30
|
+
maxImages: koishi_1.Schema.number().min(0).max(18).step(1).default(9).description('单个作品最多发送图片数。'),
|
|
31
|
+
maxDescLength: koishi_1.Schema.number().min(0).max(2000).step(10).default(160).description('描述最大字数。设为 0 时不展示描述。'),
|
|
32
|
+
descTruncateSuffix: koishi_1.Schema.string().default('...(已截断)').description('描述超出最大字数时追加的截断标志。'),
|
|
33
|
+
showVideo: koishi_1.Schema.boolean().default(true).description('返回视频元素。'),
|
|
34
|
+
showAuthor: koishi_1.Schema.boolean().default(true).description('展示作者。'),
|
|
35
|
+
showLink: koishi_1.Schema.boolean().default(true).description('展示原文链接。'),
|
|
36
|
+
}).description('内容设置'),
|
|
37
|
+
koishi_1.Schema.object({
|
|
38
|
+
userAgent: koishi_1.Schema.string().default('Mozilla/5.0 (iPhone; CPU iPhone OS 18_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Mobile/15E148 Safari/604.1').description('请求抖音页面时使用的 User-Agent。'),
|
|
39
|
+
cookie: koishi_1.Schema.string().role('textarea').default('').description('可选 Cookie。遇到风控或无法读取页面数据时可填写。'),
|
|
40
|
+
timeout: koishi_1.Schema.number().min(3).max(60).step(1).default(15).description('请求超时时间,单位秒。'),
|
|
41
|
+
showError: koishi_1.Schema.boolean().default(false).description('解析失败时向聊天发送错误提示。'),
|
|
42
|
+
loggerinfo: koishi_1.Schema.boolean().default(false).description('输出调试日志。').experimental(),
|
|
43
|
+
}).description('网络与调试'),
|
|
44
|
+
]);
|
|
45
|
+
exports.usage = `
|
|
46
|
+
发送抖音链接或平台卡片即可自动解析。
|
|
47
|
+
|
|
48
|
+
支持示例:
|
|
49
|
+
|
|
50
|
+
- https://v.douyin.com/_2ljF4AmKL8/
|
|
51
|
+
- https://www.douyin.com/video/7521023890996514083
|
|
52
|
+
- https://www.douyin.com/note/7469411074119322899
|
|
53
|
+
- https://www.iesdouyin.com/share/video/7521023890996514083
|
|
54
|
+
`;
|
|
55
|
+
/**
 * Plugin entry point. Registers one middleware that looks for Douyin links in
 * every incoming message and hands them to `handleLinks` in the background.
 *
 * Nothing is registered when `config.enabled` is false.
 *
 * @param ctx Koishi context used to register the middleware.
 * @param config Plugin configuration.
 */
function apply(ctx, config) {
    if (config.enabled) {
        // Last-seen timestamps keyed by "<channel>:<link>", used for de-duplication.
        const seenAt = new Map();
        const onMessage = async (session, next) => {
            const text = session.content || session.stripped?.content || '';
            // Messages that start with an element tag or carry a data= attribute count as cards.
            const source = (/^<\w+\s/i.test(text) || text.includes('data=')) ? 'card' : 'link';
            if (!config.parseMode.includes(source)) {
                return next();
            }
            const found = (0, parser_1.extractDouyinLinks)(text).slice(0, config.parseLimit);
            if (found.length === 0) {
                return next();
            }
            const scope = session.channelId || session.guildId || 'private';
            const pending = found.filter((link) => shouldProcess(seenAt, scope, link, config.minimumInterval));
            if (pending.length > 0) {
                // Fire and forget: parsing must not block the rest of the middleware chain.
                handleLinks(session, pending, config).catch((error) => {
                    logger.warn(error);
                });
            }
            return next();
        };
        ctx.middleware(onMessage, config.middleware);
    }
}
|
|
78
|
+
/**
 * Parses every link in `links` and sends the resulting messages to the session.
 *
 * Flow: optionally send the wait tip, fetch and build messages for each link,
 * then deliver them either as one forwarded `figure` (onebot / red with
 * `useForward`), as a quoted first message followed by the rest, or one by one.
 * The wait tip is deleted afterwards on a best-effort basis.
 *
 * Failures are logged; when `config.showError` is set they are also reported
 * to the chat.
 *
 * @param session Koishi session the links came from.
 * @param links Douyin links to parse, already limited and de-duplicated.
 * @param config Plugin configuration.
 */
async function handleLinks(session, links, config) {
    const errorPrefix = '抖音解析失败:';
    let waitTipMessageId;
    if (config.waitTip) {
        const result = await session.send(`${koishi_1.h.quote(session.messageId)}${config.waitTip}`);
        waitTipMessageId = Array.isArray(result) ? result[0] : result;
    }
    try {
        const allMessages = [];
        for (const link of links) {
            if (config.loggerinfo)
                logger.info(`parse ${link}`);
            const post = await (0, parser_1.fetchDouyinPost)(link, config);
            allMessages.push(...(0, parser_1.buildDouyinMessages)(post, config, session));
        }
        if (!allMessages.length)
            return;
        if (config.useForward && (session.platform === 'onebot' || session.platform === 'red')) {
            await session.send((0, koishi_1.h)('figure', { children: allMessages }));
            return;
        }
        if (config.quote) {
            // Only the first message quotes the original; the rest follow unquoted.
            await session.send((0, koishi_1.h)('message', koishi_1.h.quote(session.messageId), allMessages[0].children));
            for (const message of allMessages.slice(1))
                await session.send(message);
            return;
        }
        for (const message of allMessages)
            await session.send(message);
    }
    catch (error) {
        logger.warn(error);
        if (config.showError) {
            const detail = error instanceof Error ? error.message : String(error);
            // fetchDouyinPost already prefixes its aggregated error; do not repeat the prefix.
            const text = detail.startsWith(errorPrefix) ? detail : `${errorPrefix}${detail}`;
            await session.send(text);
        }
    }
    finally {
        if (waitTipMessageId) {
            // Best effort: not every adapter implements deleteMessage.
            await session.bot?.deleteMessage?.(session.channelId, waitTipMessageId).catch?.(() => undefined);
        }
    }
}
|
|
118
|
+
/**
 * Decides whether a link should be parsed now, de-duplicating per channel.
 *
 * A link is rejected when the same channel/link pair was accepted less than
 * `seconds` ago. Accepting a link records the current time and evicts every
 * entry whose window has already elapsed, so the map cannot grow without bound.
 *
 * @param recent Map from "<channelId>:<link>" to the last accepted timestamp (ms).
 * @param channelId Channel (or fallback scope) the message arrived in.
 * @param link Normalised Douyin link.
 * @param seconds De-duplication window in seconds; values <= 0 disable it.
 * @returns `true` when the link should be processed.
 */
function shouldProcess(recent, channelId, link, seconds) {
    if (seconds <= 0)
        return true;
    const windowMs = seconds * 1000;
    const now = Date.now();
    const key = `${channelId}:${link}`;
    const last = recent.get(key);
    if (last !== undefined && now - last < windowMs)
        return false;
    // Drop expired entries; deleting during Map iteration is well-defined.
    for (const [seenKey, seenAt] of recent) {
        if (now - seenAt >= windowMs)
            recent.delete(seenKey);
    }
    recent.set(key, now);
    return true;
}
|
package/lib/parser.d.ts
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import { h } from 'koishi';
|
|
2
|
+
export interface DouyinConfigLike {
|
|
3
|
+
userAgent: string;
|
|
4
|
+
timeout: number;
|
|
5
|
+
showVideo: boolean;
|
|
6
|
+
showImages: boolean;
|
|
7
|
+
maxImages: number;
|
|
8
|
+
maxDescLength: number;
|
|
9
|
+
descTruncateSuffix: string;
|
|
10
|
+
showAuthor: boolean;
|
|
11
|
+
showLink: boolean;
|
|
12
|
+
cookie?: string;
|
|
13
|
+
}
|
|
14
|
+
export interface DouyinPost {
|
|
15
|
+
id: string;
|
|
16
|
+
url: string;
|
|
17
|
+
title: string;
|
|
18
|
+
desc: string;
|
|
19
|
+
type: 'video' | 'note' | 'slides' | 'unknown';
|
|
20
|
+
authorName: string;
|
|
21
|
+
authorAvatar?: string;
|
|
22
|
+
createTime?: number;
|
|
23
|
+
duration?: number;
|
|
24
|
+
coverUrl?: string;
|
|
25
|
+
imageUrls: string[];
|
|
26
|
+
dynamicImageUrls: string[];
|
|
27
|
+
videoUrls: string[];
|
|
28
|
+
}
|
|
29
|
+
interface ParsedDouyinUrl {
|
|
30
|
+
url: string;
|
|
31
|
+
id: string;
|
|
32
|
+
type: 'video' | 'note' | 'slides' | 'unknown';
|
|
33
|
+
}
|
|
34
|
+
export declare function extractDouyinLinks(content: string): string[];
|
|
35
|
+
export declare function resolveDouyinLink(rawUrl: string, config: DouyinConfigLike): Promise<string>;
|
|
36
|
+
export declare function fetchDouyinPost(rawUrl: string, config: DouyinConfigLike): Promise<DouyinPost>;
|
|
37
|
+
export declare function buildDouyinMessages(post: DouyinPost, config: DouyinConfigLike, session?: {
|
|
38
|
+
userId?: string;
|
|
39
|
+
username?: string;
|
|
40
|
+
author?: {
|
|
41
|
+
nickname?: string;
|
|
42
|
+
};
|
|
43
|
+
}): h[];
|
|
44
|
+
export declare function extractRouterDataPost(html: string, parsed?: ParsedDouyinUrl, sourceUrl?: string): DouyinPost | null;
|
|
45
|
+
export {};
|
package/lib/parser.js
ADDED
|
@@ -0,0 +1,335 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.extractDouyinLinks = extractDouyinLinks;
|
|
4
|
+
exports.resolveDouyinLink = resolveDouyinLink;
|
|
5
|
+
exports.fetchDouyinPost = fetchDouyinPost;
|
|
6
|
+
exports.buildDouyinMessages = buildDouyinMessages;
|
|
7
|
+
exports.extractRouterDataPost = extractRouterDataPost;
|
|
8
|
+
const koishi_1 = require("koishi");
|
|
9
|
+
const URL_BOUNDARY = '[^\\s"\'<>\\\\^`{|},。;!?、【】《》]+';
|
|
10
|
+
const LINK_PATTERNS = [
|
|
11
|
+
new RegExp(`https?://v\\.douyin\\.com/${URL_BOUNDARY}`, 'gi'),
|
|
12
|
+
new RegExp(`https?://jx\\.douyin\\.com/${URL_BOUNDARY}`, 'gi'),
|
|
13
|
+
new RegExp(`https?://(?:www\\.)?douyin\\.com/(?:video|note)/${URL_BOUNDARY}`, 'gi'),
|
|
14
|
+
new RegExp(`https?://(?:www\\.)?iesdouyin\\.com/share/(?:slides|video|note)/${URL_BOUNDARY}`, 'gi'),
|
|
15
|
+
new RegExp(`https?://m\\.douyin\\.com/share/(?:slides|video|note)/${URL_BOUNDARY}`, 'gi'),
|
|
16
|
+
new RegExp(`https?://jingxuan\\.douyin\\.com/m/(?:slides|video|note)/${URL_BOUNDARY}`, 'gi'),
|
|
17
|
+
];
|
|
18
|
+
const IOS_HEADERS = {
|
|
19
|
+
accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
|
|
20
|
+
'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
|
|
21
|
+
referer: 'https://www.douyin.com/',
|
|
22
|
+
};
|
|
23
|
+
const ANDROID_HEADERS = {
|
|
24
|
+
accept: 'application/json,text/plain,*/*',
|
|
25
|
+
referer: 'https://www.douyin.com/',
|
|
26
|
+
};
|
|
27
|
+
/**
 * Extracts every distinct Douyin link from a message.
 *
 * The message is expanded into candidate strings (element attributes, embedded
 * JSON, entity-decoded variants); each candidate is normalised and scanned with
 * all `LINK_PATTERNS`. Links are returned in first-seen order.
 *
 * @param content Raw message content.
 * @returns De-duplicated list of cleaned links.
 */
function extractDouyinLinks(content) {
    const found = new Set();
    for (const candidate of expandTextCandidates(content)) {
        const text = normalizeText(candidate);
        for (const pattern of LINK_PATTERNS) {
            // The patterns are shared global regexes; start every scan from the beginning.
            pattern.lastIndex = 0;
            for (const hit of text.matchAll(pattern)) {
                found.add(cleanUrl(hit[0]));
            }
        }
    }
    return Array.from(found);
}
|
|
42
|
+
/**
 * Resolves a Douyin short link (`v.douyin.com` / `jx.douyin.com`) to its target.
 *
 * Other URLs are returned unchanged after ensuring a protocol. For short links
 * the redirect is first probed manually to read the `Location` header; if the
 * server answers without a redirect, a second request follows redirects and the
 * final response URL is used.
 *
 * @param rawUrl Link as extracted from the message.
 * @param config Network settings (user agent, cookie, timeout).
 * @returns The resolved URL, or the input URL when nothing better is known.
 */
async function resolveDouyinLink(rawUrl, config) {
    const target = ensureProtocol(rawUrl);
    const isShortLink = /(?:v|jx)\.douyin\.com/i.test(target);
    if (!isShortLink) {
        return target;
    }
    const probe = await fetchWithTimeout(target, config, { redirect: 'manual', headers: IOS_HEADERS });
    const redirectTo = probe.headers.get('location');
    if (redirectTo) {
        // Location may be relative; resolve it against the short link.
        return new URL(redirectTo, target).toString();
    }
    const isRedirectStatus = probe.status >= 300 && probe.status < 400;
    if (isRedirectStatus) {
        return target;
    }
    const landed = await fetchWithTimeout(target, config, { redirect: 'follow', headers: IOS_HEADERS });
    return landed.url || target;
}
|
|
55
|
+
/**
 * Fetches and parses a Douyin post.
 *
 * The link is resolved and classified first. Slides posts go straight to the
 * slides API. Everything else tries each share-page candidate and reads its
 * embedded router data, falling back to the slides API when no page yields a post.
 *
 * @param rawUrl Link as extracted from the message.
 * @param config Network settings (user agent, cookie, timeout).
 * @returns The parsed post.
 * @throws Error when no work id can be recognised or every strategy fails; the
 *   failure message lists each attempt.
 */
async function fetchDouyinPost(rawUrl, config) {
    const describe = (error) => (error instanceof Error ? error.message : String(error));
    const parsed = parseDouyinUrl(await resolveDouyinLink(rawUrl, config));
    if (!parsed.id) {
        throw new Error('未能识别抖音作品 ID。');
    }
    if (parsed.type === 'slides') {
        return fetchSlidesPost(parsed, config);
    }
    const failures = [];
    for (const pageUrl of buildPageCandidates(parsed)) {
        try {
            const html = await fetchText(pageUrl, config, IOS_HEADERS);
            const post = extractRouterDataPost(html, parsed, pageUrl);
            if (post) {
                return post;
            }
            failures.push(`${pageUrl}: 未找到 _ROUTER_DATA`);
        }
        catch (error) {
            failures.push(`${pageUrl}: ${describe(error)}`);
        }
    }
    // Last resort: the slides API sometimes serves posts the share pages do not.
    try {
        return await fetchSlidesPost(parsed, config);
    }
    catch (error) {
        failures.push(`slidesinfo: ${describe(error)}`);
    }
    throw new Error(`抖音解析失败:${failures.join(';')}`);
}
|
|
83
|
+
/**
 * Builds the Koishi message elements for a parsed post.
 *
 * The first message is the text summary. It is followed by up to
 * `config.maxImages` images when `showImages` is on, and by at most one video
 * (the first dynamic image, otherwise the first video URL) when `showVideo` is on.
 * Every message carries the sender's id and nickname as attributes.
 *
 * @param post Parsed Douyin post.
 * @param config Content settings.
 * @param session Optional session supplying `userId` and a display name.
 * @returns The list of `message` elements.
 */
function buildDouyinMessages(post, config, session) {
    const attrs = {
        userId: session?.userId,
        nickname: session?.author?.nickname || session?.username,
    };
    const wrap = (content) => (0, koishi_1.h)('message', attrs, content);
    const images = config.showImages ? post.imageUrls.slice(0, config.maxImages) : [];
    const videos = config.showVideo ? [...post.dynamicImageUrls, ...post.videoUrls].slice(0, 1) : [];
    return [
        wrap(koishi_1.h.text(formatPostText(post, config))),
        ...images.map((imageUrl) => wrap(koishi_1.h.image(imageUrl))),
        ...videos.map((videoUrl) => wrap(koishi_1.h.video(videoUrl))),
    ];
}
|
|
102
|
+
/**
 * Extracts a post from the `window._ROUTER_DATA` script embedded in a share page.
 *
 * @param html Page HTML.
 * @param parsed Parsed URL info; defaults to an empty, unknown-type record.
 * @param sourceUrl URL used as fallback for the post URL; defaults to `parsed.url`.
 * @returns The post, or `null` when the page has no router data or no item in it.
 * @throws SyntaxError when the embedded payload is not valid JSON.
 */
function extractRouterDataPost(html, parsed = { id: '', type: 'unknown', url: '' }, sourceUrl = parsed.url) {
    const captured = html.match(/window\._ROUTER_DATA\s*=\s*([\s\S]*?)<\/script>/)?.[1];
    if (!captured) {
        return null;
    }
    // Strip trailing semicolons so the assignment's right-hand side parses as JSON.
    const payload = captured.trim().replace(/;+\s*$/, '');
    const routerData = parseJsonLike(payload);
    const videoItem = deepGet(routerData, ['loaderData', 'video_(id)/page', 'videoInfoRes', 'item_list', 0]);
    const noteItem = deepGet(routerData, ['loaderData', 'note_(id)/page', 'videoInfoRes', 'item_list', 0]);
    const item = firstDefined(videoItem, noteItem);
    return item ? buildPostFromAweme(item, parsed, sourceUrl) : null;
}
|
|
112
|
+
/**
 * Splits a Douyin URL into its numeric work id and content type.
 *
 * Recognises `douyin.com/(video|note)/<id>`, `iesdouyin.com` and `m.douyin.com`
 * share paths, and `jingxuan.douyin.com/m/...` paths.
 *
 * @param url URL to inspect; a protocol is added when missing.
 * @returns `{ url, id, type }`, with `id` empty and `type` 'unknown' when unrecognised.
 */
function parseDouyinUrl(url) {
    const normalized = ensureProtocol(url);
    const idHit = normalized.match(/(?:douyin\.com\/(?:video|note)|(?:iesdouyin|m\.douyin)\.com\/share\/(?:slides|video|note)|jingxuan\.douyin\.com\/m\/(?:slides|video|note))\/(\d+)/i);
    const typeHit = normalized.match(/douyin\.com\/(video|note)\/\d+|(?:iesdouyin|m\.douyin)\.com\/share\/(slides|video|note)\/\d+|jingxuan\.douyin\.com\/m\/(slides|video|note)\/\d+/i);
    // Exactly one capture group is set, depending on which URL family matched.
    const [, webType, shareType, featuredType] = typeHit ?? [];
    const id = idHit?.[1] || '';
    const type = webType || shareType || featuredType || 'unknown';
    return { url: normalized, id, type };
}
|
|
122
|
+
/**
 * Lists the share-page URLs to try for a post, in priority order.
 *
 * @param parsed Parsed URL info; an 'unknown' type is treated as 'video'.
 * @returns The mobile share URL followed by the iesdouyin share URL.
 */
function buildPageCandidates(parsed) {
    const { id, type } = parsed;
    const pageType = type !== 'unknown' ? type : 'video';
    const mobilePage = `https://m.douyin.com/share/${pageType}/${id}`;
    const sharePage = `https://www.iesdouyin.com/share/${pageType}/${id}`;
    return [mobilePage, sharePage];
}
|
|
129
|
+
/**
 * Loads a post through the iesdouyin `slidesinfo` web API.
 *
 * @param parsed Parsed URL info; `parsed.id` is sent as the only aweme id.
 * @param config Network settings (user agent, cookie, timeout).
 * @returns The post, typed as 'slides'.
 * @throws Error on a non-OK HTTP status or when the response has no item.
 */
async function fetchSlidesPost(parsed, config) {
    const endpoint = new URL('https://www.iesdouyin.com/web/api/v2/aweme/slidesinfo/');
    endpoint.searchParams.set('aweme_ids', `[${parsed.id}]`);
    endpoint.searchParams.set('request_source', '200');
    const response = await fetchWithTimeout(endpoint.toString(), config, {
        redirect: 'follow',
        headers: ANDROID_HEADERS,
    });
    if (!response.ok) {
        throw new Error(`请求抖音图集接口失败:HTTP ${response.status}`);
    }
    const payload = await response.json();
    const item = payload?.aweme_details?.[0];
    if (!item) {
        throw new Error('图集接口未返回作品数据。');
    }
    const slidesInfo = { ...parsed, type: 'slides' };
    return buildPostFromAweme(item, slidesInfo, parsed.url);
}
|
|
142
|
+
/**
 * Converts a raw aweme item into the plugin's post shape.
 *
 * Each entry of `item.images` contributes either a dynamic-image video URL
 * (watermark removed) or a still image URL. The main video URL is used only
 * when the item has no image content at all.
 *
 * @param item Raw item object from the page data or the slides API.
 * @param parsed Parsed URL info supplying the id and type.
 * @param sourceUrl Fallback URL when no canonical URL can be built.
 * @returns The normalised post.
 */
function buildPostFromAweme(item, parsed, sourceUrl) {
    const stills = [];
    const clips = [];
    const gallery = Array.isArray(item?.images) ? item.images : [];
    for (const entry of gallery) {
        const clip = pickUrl(entry?.video?.play_addr?.url_list);
        if (clip) {
            clips.push(removeWatermark(clip));
        }
        else {
            const still = pickUrl(entry?.url_list);
            if (still) {
                stills.push(still);
            }
        }
    }
    const video = item?.video;
    const author = item?.author;
    const isGallery = stills.length > 0 || clips.length > 0;
    const playUrl = isGallery ? undefined : pickUrl(video?.play_addr?.url_list);
    const coverUrl = pickUrl(video?.cover?.url_list);
    const authorAvatar = pickUrl(author?.avatar_thumb?.url_list) || pickUrl(author?.avatar_medium?.url_list);
    // The raw duration is divided by 1000 and rounded (presumably milliseconds to seconds).
    const duration = typeof video?.duration === 'number' ? Math.round(video.duration / 1000) : undefined;
    return {
        id: parsed.id || String(item?.aweme_id || ''),
        url: canonicalUrl(parsed, sourceUrl),
        title: String(item?.desc || '抖音作品'),
        desc: String(item?.desc || ''),
        type: parsed.type,
        authorName: String(author?.nickname || '未知作者'),
        authorAvatar,
        createTime: numberOrUndefined(item?.create_time),
        duration,
        coverUrl,
        imageUrls: unique(stills),
        dynamicImageUrls: unique(clips),
        videoUrls: playUrl ? [removeWatermark(playUrl)] : [],
    };
}
|
|
177
|
+
/**
 * Renders the text summary of a post: headline, optional author, optional
 * description and optional link, one per line.
 *
 * `buildPostFromAweme` fills `title` and `desc` from the same caption, so a
 * separate description line is never produced for its posts. To keep
 * `maxDescLength` and `descTruncateSuffix` effective, a headline that is really
 * the caption is truncated with the same limit, and replaced by the generic
 * title when the limit is 0 (meaning "do not show the description").
 *
 * @param post Parsed post.
 * @param config Content settings (`showAuthor`, `showLink`, `maxDescLength`,
 *   `descTruncateSuffix`).
 * @returns The summary text, lines joined with "\n".
 */
function formatPostText(post, config) {
    const fallbackTitle = '抖音作品';
    const limit = config.maxDescLength;
    const clip = (text) => trimText(text, limit, config.descTruncateSuffix);
    const titleIsCaption = Boolean(post.desc) && post.title === post.desc;
    let headline = post.title || fallbackTitle;
    if (titleIsCaption) {
        headline = limit > 0 ? clip(post.desc) : fallbackTitle;
    }
    const lines = [`抖音:${headline}`];
    if (config.showAuthor)
        lines.push(`作者:${post.authorName}`);
    // A distinct description still gets its own, truncated line.
    if (post.desc && !titleIsCaption && limit > 0) {
        lines.push(clip(post.desc));
    }
    if (config.showLink)
        lines.push(post.url);
    return lines.join('\n');
}
|
|
188
|
+
/**
 * Expands a message into every string that might contain a link.
 *
 * Candidates are the raw content, text and string attributes of parsed Koishi
 * elements, the values of `data="..."` / `data='...'` attributes found by regex,
 * and, for each of those, the entity-decoded form plus any strings inside
 * embedded JSON.
 *
 * @param content Raw message content.
 * @returns Distinct candidate strings in discovery order.
 */
function expandTextCandidates(content) {
    const pool = new Set([content]);
    try {
        for (const node of koishi_1.h.parse(content)) {
            collectElementText(node, pool);
        }
    }
    catch {
        // Some adapters deliver partial XML snippets; the regex pass below still covers them.
    }
    const dataAttributes = content.matchAll(/\bdata=(?:"([^"]*)"|'([^']*)')/gi);
    for (const [, doubleQuoted, singleQuoted] of dataAttributes) {
        pool.add(doubleQuoted || singleQuoted || '');
    }
    // Iterate over a snapshot: the loop body adds new candidates to the pool.
    for (const entry of Array.from(pool)) {
        const decoded = decodeHtmlEntities(entry);
        pool.add(decoded);
        maybeCollectJsonValues(decoded, pool);
    }
    return Array.from(pool);
}
|
|
207
|
+
/**
 * Recursively collects strings from an element tree into `values`.
 *
 * String nodes are added as-is. For element nodes, every string-valued
 * attribute is added first, then the children are visited depth-first.
 *
 * @param element A string node or an object with optional `attrs` / `children`.
 * @param values Set that receives the collected strings.
 */
function collectElementText(element, values) {
    if (typeof element === 'string') {
        values.add(element);
        return;
    }
    const { attrs, children } = element;
    Object.values(attrs || {})
        .filter((attr) => typeof attr === 'string')
        .forEach((attr) => values.add(attr));
    for (const child of children || []) {
        collectElementText(child, values);
    }
}
|
|
220
|
+
/**
 * Tries to read `text` as JSON and collect every string inside it.
 *
 * When parsing fails, the text is treated as escaped JSON: `\"` and `\/` are
 * unescaped, and the result is added as a new candidate if it differs.
 *
 * @param text Candidate string.
 * @param values Set that receives the collected strings.
 */
function maybeCollectJsonValues(text, values) {
    try {
        walkJson(JSON.parse(text), values);
    }
    catch {
        const relaxed = text.replace(/\\"/g, '"').replace(/\\\//g, '/');
        const changed = relaxed !== text;
        if (changed) {
            values.add(relaxed);
        }
    }
}
|
|
231
|
+
/**
 * Adds every string found anywhere inside a parsed JSON value to `values`.
 *
 * @param value Any JSON value: string, array, object, or other primitive.
 * @param values Set that receives the strings.
 */
function walkJson(value, values) {
    if (typeof value === 'string') {
        values.add(value);
        return;
    }
    // Numbers, booleans, null and undefined carry no strings.
    if (value === null || typeof value !== 'object') {
        return;
    }
    const members = Array.isArray(value) ? value : Object.values(value);
    for (const member of members) {
        walkJson(member, values);
    }
}
|
|
246
|
+
/**
 * Prepares a candidate string for link matching: decodes HTML entities, turns
 * `\/` into `/`, and percent-decodes the result when that is possible.
 *
 * @param text Candidate string.
 * @returns The normalised text.
 */
function normalizeText(text) {
    const unescaped = decodeHtmlEntities(text).replace(/\\\//g, '/');
    try {
        return decodeURIComponent(unescaped);
    }
    catch {
        // Malformed percent sequences: keep the text as it was before decoding.
        return unescaped;
    }
}
|
|
256
|
+
/**
 * Decodes the HTML entities that commonly wrap links in card payloads:
 * double quote, single quote, `<`, `>` and `&`, in named, decimal and
 * hexadecimal forms where applicable.
 *
 * `&amp;` is decoded last so that a literal such as `&amp;lt;` becomes `&lt;`
 * rather than being decoded twice into `<`.
 *
 * @param text Text that may contain entities.
 * @returns The text with those entities replaced by their characters.
 */
function decodeHtmlEntities(text) {
    return text
        .replace(/&quot;/g, '"')
        .replace(/&#34;/g, '"')
        .replace(/&#x22;/gi, '"')
        .replace(/&apos;/g, "'")
        .replace(/&#39;/g, "'")
        .replace(/&#x27;/gi, "'")
        .replace(/&lt;/g, '<')
        .replace(/&gt;/g, '>')
        .replace(/&amp;/g, '&');
}
|
|
268
|
+
/**
 * Tidies a matched link: ensures it has a protocol, strips trailing
 * punctuation picked up from the surrounding text, and decodes `&amp;` in the
 * query string back to `&`.
 *
 * @param url Raw regex match.
 * @returns The cleaned URL.
 */
function cleanUrl(url) {
    return ensureProtocol(url)
        .replace(/[),,。;;!?!]+$/g, '')
        .replace(/&amp;/g, '&');
}
|
|
273
|
+
/**
 * Returns `url` unchanged when it already starts with `http://` or `https://`
 * (case-insensitive); otherwise prefixes it with `https://`.
 *
 * @param url URL with or without a protocol.
 * @returns URL with a protocol.
 */
function ensureProtocol(url) {
    const hasProtocol = /^https?:\/\//i.test(url);
    if (hasProtocol) {
        return url;
    }
    return `https://${url}`;
}
|
|
276
|
+
/**
 * Downloads a page as text, following redirects.
 *
 * @param url Page URL.
 * @param config Network settings (user agent, cookie, timeout).
 * @param headers Extra request headers.
 * @returns The response body as text.
 * @throws Error when the response status is not OK.
 */
async function fetchText(url, config, headers) {
    const page = await fetchWithTimeout(url, config, { redirect: 'follow', headers });
    if (page.ok) {
        return page.text();
    }
    throw new Error(`请求抖音页面失败:HTTP ${page.status}`);
}
|
|
282
|
+
/**
 * Calls `fetch` with the plugin's default headers and a request timeout.
 *
 * The timeout signal stays attached to the request, so it bounds reading the
 * response body (`text()` / `json()`) as well as waiting for the headers.
 * Clearing a manual abort timer once `fetch` resolved left body reads unbounded.
 *
 * Header precedence, lowest to highest: configured user agent, configured
 * cookie (when set), `init.headers`.
 *
 * @param url Request URL.
 * @param config Network settings: `userAgent`, optional `cookie`, `timeout` in seconds.
 * @param init Extra `fetch` options such as `redirect` and `headers`.
 * @returns The fetch response.
 */
async function fetchWithTimeout(url, config, init) {
    const headers = {
        'user-agent': config.userAgent,
        ...(config.cookie ? { cookie: config.cookie } : {}),
        ...(init.headers || {}),
    };
    return fetch(url, {
        ...init,
        signal: AbortSignal.timeout(config.timeout * 1000),
        headers,
    });
}
|
|
300
|
+
/**
 * Parses a JSON payload after removing control characters (everything below
 * U+0020 except tab, line feed and carriage return, plus U+007F), which would
 * otherwise make `JSON.parse` fail.
 *
 * @param payload JSON text.
 * @returns The parsed value.
 * @throws SyntaxError when the cleaned payload is still not valid JSON.
 */
function parseJsonLike(payload) {
    const sanitized = payload.replace(/[\u0000-\u0008\u000b\u000c\u000e-\u001f\u007f]/g, '');
    return JSON.parse(sanitized);
}
|
|
303
|
+
/**
 * Reads a nested value by following `path` one key at a time.
 *
 * @param source Root object or array.
 * @param path Keys or indexes to follow.
 * @returns The value at the path, or `undefined` as soon as a step would have
 *   to be taken on `null` / `undefined`. With an empty path, `source` itself.
 */
function deepGet(source, path) {
    return path.reduce((cursor, key) => (cursor == null ? undefined : cursor[key]), source);
}
|
|
312
|
+
/**
 * Returns the first argument that is neither `null` nor `undefined`.
 *
 * @param values Candidate values in priority order.
 * @returns The first such value, or `undefined` when there is none.
 */
function firstDefined(...values) {
    for (const candidate of values) {
        if (candidate != null) {
            return candidate;
        }
    }
    return undefined;
}
|
|
315
|
+
/**
 * Picks the first entry of a URL list.
 *
 * @param value Expected to be an array of URL strings.
 * @returns The first element when `value` is an array whose first element is a
 *   string; otherwise `undefined`.
 */
function pickUrl(value) {
    if (!Array.isArray(value)) {
        return undefined;
    }
    const [first] = value;
    return typeof first === 'string' ? first : undefined;
}
|
|
318
|
+
/**
 * Rewrites a play URL by replacing the first `playwm` with `play`.
 *
 * @param url Play URL.
 * @returns The rewritten URL, or the input when it contains no `playwm`.
 */
function removeWatermark(url) {
    const watermarked = 'playwm';
    const clean = 'play';
    return url.replace(watermarked, clean);
}
|
|
321
|
+
/**
 * Builds the canonical `www.douyin.com` URL for a post.
 *
 * Slides are linked as notes and unknown types as videos.
 *
 * @param parsed Parsed URL info.
 * @param fallback URL returned when `parsed.id` is empty.
 * @returns The canonical URL, or `fallback`.
 */
function canonicalUrl(parsed, fallback) {
    if (!parsed.id) {
        return fallback;
    }
    let segment;
    switch (parsed.type) {
        case 'slides':
            segment = 'note';
            break;
        case 'unknown':
            segment = 'video';
            break;
        default:
            segment = parsed.type;
    }
    return `https://www.douyin.com/${segment}/${parsed.id}`;
}
|
|
327
|
+
/**
 * Passes numbers through and maps every other type to `undefined`.
 *
 * @param value Any value.
 * @returns `value` when `typeof value` is 'number'; otherwise `undefined`.
 */
function numberOrUndefined(value) {
    if (typeof value !== 'number') {
        return undefined;
    }
    return value;
}
|
|
330
|
+
/**
 * Removes falsy entries and duplicates, keeping first-seen order.
 *
 * @param values Input list.
 * @returns A new array of distinct, truthy values.
 */
function unique(values) {
    const truthy = values.filter(Boolean);
    return Array.from(new Set(truthy));
}
|
|
333
|
+
/**
 * Truncates `text` to `maxLength` UTF-16 code units and appends `suffix` when
 * something was cut.
 *
 * @param text Text to shorten.
 * @param maxLength Maximum length kept from `text`.
 * @param suffix Marker appended to truncated text.
 * @returns The original text when it fits; otherwise the truncated text plus suffix.
 */
function trimText(text, maxLength, suffix) {
    if (text.length > maxLength) {
        const head = text.slice(0, maxLength);
        return `${head}${suffix}`;
    }
    return text;
}
|
package/package.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "koishi-plugin-douyin-local-parser",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Parse Douyin links and cards for Koishi.",
|
|
5
|
+
"main": "lib/index.js",
|
|
6
|
+
"typings": "lib/index.d.ts",
|
|
7
|
+
"files": [
|
|
8
|
+
"lib",
|
|
9
|
+
"src"
|
|
10
|
+
],
|
|
11
|
+
"scripts": {
|
|
12
|
+
"build": "tsc -p tsconfig.json",
|
|
13
|
+
"test": "vitest run",
|
|
14
|
+
"dev": "tsx scripts/local-test.ts"
|
|
15
|
+
},
|
|
16
|
+
"keywords": [
|
|
17
|
+
"chatbot",
|
|
18
|
+
"koishi",
|
|
19
|
+
"plugin",
|
|
20
|
+
"douyin",
|
|
21
|
+
"parser"
|
|
22
|
+
],
|
|
23
|
+
"license": "MIT",
|
|
24
|
+
"peerDependencies": {
|
|
25
|
+
"koishi": "^4.16.8"
|
|
26
|
+
},
|
|
27
|
+
"devDependencies": {
|
|
28
|
+
"@types/node": "^20.14.10",
|
|
29
|
+
"koishi": "^4.18.8",
|
|
30
|
+
"tsx": "^4.20.3",
|
|
31
|
+
"typescript": "^5.5.4",
|
|
32
|
+
"vitest": "^1.6.0"
|
|
33
|
+
}
|
|
34
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
import { Context, Logger, Schema, h } from 'koishi'
|
|
2
|
+
import { buildDouyinMessages, extractDouyinLinks, fetchDouyinPost } from './parser'
|
|
3
|
+
|
|
4
|
+
export const name = 'douyin-parser'
|
|
5
|
+
|
|
6
|
+
const logger = new Logger(name)
|
|
7
|
+
|
|
8
|
+
export interface Config {
|
|
9
|
+
enabled: boolean
|
|
10
|
+
parseMode: ('link' | 'card')[]
|
|
11
|
+
waitTip?: string | null
|
|
12
|
+
useForward: boolean
|
|
13
|
+
quote: boolean
|
|
14
|
+
middleware: boolean
|
|
15
|
+
parseLimit: number
|
|
16
|
+
minimumInterval: number
|
|
17
|
+
userAgent: string
|
|
18
|
+
cookie?: string
|
|
19
|
+
timeout: number
|
|
20
|
+
showImages: boolean
|
|
21
|
+
maxImages: number
|
|
22
|
+
maxDescLength: number
|
|
23
|
+
descTruncateSuffix: string
|
|
24
|
+
showVideo: boolean
|
|
25
|
+
showAuthor: boolean
|
|
26
|
+
showLink: boolean
|
|
27
|
+
showError: boolean
|
|
28
|
+
loggerinfo: boolean
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
export const Config: Schema<Config> = Schema.intersect([
|
|
32
|
+
Schema.object({
|
|
33
|
+
enabled: Schema.boolean().default(true).description('开启抖音链接/卡片解析。'),
|
|
34
|
+
parseMode: Schema.array(Schema.union([
|
|
35
|
+
Schema.const('link').description('普通链接'),
|
|
36
|
+
Schema.const('card').description('卡片消息'),
|
|
37
|
+
])).role('checkbox').default(['link', 'card']).description('选择解析来源。'),
|
|
38
|
+
waitTip: Schema.union([
|
|
39
|
+
Schema.const(null).description('不发送提示'),
|
|
40
|
+
Schema.string().description('解析前发送提示语').default('正在解析抖音链接...'),
|
|
41
|
+
]).default(null).description('等待提示。'),
|
|
42
|
+
}).description('基础设置'),
|
|
43
|
+
Schema.object({
|
|
44
|
+
useForward: Schema.boolean().default(false).description('开启合并转发。主要适用于 onebot / red 适配器。').experimental(),
|
|
45
|
+
quote: Schema.boolean().default(true).description('普通发送时引用原消息。'),
|
|
46
|
+
middleware: Schema.boolean().default(false).description('以前置中间件模式捕获消息。').experimental(),
|
|
47
|
+
parseLimit: Schema.number().min(1).max(10).step(1).default(3).description('单条消息最多解析的链接数量。'),
|
|
48
|
+
minimumInterval: Schema.number().min(0).max(3600).step(1).default(180).description('同频道同链接去重间隔,单位秒。0 表示不去重。'),
|
|
49
|
+
}).description('发送设置'),
|
|
50
|
+
Schema.object({
|
|
51
|
+
showImages: Schema.boolean().default(true).description('返回图片/动图。'),
|
|
52
|
+
maxImages: Schema.number().min(0).max(18).step(1).default(9).description('单个作品最多发送图片数。'),
|
|
53
|
+
maxDescLength: Schema.number().min(0).max(2000).step(10).default(160).description('描述最大字数。设为 0 时不展示描述。'),
|
|
54
|
+
descTruncateSuffix: Schema.string().default('...(已截断)').description('描述超出最大字数时追加的截断标志。'),
|
|
55
|
+
showVideo: Schema.boolean().default(true).description('返回视频元素。'),
|
|
56
|
+
showAuthor: Schema.boolean().default(true).description('展示作者。'),
|
|
57
|
+
showLink: Schema.boolean().default(true).description('展示原文链接。'),
|
|
58
|
+
}).description('内容设置'),
|
|
59
|
+
Schema.object({
|
|
60
|
+
userAgent: Schema.string().default('Mozilla/5.0 (iPhone; CPU iPhone OS 18_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Mobile/15E148 Safari/604.1').description('请求抖音页面时使用的 User-Agent。'),
|
|
61
|
+
cookie: Schema.string().role('textarea').default('').description('可选 Cookie。遇到风控或无法读取页面数据时可填写。'),
|
|
62
|
+
timeout: Schema.number().min(3).max(60).step(1).default(15).description('请求超时时间,单位秒。'),
|
|
63
|
+
showError: Schema.boolean().default(false).description('解析失败时向聊天发送错误提示。'),
|
|
64
|
+
loggerinfo: Schema.boolean().default(false).description('输出调试日志。').experimental(),
|
|
65
|
+
}).description('网络与调试'),
|
|
66
|
+
])
|
|
67
|
+
|
|
68
|
+
export const usage = `
|
|
69
|
+
发送抖音链接或平台卡片即可自动解析。
|
|
70
|
+
|
|
71
|
+
支持示例:
|
|
72
|
+
|
|
73
|
+
- https://v.douyin.com/_2ljF4AmKL8/
|
|
74
|
+
- https://www.douyin.com/video/7521023890996514083
|
|
75
|
+
- https://www.douyin.com/note/7469411074119322899
|
|
76
|
+
- https://www.iesdouyin.com/share/video/7521023890996514083
|
|
77
|
+
`
|
|
78
|
+
|
|
79
|
+
/**
 * Plugin entry point: registers a middleware that looks for Douyin links in
 * incoming messages and parses them in the background.
 *
 * Nothing is registered when `config.enabled` is false.
 *
 * @param ctx - Koishi context the middleware is attached to.
 * @param config - Plugin configuration.
 */
export function apply(ctx: Context, config: Config) {
  if (!config.enabled) return

  // Remembers when each "<channel>:<link>" pair was last accepted for parsing.
  const recent = new Map<string, number>()

  ctx.middleware(async (session, next) => {
    const content = session.content || session.stripped?.content || ''

    // A message counts as a card when it starts with an element tag that has
    // attributes, or carries a `data=` attribute anywhere in its content.
    const looksLikeCard = content.includes('data=') || /^<\w+\s/i.test(content)
    const requiredMode = looksLikeCard ? 'card' : 'link'
    if (!config.parseMode.includes(requiredMode)) return next()

    const links = extractDouyinLinks(content).slice(0, config.parseLimit)
    if (links.length === 0) return next()

    const scope = session.channelId || session.guildId || 'private'
    const targets: string[] = []
    for (const link of links) {
      if (shouldProcess(recent, scope, link, config.minimumInterval)) targets.push(link)
    }
    if (targets.length === 0) return next()

    // Fire and forget: parsing must not hold up the rest of the middleware chain.
    handleLinks(session, targets, config).catch((error) => logger.warn(error))

    return next()
  }, config.middleware)
}
|
|
104
|
+
|
|
105
|
+
/**
 * Parses each link and sends the resulting messages back to the session.
 *
 * Flow:
 * 1. Optionally send a "please wait" tip quoting the triggering message.
 * 2. Fetch and render every link. A link that fails is reported on its own
 *    and does not discard the posts that were parsed successfully.
 * 3. Deliver the messages: as one merged-forward `figure` on OneBot / Red
 *    when `config.useForward` is set, otherwise one by one (the first one
 *    quoting the original message when `config.quote` is set).
 * 4. Always try to remove the wait tip afterwards.
 *
 * @param session - Koishi session that triggered the parse.
 * @param links - Links that already passed limit and dedupe filtering.
 * @param config - Plugin configuration.
 */
async function handleLinks(session: any, links: string[], config: Config) {
  let waitTipMessageId: string | undefined

  if (config.waitTip) {
    const result = await session.send(`${h.quote(session.messageId)}${config.waitTip}`)
    waitTipMessageId = Array.isArray(result) ? result[0] : result
  }

  try {
    const allMessages: h[] = []

    for (const link of links) {
      try {
        if (config.loggerinfo) logger.info(`parse ${link}`)
        const post = await fetchDouyinPost(link, config)
        allMessages.push(...buildDouyinMessages(post, config, session))
      } catch (error) {
        // Isolate the failure so the remaining links are still parsed and sent.
        await reportParseFailure(session, config, error)
      }
    }

    if (!allMessages.length) return

    if (config.useForward && (session.platform === 'onebot' || session.platform === 'red')) {
      await session.send(h('figure', { children: allMessages }))
      return
    }

    if (config.quote) {
      await session.send(h('message', h.quote(session.messageId), allMessages[0].children))
      for (const message of allMessages.slice(1)) await session.send(message)
      return
    }

    for (const message of allMessages) await session.send(message)
  } catch (error) {
    // Reaching this point means delivering the parsed messages failed.
    await reportParseFailure(session, config, error)
  } finally {
    if (waitTipMessageId) {
      try {
        await session.bot?.deleteMessage?.(session.channelId, waitTipMessageId)
      } catch {
        // Best effort: a tip that cannot be removed must not surface as an error.
      }
    }
  }
}

/**
 * Logs a failure and, when `config.showError` is set, tells the chat about it.
 * A failure to send that notice is logged instead of being thrown.
 */
async function reportParseFailure(session: any, config: Config, error: unknown) {
  logger.warn(error)
  if (!config.showError) return

  try {
    await session.send(`抖音解析失败:${error instanceof Error ? error.message : String(error)}`)
  } catch (sendError) {
    logger.warn(sendError)
  }
}
|
|
145
|
+
|
|
146
|
+
/**
 * Decides whether a link should be parsed again in a given channel, and
 * records the decision.
 *
 * Entries older than the dedupe window are evicted on every call so that
 * `recent` stays bounded by the links seen within the last `seconds` seconds
 * instead of growing for the lifetime of the process.
 *
 * @param recent - Map from "<channelId>:<link>" to the time it was last accepted (ms).
 * @param channelId - Scope of the dedupe (channel, guild or a fixed fallback).
 * @param link - The link being considered.
 * @param seconds - Dedupe window in seconds; `0` or less disables deduping.
 * @returns `true` when the link should be processed, `false` when it was
 *   already accepted within the window.
 */
function shouldProcess(recent: Map<string, number>, channelId: string, link: string, seconds: number) {
  if (seconds <= 0) return true

  const windowMs = seconds * 1000
  const now = Date.now()

  // Deleting the entry currently being visited is safe while iterating a Map.
  for (const [seenKey, seenAt] of recent) {
    if (now - seenAt >= windowMs) recent.delete(seenKey)
  }

  const key = `${channelId}:${link}`
  // Whatever survived the sweep is still inside the window.
  if (recent.has(key)) return false

  recent.set(key, now)
  return true
}
|
package/src/parser.ts
ADDED
|
@@ -0,0 +1,401 @@
|
|
|
1
|
+
import { h } from 'koishi'
|
|
2
|
+
|
|
3
|
+
/**
 * The subset of plugin configuration the parser module reads.
 */
export interface DouyinConfigLike {
  /** Sent as the `user-agent` header on every request. */
  userAgent: string
  /** Request timeout in seconds (multiplied by 1000 for the abort timer). */
  timeout: number
  /** Whether a video message is produced for the post. */
  showVideo: boolean
  /** Whether image messages are produced for the post. */
  showImages: boolean
  /** Upper bound on the number of image messages per post. */
  maxImages: number
  /** Maximum description length; `0` suppresses the description line. */
  maxDescLength: number
  /** Text appended to a description that was cut at `maxDescLength`. */
  descTruncateSuffix: string
  /** Whether the author line is included in the text message. */
  showAuthor: boolean
  /** Whether the post URL is included in the text message. */
  showLink: boolean
  /** Optional `cookie` header value; omitted from requests when empty. */
  cookie?: string
}
|
|
15
|
+
|
|
16
|
+
/**
 * Normalized view of a Douyin post, as assembled by `buildPostFromAweme`.
 */
export interface DouyinPost {
  /** Post id taken from the parsed URL, falling back to the item's `aweme_id`. */
  id: string
  /** Link to the post as produced by `canonicalUrl`. */
  url: string
  /** The item's `desc`, or a generic placeholder when it is empty. */
  title: string
  /** The item's `desc`, or an empty string. */
  desc: string
  /** Kind of post as derived from the URL path. */
  type: 'video' | 'note' | 'slides' | 'unknown'
  /** Author nickname, or a placeholder when the item has none. */
  authorName: string
  /** First URL of the author's thumb avatar, else of the medium avatar. */
  authorAvatar?: string
  /** Raw `create_time` of the item when numeric; presumably a Unix timestamp (TODO confirm). */
  createTime?: number
  /** `video.duration / 1000`, rounded; presumably seconds (TODO confirm the source unit). */
  duration?: number
  /** First URL of the video cover list. */
  coverUrl?: string
  /** Still images of the post, de-duplicated. */
  imageUrls: string[]
  /** Play URLs of images that carry their own video clip, de-duplicated. */
  dynamicImageUrls: string[]
  /** Main video play URL; empty when the post has any image content. */
  videoUrls: string[]
}
|
|
31
|
+
|
|
32
|
+
/**
 * Result of taking a Douyin URL apart (see `parseDouyinUrl`).
 */
interface ParsedDouyinUrl {
  /** The URL that was parsed, with a protocol guaranteed. */
  url: string
  /** Numeric post id as a string; empty when none was recognized. */
  id: string
  /** Path segment naming the kind of post; `'unknown'` when none was recognized. */
  type: 'video' | 'note' | 'slides' | 'unknown'
}
|
|
37
|
+
|
|
38
|
+
/**
 * Character class (as regex source) for the tail of a link: one or more
 * characters that cannot terminate a URL in chat text. Besides whitespace,
 * quotes, brackets and CJK punctuation it also stops at the full-width forms
 * of `,` `;` `!` `?` `(` `)` (U+FF0C, U+FF1B, U+FF01, U+FF1F, U+FF08, U+FF09),
 * so a link immediately followed by Chinese punctuation does not swallow it.
 */
const URL_BOUNDARY = '[^\\s"\'<>\\\\^`{|},。;!?、【】《》\\uFF0C\\uFF1B\\uFF01\\uFF1F\\uFF08\\uFF09]+'

/**
 * Host and path prefixes (regex source, without the scheme) of every
 * supported Douyin link shape. The order here is the matching order.
 */
const LINK_PREFIXES = [
  'v\\.douyin\\.com/',
  'jx\\.douyin\\.com/',
  '(?:www\\.)?douyin\\.com/(?:video|note)/',
  '(?:www\\.)?iesdouyin\\.com/share/(?:slides|video|note)/',
  'm\\.douyin\\.com/share/(?:slides|video|note)/',
  'jingxuan\\.douyin\\.com/m/(?:slides|video|note)/',
]

/**
 * Global, case-insensitive patterns for each supported link shape. They are
 * stateful (`lastIndex`), so callers using `exec` must reset them first.
 */
const LINK_PATTERNS = LINK_PREFIXES.map((prefix) => new RegExp(`https?://${prefix}${URL_BOUNDARY}`, 'gi'))
|
|
47
|
+
|
|
48
|
+
/**
 * Headers used when requesting HTML pages and when resolving short links.
 * The user agent itself is not part of this set; it is added per request.
 */
const IOS_HEADERS: Record<string, string> = {
  accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
  referer: 'https://www.douyin.com/',
}

/**
 * Headers used when requesting the JSON slides-info endpoint.
 */
const ANDROID_HEADERS: Record<string, string> = {
  accept: 'application/json,text/plain,*/*',
  referer: 'https://www.douyin.com/',
}
|
|
58
|
+
|
|
59
|
+
/**
 * Finds every supported Douyin link in a message.
 *
 * The message is first expanded into candidate strings (raw content, element
 * attributes, decoded and JSON-embedded values); each candidate is normalized
 * and scanned with every link pattern.
 *
 * @param content - Raw message content.
 * @returns Cleaned links without duplicates, in order of first appearance.
 */
export function extractDouyinLinks(content: string): string[] {
  const found = new Set<string>()

  for (const candidate of expandTextCandidates(content)) {
    const text = normalizeText(candidate)
    for (const pattern of LINK_PATTERNS) {
      // The patterns are shared global regexes; start every scan from the beginning.
      pattern.lastIndex = 0
      for (const hit of text.matchAll(pattern)) found.add(cleanUrl(hit[0]))
    }
  }

  return [...found]
}
|
|
76
|
+
|
|
77
|
+
/**
 * Resolves a short link (`v.douyin.com` / `jx.douyin.com`) to its target.
 * Any other URL is returned as-is, with a protocol guaranteed.
 *
 * The link is requested once without following redirects so the `location`
 * header can be read. If there is no such header and the response is not a
 * redirect, the link is requested again with redirects followed and the final
 * response URL is used.
 *
 * @param rawUrl - Link as extracted from the message.
 * @param config - Supplies user agent, cookie and timeout.
 * @returns The resolved URL, or the input URL when nothing better is known.
 */
export async function resolveDouyinLink(rawUrl: string, config: DouyinConfigLike): Promise<string> {
  const url = ensureProtocol(rawUrl)

  const isShortLink = /(?:v|jx)\.douyin\.com/i.test(url)
  if (!isShortLink) return url

  const probe = await fetchWithTimeout(url, config, { redirect: 'manual', headers: IOS_HEADERS })
  const location = probe.headers.get('location')
  // `location` may be relative, so resolve it against the requested URL.
  if (location) return new URL(location, url).toString()

  const isRedirect = probe.status >= 300 && probe.status < 400
  if (isRedirect) return url

  const { url: finalUrl } = await fetchWithTimeout(url, config, { redirect: 'follow', headers: IOS_HEADERS })
  return finalUrl || url
}
|
|
90
|
+
|
|
91
|
+
/**
 * Fetches and normalizes the post behind a Douyin link.
 *
 * Strategy: resolve short links, parse id and type from the URL, then
 * - for `slides` links go straight to the slides-info endpoint;
 * - otherwise try each share-page candidate and read its router data;
 * - as a last resort fall back to the slides-info endpoint.
 *
 * @param rawUrl - Link as extracted from the message.
 * @param config - Supplies user agent, cookie and timeout.
 * @returns The normalized post.
 * @throws Error when no post id can be recognized, or when every strategy
 *   failed (the message lists each individual failure).
 */
export async function fetchDouyinPost(rawUrl: string, config: DouyinConfigLike): Promise<DouyinPost> {
  const parsed = parseDouyinUrl(await resolveDouyinLink(rawUrl, config))
  if (!parsed.id) throw new Error('未能识别抖音作品 ID。')

  if (parsed.type === 'slides') return fetchSlidesPost(parsed, config)

  const describe = (error: unknown) => (error instanceof Error ? error.message : String(error))
  const failures: string[] = []

  for (const pageUrl of buildPageCandidates(parsed)) {
    try {
      const html = await fetchText(pageUrl, config, IOS_HEADERS)
      const post = extractRouterDataPost(html, parsed, pageUrl)
      if (post) return post
      failures.push(`${pageUrl}: 未找到 _ROUTER_DATA`)
    } catch (error) {
      failures.push(`${pageUrl}: ${describe(error)}`)
    }
  }

  try {
    return await fetchSlidesPost(parsed, config)
  } catch (error) {
    failures.push(`slidesinfo: ${describe(error)}`)
  }

  throw new Error(`抖音解析失败:${failures.join(';')}`)
}
|
|
118
|
+
|
|
119
|
+
/**
 * Renders a post as a list of `message` elements: one text message, then up
 * to `config.maxImages` image messages, then at most one video message.
 *
 * Every message carries the sender's `userId` and `nickname` as attributes.
 *
 * @param post - The normalized post.
 * @param config - Controls which parts are rendered.
 * @param session - Optional source of the sender attributes.
 * @returns The messages in sending order.
 */
export function buildDouyinMessages(post: DouyinPost, config: DouyinConfigLike, session?: { userId?: string, username?: string, author?: { nickname?: string } }) {
  const attrs = {
    userId: session?.userId,
    nickname: session?.author?.nickname || session?.username,
  }
  const wrap = (child: h) => h('message', attrs, child)

  const imageUrls = config.showImages ? post.imageUrls.slice(0, config.maxImages) : []
  // Only the first clip is sent; per-image clips take precedence over the main video.
  const videoUrls = config.showVideo ? [...post.dynamicImageUrls, ...post.videoUrls].slice(0, 1) : []

  const messages: h[] = [
    wrap(h.text(formatPostText(post, config))),
    ...imageUrls.map((imageUrl) => wrap(h.image(imageUrl))),
    ...videoUrls.map((videoUrl) => wrap(h.video(videoUrl))),
  ]
  return messages
}
|
|
142
|
+
|
|
143
|
+
/**
 * Extracts a post from the `window._ROUTER_DATA` assignment of a share page.
 *
 * @param html - Page source.
 * @param parsed - URL parts to attach to the post; defaults to an empty, unknown URL.
 * @param sourceUrl - URL the page was loaded from; defaults to `parsed.url`.
 * @returns The post, or `null` when the page has no router data or no item.
 * @throws SyntaxError when the router data is present but is not valid JSON.
 */
export function extractRouterDataPost(html: string, parsed: ParsedDouyinUrl = { id: '', type: 'unknown', url: '' }, sourceUrl = parsed.url): DouyinPost | null {
  const payload = /window\._ROUTER_DATA\s*=\s*([\s\S]*?)<\/script>/.exec(html)?.[1]
  if (!payload) return null

  // Drop the statement terminator(s) that follow the JSON literal.
  const routerData = parseJsonLike(payload.trim().replace(/;+\s*$/, ''))

  // The item lives under a different loader key for video and note pages.
  for (const pageKey of ['video_(id)/page', 'note_(id)/page']) {
    const item = deepGet(routerData, ['loaderData', pageKey, 'videoInfoRes', 'item_list', 0])
    if (item == null) continue
    return item ? buildPostFromAweme(item, parsed, sourceUrl) : null
  }

  return null
}
|
|
156
|
+
|
|
157
|
+
/**
 * Splits a Douyin URL into post id and post type.
 *
 * Recognized shapes: `douyin.com/(video|note)/<id>`,
 * `(iesdouyin|m.douyin).com/share/(slides|video|note)/<id>` and
 * `jingxuan.douyin.com/m/(slides|video|note)/<id>`.
 *
 * @param url - URL to parse; a protocol is added when missing.
 * @returns The normalized URL with `id` (empty when unrecognized) and `type`
 *   (`'unknown'` when unrecognized).
 */
function parseDouyinUrl(url: string): ParsedDouyinUrl {
  const normalized = ensureProtocol(url)

  // Groups 1-3 capture the type for each URL shape, group 4 the numeric id.
  const match = /(?:douyin\.com\/(video|note)|(?:iesdouyin|m\.douyin)\.com\/share\/(slides|video|note)|jingxuan\.douyin\.com\/m\/(slides|video|note))\/(\d+)/i.exec(normalized)
  const kind = match?.[1] || match?.[2] || match?.[3] || 'unknown'

  return {
    url: normalized,
    id: match?.[4] || '',
    type: kind as ParsedDouyinUrl['type'],
  }
}
|
|
167
|
+
|
|
168
|
+
/**
 * Lists the share-page URLs to try for a post, in order of preference.
 * A post of unknown type is requested as a video.
 */
function buildPageCandidates(parsed: ParsedDouyinUrl) {
  const segment = parsed.type === 'unknown' ? 'video' : parsed.type
  const sharePath = `/share/${segment}/${parsed.id}`

  return ['https://m.douyin.com', 'https://www.iesdouyin.com'].map((origin) => `${origin}${sharePath}`)
}
|
|
175
|
+
|
|
176
|
+
/**
 * Loads a post through the slides-info JSON endpoint.
 *
 * @param parsed - URL parts; `parsed.id` is the post to request.
 * @param config - Supplies user agent, cookie and timeout.
 * @returns The post, always typed as `slides`.
 * @throws Error when the endpoint answers with a non-OK status or returns no item.
 */
async function fetchSlidesPost(parsed: ParsedDouyinUrl, config: DouyinConfigLike): Promise<DouyinPost> {
  const endpoint = new URL('https://www.iesdouyin.com/web/api/v2/aweme/slidesinfo/')
  const query: Array<[string, string]> = [
    ['aweme_ids', `[${parsed.id}]`],
    ['request_source', '200'],
  ]
  for (const [name, value] of query) endpoint.searchParams.set(name, value)

  const response = await fetchWithTimeout(endpoint.toString(), config, { redirect: 'follow', headers: ANDROID_HEADERS })
  if (!response.ok) throw new Error(`请求抖音图集接口失败:HTTP ${response.status}`)

  const payload = await response.json() as any
  const item = payload?.aweme_details?.[0]
  if (!item) throw new Error('图集接口未返回作品数据。')

  const slidesTarget: ParsedDouyinUrl = { ...parsed, type: 'slides' }
  return buildPostFromAweme(item, slidesTarget, parsed.url)
}
|
|
190
|
+
|
|
191
|
+
/**
 * Converts a raw item object into a `DouyinPost`.
 *
 * Each entry of `item.images` contributes either its own video clip (when it
 * has a play address) or its still image. The main video is only used when
 * the item has no image content at all.
 *
 * @param item - Raw item as found in router data or the slides-info response.
 * @param parsed - URL parts providing id and type.
 * @param sourceUrl - Used as the post URL when no id is known.
 */
function buildPostFromAweme(item: any, parsed: ParsedDouyinUrl, sourceUrl: string): DouyinPost {
  const video = item?.video
  const author = item?.author
  const stills: string[] = []
  const clips: string[] = []

  for (const image of Array.isArray(item?.images) ? item.images : []) {
    const clipUrl = pickUrl(image?.video?.play_addr?.url_list)
    if (clipUrl) {
      clips.push(removeWatermark(clipUrl))
    } else {
      const stillUrl = pickUrl(image?.url_list)
      if (stillUrl) stills.push(stillUrl)
    }
  }

  const hasImageContent = stills.length > 0 || clips.length > 0
  const mainVideoUrl = hasImageContent ? undefined : pickUrl(video?.play_addr?.url_list)

  return {
    id: parsed.id || String(item?.aweme_id || ''),
    url: canonicalUrl(parsed, sourceUrl),
    title: String(item?.desc || '抖音作品'),
    desc: String(item?.desc || ''),
    type: parsed.type,
    authorName: String(author?.nickname || '未知作者'),
    authorAvatar: pickUrl(author?.avatar_thumb?.url_list) || pickUrl(author?.avatar_medium?.url_list),
    createTime: numberOrUndefined(item?.create_time),
    duration: typeof video?.duration === 'number' ? Math.round(video.duration / 1000) : undefined,
    coverUrl: pickUrl(video?.cover?.url_list),
    imageUrls: unique(stills),
    dynamicImageUrls: unique(clips),
    videoUrls: mainVideoUrl ? [removeWatermark(mainVideoUrl)] : [],
  }
}
|
|
228
|
+
|
|
229
|
+
/**
 * Builds the text part of a reply: headline, optional author line, optional
 * description and optional link, joined by newlines.
 *
 * Posts built by `buildPostFromAweme` take both `title` and `desc` from the
 * same field, so the description branch is skipped for them and the headline
 * carries the whole caption. The headline is therefore limited with
 * `maxDescLength` / `descTruncateSuffix` as well; otherwise those settings
 * would have no effect on such posts. With `maxDescLength` set to `0` the
 * headline is left untouched and no description line is added.
 *
 * @param post - The normalized post.
 * @param config - Controls author, link and length limits.
 */
function formatPostText(post: DouyinPost, config: DouyinConfigLike) {
  const limit = config.maxDescLength
  const title = post.title || '抖音作品'
  const headline = limit > 0 ? trimText(title, limit, config.descTruncateSuffix) : title

  const lines = [`抖音:${headline}`]

  if (config.showAuthor) lines.push(`作者:${post.authorName}`)
  // Only add the description when it says something the headline does not.
  if (post.desc && post.desc !== post.title && limit > 0) {
    lines.push(trimText(post.desc, limit, config.descTruncateSuffix))
  }
  if (config.showLink) lines.push(post.url)

  return lines.join('\n')
}
|
|
240
|
+
|
|
241
|
+
/**
 * Expands a message into every string that might contain a link.
 *
 * Sources, in insertion order: the raw content; text and string attributes
 * of the parsed elements; the values of `data="…"` / `data='…'` attributes
 * found by regex; then, for each of those, its entity-decoded form and any
 * strings reachable through it when it is JSON.
 *
 * @param content - Raw message content.
 * @returns The distinct candidate strings.
 */
function expandTextCandidates(content: string): string[] {
  const values = new Set<string>([content])

  try {
    h.parse(content).forEach((element) => collectElementText(element, values))
  } catch {
    // Some adapters deliver partial XML snippets; the regex pass below still handles them.
  }

  const dataAttribute = /\bdata=(?:"([^"]*)"|'([^']*)')/gi
  for (const [, doubleQuoted, singleQuoted] of content.matchAll(dataAttribute)) {
    values.add(doubleQuoted || singleQuoted || '')
  }

  // Iterate over a snapshot: the loop body adds to `values`.
  for (const value of Array.from(values)) {
    const decoded = decodeHtmlEntities(value)
    values.add(decoded)
    maybeCollectJsonValues(decoded, values)
  }

  return Array.from(values)
}
|
|
262
|
+
|
|
263
|
+
/**
 * Collects, depth first, every string found in an element tree: plain string
 * nodes, string-valued attributes, and the same for all descendants.
 *
 * @param element - Element (or string node) to walk.
 * @param values - Set that receives the strings.
 */
function collectElementText(element: h, values: Set<string>) {
  if (typeof element === 'string') {
    values.add(element)
    return
  }

  Object.values(element.attrs || {})
    .filter((attr): attr is string => typeof attr === 'string')
    .forEach((attr) => values.add(attr))

  const children = element.children || []
  children.forEach((child) => collectElementText(child as h, values))
}
|
|
277
|
+
|
|
278
|
+
/**
 * Treats `text` as JSON when possible and collects every string inside it.
 * When that fails, adds a copy of the text with `\"` and `\/` unescaped,
 * provided unescaping changed anything.
 *
 * @param text - Candidate text.
 * @param values - Set that receives the strings.
 */
function maybeCollectJsonValues(text: string, values: Set<string>) {
  try {
    walkJson(JSON.parse(text), values)
  } catch {
    const unescaped = text.split('\\"').join('"').split('\\/').join('/')
    if (unescaped !== text) values.add(unescaped)
  }
}
|
|
287
|
+
|
|
288
|
+
/**
 * Adds every string reachable inside a JSON value to `values`, visiting
 * array items and object property values recursively. Numbers, booleans and
 * `null` are ignored.
 */
function walkJson(value: unknown, values: Set<string>) {
  if (typeof value === 'string') {
    values.add(value)
    return
  }
  if (value === null || typeof value !== 'object') return

  const children: unknown[] = Array.isArray(value) ? value : Object.values(value)
  for (const child of children) walkJson(child, values)
}
|
|
301
|
+
|
|
302
|
+
/**
 * Prepares a candidate string for link matching: decodes HTML entities,
 * unescapes `\/`, then percent-decodes the result when it is decodable.
 */
function normalizeText(text: string) {
  const unescaped = decodeHtmlEntities(text).replace(/\\\//g, '/')

  try {
    return decodeURIComponent(unescaped)
  } catch {
    // Text that is only partly percent-encoded is kept as it is.
    return unescaped
  }
}
|
|
311
|
+
|
|
312
|
+
/** Entities understood by `decodeHtmlEntities`, keyed by their lower-case form. */
const HTML_ENTITY_MAP: Record<string, string> = {
  '&quot;': '"',
  '&#34;': '"',
  '&#x22;': '"',
  '&apos;': "'",
  '&#39;': "'",
  '&#x27;': "'",
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
}

/**
 * Decodes the HTML entities for `"`, `'`, `&`, `<` and `>` (named, decimal
 * and hexadecimal forms for the quotes).
 *
 * Decoding happens in a single pass, so one call removes exactly one level
 * of escaping: `&amp;lt;` becomes `&lt;`, not `<`.
 *
 * @param text - Text that may contain entities.
 * @returns The text with the supported entities replaced.
 */
function decodeHtmlEntities(text: string) {
  return text.replace(
    /&(?:quot|apos|amp|lt|gt|#34|#39|#x22|#x27);/gi,
    (entity) => HTML_ENTITY_MAP[entity.toLowerCase()] ?? entity,
  )
}
|
|
324
|
+
|
|
325
|
+
/**
 * Tidies a matched link: guarantees a protocol, strips trailing sentence
 * punctuation (ASCII and full-width forms of `)` `,` `;` `!` `?`, plus `。`)
 * and turns any remaining `&amp;` back into `&`.
 *
 * @param url - Link as matched by one of the link patterns.
 * @returns The cleaned link.
 */
function cleanUrl(url: string) {
  return ensureProtocol(url)
    .replace(/[),。;!?\uFF09\uFF0C\uFF1B\uFF01\uFF1F]+$/, '')
    .replace(/&amp;/g, '&')
}
|
|
330
|
+
|
|
331
|
+
/**
 * Returns the URL unchanged when it already starts with `http://` or
 * `https://` (case-insensitive); otherwise prefixes it with `https://`.
 */
function ensureProtocol(url: string) {
  if (/^https?:\/\//i.test(url)) return url
  return `https://${url}`
}
|
|
334
|
+
|
|
335
|
+
/**
 * Requests a URL, following redirects, and returns the response body as text.
 *
 * @param url - URL to request.
 * @param config - Supplies user agent, cookie and timeout.
 * @param headers - Extra request headers.
 * @throws Error when the response status is not OK.
 */
async function fetchText(url: string, config: DouyinConfigLike, headers: Record<string, string>) {
  const response = await fetchWithTimeout(url, config, { redirect: 'follow', headers })
  if (response.ok) return response.text()

  throw new Error(`请求抖音页面失败:HTTP ${response.status}`)
}
|
|
340
|
+
|
|
341
|
+
/**
 * `fetch` with an abort timer and the configured identity headers.
 *
 * The request is aborted when no response has arrived within
 * `config.timeout` seconds. The timer is cleared as soon as `fetch` settles,
 * so reading the body afterwards is not covered by it.
 *
 * `init.headers` may be a plain object, a `Headers` instance or a list of
 * pairs; it is normalized through `Headers`. Headers given in `init` take
 * precedence over the configured `user-agent` and `cookie`.
 *
 * @param url - URL to request.
 * @param config - Supplies `userAgent`, optional `cookie` and `timeout` (seconds).
 * @param init - Remaining fetch options; its `signal` is replaced.
 * @returns The response as returned by `fetch`.
 */
async function fetchWithTimeout(url: string, config: DouyinConfigLike, init: RequestInit) {
  const controller = new AbortController()
  const timer = setTimeout(() => controller.abort(), config.timeout * 1000)

  try {
    // Built inside the try block so an invalid header value cannot leak the timer.
    const headers = new Headers(init.headers)
    if (!headers.has('user-agent')) headers.set('user-agent', config.userAgent)
    if (config.cookie && !headers.has('cookie')) headers.set('cookie', config.cookie)

    return await fetch(url, {
      ...init,
      signal: controller.signal,
      headers,
    })
  } finally {
    clearTimeout(timer)
  }
}
|
|
359
|
+
|
|
360
|
+
/**
 * Parses JSON after removing control characters that would make it invalid.
 * Tab, line feed and carriage return are kept.
 *
 * @throws SyntaxError when the cleaned payload is still not valid JSON.
 */
function parseJsonLike(payload: string) {
  const controlChars = /[\u0000-\u0008\u000b\u000c\u000e-\u001f\u007f]/g
  const cleaned = payload.replace(controlChars, '')
  return JSON.parse(cleaned)
}
|
|
363
|
+
|
|
364
|
+
/**
 * Follows a path of keys and indexes into a nested value.
 *
 * @param source - Value to start from.
 * @param path - Keys or indexes to follow, in order.
 * @returns The value at the end of the path, or `undefined` as soon as a
 *   step has to be taken from `null` or `undefined`.
 */
function deepGet(source: any, path: Array<string | number>) {
  return path.reduce((cursor, key) => (cursor == null ? undefined : cursor[key]), source)
}
|
|
372
|
+
|
|
373
|
+
/**
 * Returns the first argument that is neither `null` nor `undefined`, or
 * `undefined` when there is none.
 */
function firstDefined<T>(...values: T[]) {
  for (const value of values) {
    if (value != null) return value
  }
  return undefined
}
|
|
376
|
+
|
|
377
|
+
/**
 * Returns the first entry of a URL list when it is a string; `undefined`
 * for anything else (not an array, empty array, non-string first entry).
 */
function pickUrl(value: unknown): string | undefined {
  if (!Array.isArray(value)) return undefined

  const [first] = value
  return typeof first === 'string' ? first : undefined
}
|
|
380
|
+
|
|
381
|
+
/**
 * Replaces the first occurrence of `playwm` in a play URL with `play`.
 * URLs without that marker are returned unchanged.
 */
function removeWatermark(url: string) {
  const marker = 'playwm'
  const at = url.indexOf(marker)
  if (at === -1) return url

  return `${url.slice(0, at)}play${url.slice(at + marker.length)}`
}
|
|
384
|
+
|
|
385
|
+
/**
 * Builds the `www.douyin.com` link for a post.
 *
 * `slides` posts are linked as `note`, posts of unknown type as `video`.
 *
 * @param parsed - URL parts providing id and type.
 * @param fallback - Returned as-is when no id is known.
 */
function canonicalUrl(parsed: ParsedDouyinUrl, fallback: string) {
  if (!parsed.id) return fallback

  let segment: string = parsed.type
  if (segment === 'slides') segment = 'note'
  else if (segment === 'unknown') segment = 'video'

  return `https://www.douyin.com/${segment}/${parsed.id}`
}
|
|
390
|
+
|
|
391
|
+
/**
 * Passes a value through when its type is `number` (including `NaN`);
 * returns `undefined` for every other type.
 */
function numberOrUndefined(value: unknown) {
  if (typeof value !== 'number') return undefined
  return value
}
|
|
394
|
+
|
|
395
|
+
/**
 * Returns the non-empty strings of a list without duplicates, keeping the
 * order of first appearance.
 */
function unique(values: string[]) {
  const kept = new Set<string>()
  for (const value of values) {
    if (value) kept.add(value)
  }
  return Array.from(kept)
}
|
|
398
|
+
|
|
399
|
+
/**
 * Limits a text to `maxLength` characters, appending `suffix` when it had to
 * be cut.
 *
 * Length is counted in Unicode code points rather than UTF-16 units, so a
 * character outside the Basic Multilingual Plane (most emoji) counts as one
 * and is never split into a lone surrogate.
 *
 * @param text - Text to limit.
 * @param maxLength - Maximum number of code points to keep.
 * @param suffix - Marker appended after a cut.
 * @returns The original text when it fits, otherwise the cut text plus suffix.
 */
function trimText(text: string, maxLength: number, suffix: string) {
  // Fast path: the code point count never exceeds the UTF-16 length.
  if (text.length <= maxLength) return text

  const codePoints = Array.from(text)
  if (codePoints.length <= maxLength) return text

  return `${codePoints.slice(0, maxLength).join('')}${suffix}`
}
|