koishi-plugin-douyin-local-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,41 @@
1
+ # koishi-plugin-douyin-parser
2
+
3
+ 抖音链接解析 Koishi 插件,支持普通链接、短链和聊天平台卡片消息中的链接提取,可选合并转发。
4
+
5
+ 解析逻辑参考了 [fllesser/nonebot-plugin-parser](https://github.com/fllesser/nonebot-plugin-parser) 的抖音解析实现,并按本地 `xhs-parser` 的 Koishi 插件结构改写。
6
+
7
+ ## 功能
8
+
9
+ - 解析 `https://v.douyin.com/...`、`https://jx.douyin.com/...` 短链
10
+ - 解析 `https://www.douyin.com/video/...`
11
+ - 解析 `https://www.douyin.com/note/...`
12
+ - 解析 `https://www.iesdouyin.com/share/video|note|slides/...`
13
+ - 解析 `https://m.douyin.com/share/video|note|slides/...`
14
+ - 解析 `https://jingxuan.douyin.com/m/video|note|slides/...`
15
+ - 从 Koishi 卡片元素的 `data` 字段、转义 JSON、普通文本中提取抖音链接
16
+ - 支持图片、动图、视频、原文链接和作者信息返回
17
+ - 支持 OneBot / Red 适配器的合并转发元素
18
+
19
+ ## 本地测试
20
+
21
+ ```bash
22
+ npm install
23
+ npm test
24
+ npm run build
25
+ ```
26
+
27
+ 也可以直接跑一次真实解析:
28
+
29
+ ```bash
30
+ npm run dev -- "https://v.douyin.com/_2ljF4AmKL8/"
31
+ ```
32
+
33
+ ## Koishi 使用
34
+
35
+ 构建后在 Koishi 配置中加载本地插件:
36
+
37
+ ```yaml
38
+ plugins:
39
+ /absolute/path/to/douyin-parser/lib:
40
+ enabled: true
41
+ ```
package/lib/index.d.ts ADDED
@@ -0,0 +1,27 @@
1
+ import { Context, Schema } from 'koishi';
2
+ export declare const name = "douyin-parser";
3
+ export interface Config {
4
+ enabled: boolean;
5
+ parseMode: ('link' | 'card')[];
6
+ waitTip?: string | null;
7
+ useForward: boolean;
8
+ quote: boolean;
9
+ middleware: boolean;
10
+ parseLimit: number;
11
+ minimumInterval: number;
12
+ userAgent: string;
13
+ cookie?: string;
14
+ timeout: number;
15
+ showImages: boolean;
16
+ maxImages: number;
17
+ maxDescLength: number;
18
+ descTruncateSuffix: string;
19
+ showVideo: boolean;
20
+ showAuthor: boolean;
21
+ showLink: boolean;
22
+ showError: boolean;
23
+ loggerinfo: boolean;
24
+ }
25
+ export declare const Config: Schema<Config>;
26
+ export declare const usage = "\n\u53D1\u9001\u6296\u97F3\u94FE\u63A5\u6216\u5E73\u53F0\u5361\u7247\u5373\u53EF\u81EA\u52A8\u89E3\u6790\u3002\n\n\u652F\u6301\u793A\u4F8B\uFF1A\n\n- https://v.douyin.com/_2ljF4AmKL8/\n- https://www.douyin.com/video/7521023890996514083\n- https://www.douyin.com/note/7469411074119322899\n- https://www.iesdouyin.com/share/video/7521023890996514083\n";
27
+ export declare function apply(ctx: Context, config: Config): void;
package/lib/index.js ADDED
@@ -0,0 +1,128 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.usage = exports.Config = exports.name = void 0;
4
+ exports.apply = apply;
5
+ const koishi_1 = require("koishi");
6
+ const parser_1 = require("./parser");
7
+ exports.name = 'douyin-parser';
8
+ const logger = new koishi_1.Logger(exports.name);
9
+ exports.Config = koishi_1.Schema.intersect([
10
+ koishi_1.Schema.object({
11
+ enabled: koishi_1.Schema.boolean().default(true).description('开启抖音链接/卡片解析。'),
12
+ parseMode: koishi_1.Schema.array(koishi_1.Schema.union([
13
+ koishi_1.Schema.const('link').description('普通链接'),
14
+ koishi_1.Schema.const('card').description('卡片消息'),
15
+ ])).role('checkbox').default(['link', 'card']).description('选择解析来源。'),
16
+ waitTip: koishi_1.Schema.union([
17
+ koishi_1.Schema.const(null).description('不发送提示'),
18
+ koishi_1.Schema.string().description('解析前发送提示语').default('正在解析抖音链接...'),
19
+ ]).default(null).description('等待提示。'),
20
+ }).description('基础设置'),
21
+ koishi_1.Schema.object({
22
+ useForward: koishi_1.Schema.boolean().default(false).description('开启合并转发。主要适用于 onebot / red 适配器。').experimental(),
23
+ quote: koishi_1.Schema.boolean().default(true).description('普通发送时引用原消息。'),
24
+ middleware: koishi_1.Schema.boolean().default(false).description('以前置中间件模式捕获消息。').experimental(),
25
+ parseLimit: koishi_1.Schema.number().min(1).max(10).step(1).default(3).description('单条消息最多解析的链接数量。'),
26
+ minimumInterval: koishi_1.Schema.number().min(0).max(3600).step(1).default(180).description('同频道同链接去重间隔,单位秒。0 表示不去重。'),
27
+ }).description('发送设置'),
28
+ koishi_1.Schema.object({
29
+ showImages: koishi_1.Schema.boolean().default(true).description('返回图片/动图。'),
30
+ maxImages: koishi_1.Schema.number().min(0).max(18).step(1).default(9).description('单个作品最多发送图片数。'),
31
+ maxDescLength: koishi_1.Schema.number().min(0).max(2000).step(10).default(160).description('描述最大字数。设为 0 时不展示描述。'),
32
+ descTruncateSuffix: koishi_1.Schema.string().default('...(已截断)').description('描述超出最大字数时追加的截断标志。'),
33
+ showVideo: koishi_1.Schema.boolean().default(true).description('返回视频元素。'),
34
+ showAuthor: koishi_1.Schema.boolean().default(true).description('展示作者。'),
35
+ showLink: koishi_1.Schema.boolean().default(true).description('展示原文链接。'),
36
+ }).description('内容设置'),
37
+ koishi_1.Schema.object({
38
+ userAgent: koishi_1.Schema.string().default('Mozilla/5.0 (iPhone; CPU iPhone OS 18_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Mobile/15E148 Safari/604.1').description('请求抖音页面时使用的 User-Agent。'),
39
+ cookie: koishi_1.Schema.string().role('textarea').default('').description('可选 Cookie。遇到风控或无法读取页面数据时可填写。'),
40
+ timeout: koishi_1.Schema.number().min(3).max(60).step(1).default(15).description('请求超时时间,单位秒。'),
41
+ showError: koishi_1.Schema.boolean().default(false).description('解析失败时向聊天发送错误提示。'),
42
+ loggerinfo: koishi_1.Schema.boolean().default(false).description('输出调试日志。').experimental(),
43
+ }).description('网络与调试'),
44
+ ]);
45
+ exports.usage = `
46
+ 发送抖音链接或平台卡片即可自动解析。
47
+
48
+ 支持示例:
49
+
50
+ - https://v.douyin.com/_2ljF4AmKL8/
51
+ - https://www.douyin.com/video/7521023890996514083
52
+ - https://www.douyin.com/note/7469411074119322899
53
+ - https://www.iesdouyin.com/share/video/7521023890996514083
54
+ `;
55
/**
 * Plugin entry point: registers one middleware that looks for Douyin links in
 * each incoming message and hands them to `handleLinks`.
 *
 * Nothing is registered when `config.enabled` is false. The middleware always
 * continues the chain via `next()`; link handling runs in the background and
 * its failures are only logged.
 *
 * @param ctx Koishi context used to register the middleware.
 * @param config Resolved plugin configuration.
 */
function apply(ctx, config) {
    if (!config.enabled)
        return;
    // Timestamp of the last accepted occurrence per `${channel}:${link}`.
    const recent = new Map();
    const onMessage = async (session, next) => {
        const content = session.content || session.stripped?.content || '';
        // Element-like content or a `data=` attribute is treated as a card message.
        const looksLikeCard = /^<\w+\s/i.test(content) || content.includes('data=');
        const requiredMode = looksLikeCard ? 'card' : 'link';
        if (!config.parseMode.includes(requiredMode))
            return next();
        const links = (0, parser_1.extractDouyinLinks)(content).slice(0, config.parseLimit);
        if (links.length === 0)
            return next();
        const scope = session.channelId || session.guildId || 'private';
        const targets = [];
        for (const link of links) {
            if (shouldProcess(recent, scope, link, config.minimumInterval))
                targets.push(link);
        }
        if (targets.length > 0) {
            // Deliberately not awaited so the middleware chain is not blocked.
            handleLinks(session, targets, config).catch((error) => {
                logger.warn(error);
            });
        }
        return next();
    };
    ctx.middleware(onMessage, config.middleware);
}
78
/**
 * Fetches every link, renders the posts as messages and sends them.
 *
 * Delivery is one of: a single `figure` element (forwarding enabled and the
 * platform is `onebot` or `red`), sequential messages with the first one
 * quoting the trigger (`config.quote`), or plain sequential messages.
 * Any error is logged and, when `config.showError` is set, reported to chat.
 * A previously sent wait tip is deleted at the end on a best-effort basis.
 *
 * @param session Session of the triggering message.
 * @param links Links to resolve, processed in order.
 * @param config Resolved plugin configuration.
 */
async function handleLinks(session, links, config) {
    let waitTipMessageId;
    if (config.waitTip) {
        const sent = await session.send(`${koishi_1.h.quote(session.messageId)}${config.waitTip}`);
        waitTipMessageId = Array.isArray(sent) ? sent[0] : sent;
    }
    try {
        const outgoing = [];
        for (const link of links) {
            if (config.loggerinfo)
                logger.info(`parse ${link}`);
            const post = await (0, parser_1.fetchDouyinPost)(link, config);
            for (const rendered of (0, parser_1.buildDouyinMessages)(post, config, session))
                outgoing.push(rendered);
        }
        if (outgoing.length === 0)
            return;
        const forward = config.useForward && (session.platform === 'onebot' || session.platform === 'red');
        if (forward) {
            await session.send((0, koishi_1.h)('figure', { children: outgoing }));
            return;
        }
        let queue = outgoing;
        if (config.quote) {
            // Only the first message carries the quote; the rest are sent unchanged.
            const [first, ...rest] = outgoing;
            queue = [(0, koishi_1.h)('message', koishi_1.h.quote(session.messageId), first.children), ...rest];
        }
        for (const message of queue)
            await session.send(message);
    }
    catch (error) {
        logger.warn(error);
        if (config.showError) {
            const reason = error instanceof Error ? error.message : String(error);
            await session.send(`抖音解析失败:${reason}`);
        }
    }
    finally {
        if (waitTipMessageId) {
            await session.bot?.deleteMessage?.(session.channelId, waitTipMessageId).catch?.(() => undefined);
        }
    }
}
118
/**
 * Decides whether a link in a channel should be parsed now, de-duplicating
 * repeats inside a time window.
 *
 * When the link is accepted, its timestamp is recorded and every entry older
 * than the window is evicted, so `recent` stays bounded by the number of
 * distinct links seen within one window instead of growing forever.
 *
 * @param recent Map from `${channelId}:${link}` to the last accepted time (ms).
 * @param channelId Scope of the de-duplication.
 * @param link Link being considered.
 * @param seconds Window length in seconds; `<= 0` disables de-duplication.
 * @returns `true` when the link should be processed.
 */
function shouldProcess(recent, channelId, link, seconds) {
    if (seconds <= 0)
        return true;
    const windowMs = seconds * 1000;
    const now = Date.now();
    const key = `${channelId}:${link}`;
    const last = recent.get(key);
    if (last !== undefined && now - last < windowMs)
        return false;
    // Evict expired entries; deleting while iterating a Map is well-defined.
    for (const [seenKey, seenAt] of recent) {
        if (now - seenAt >= windowMs)
            recent.delete(seenKey);
    }
    recent.set(key, now);
    return true;
}
@@ -0,0 +1,45 @@
1
+ import { h } from 'koishi';
2
+ export interface DouyinConfigLike {
3
+ userAgent: string;
4
+ timeout: number;
5
+ showVideo: boolean;
6
+ showImages: boolean;
7
+ maxImages: number;
8
+ maxDescLength: number;
9
+ descTruncateSuffix: string;
10
+ showAuthor: boolean;
11
+ showLink: boolean;
12
+ cookie?: string;
13
+ }
14
+ export interface DouyinPost {
15
+ id: string;
16
+ url: string;
17
+ title: string;
18
+ desc: string;
19
+ type: 'video' | 'note' | 'slides' | 'unknown';
20
+ authorName: string;
21
+ authorAvatar?: string;
22
+ createTime?: number;
23
+ duration?: number;
24
+ coverUrl?: string;
25
+ imageUrls: string[];
26
+ dynamicImageUrls: string[];
27
+ videoUrls: string[];
28
+ }
29
+ interface ParsedDouyinUrl {
30
+ url: string;
31
+ id: string;
32
+ type: 'video' | 'note' | 'slides' | 'unknown';
33
+ }
34
+ export declare function extractDouyinLinks(content: string): string[];
35
+ export declare function resolveDouyinLink(rawUrl: string, config: DouyinConfigLike): Promise<string>;
36
+ export declare function fetchDouyinPost(rawUrl: string, config: DouyinConfigLike): Promise<DouyinPost>;
37
+ export declare function buildDouyinMessages(post: DouyinPost, config: DouyinConfigLike, session?: {
38
+ userId?: string;
39
+ username?: string;
40
+ author?: {
41
+ nickname?: string;
42
+ };
43
+ }): h[];
44
+ export declare function extractRouterDataPost(html: string, parsed?: ParsedDouyinUrl, sourceUrl?: string): DouyinPost | null;
45
+ export {};
package/lib/parser.js ADDED
@@ -0,0 +1,335 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.extractDouyinLinks = extractDouyinLinks;
4
+ exports.resolveDouyinLink = resolveDouyinLink;
5
+ exports.fetchDouyinPost = fetchDouyinPost;
6
+ exports.buildDouyinMessages = buildDouyinMessages;
7
+ exports.extractRouterDataPost = extractRouterDataPost;
8
+ const koishi_1 = require("koishi");
9
// Characters allowed in the tail of a link: anything except whitespace, quotes,
// angle brackets and the listed ASCII / full-width delimiters.
const URL_BOUNDARY = '[^\\s"\'<>\\\\^`{|},。;!?、【】《》]+';
// One global, case-insensitive pattern per supported host/path prefix.
const LINK_PATTERNS = [
    'v\\.douyin\\.com/',
    'jx\\.douyin\\.com/',
    '(?:www\\.)?douyin\\.com/(?:video|note)/',
    '(?:www\\.)?iesdouyin\\.com/share/(?:slides|video|note)/',
    'm\\.douyin\\.com/share/(?:slides|video|note)/',
    'jingxuan\\.douyin\\.com/m/(?:slides|video|note)/',
].map((hostAndPath) => new RegExp(`https?://${hostAndPath}${URL_BOUNDARY}`, 'gi'));
// Headers sent with HTML page requests.
const IOS_HEADERS = {
    accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    referer: 'https://www.douyin.com/',
};
// Headers sent with the JSON slides API request.
const ANDROID_HEADERS = {
    accept: 'application/json,text/plain,*/*',
    referer: 'https://www.douyin.com/',
};
27
/**
 * Extracts every supported Douyin link from raw message content.
 *
 * The content is expanded into candidate strings, each candidate is normalized
 * and scanned with every entry of `LINK_PATTERNS`.
 *
 * @param content Raw message content.
 * @returns Cleaned links, de-duplicated in first-seen order.
 */
function extractDouyinLinks(content) {
    const found = new Set();
    for (const candidate of expandTextCandidates(content)) {
        const text = normalizeText(candidate);
        for (const pattern of LINK_PATTERNS) {
            // The patterns are shared global regexes; always scan from the start.
            pattern.lastIndex = 0;
            for (const hit of text.matchAll(pattern))
                found.add(cleanUrl(hit[0]));
        }
    }
    return Array.from(found);
}
42
/**
 * Resolves a `v.douyin.com` / `jx.douyin.com` short link to its target.
 *
 * Other URLs are returned unchanged (after a scheme is ensured). For short
 * links the `Location` header of a non-followed request is preferred; if the
 * response is a redirect without `Location` the input is returned, otherwise
 * the request is repeated with redirects followed.
 *
 * @param rawUrl Link as extracted from the message.
 * @param config Network settings (user agent, cookie, timeout).
 * @returns The resolved absolute URL.
 */
async function resolveDouyinLink(rawUrl, config) {
    const url = ensureProtocol(rawUrl);
    const isShortLink = /(?:v|jx)\.douyin\.com/i.test(url);
    if (!isShortLink)
        return url;
    const probe = await fetchWithTimeout(url, config, { redirect: 'manual', headers: IOS_HEADERS });
    const location = probe.headers.get('location');
    if (location) {
        // `Location` may be relative; resolve it against the short link.
        return new URL(location, url).toString();
    }
    const isRedirect = probe.status >= 300 && probe.status < 400;
    if (isRedirect)
        return url;
    const followed = await fetchWithTimeout(url, config, { redirect: 'follow', headers: IOS_HEADERS });
    return followed.url || url;
}
55
/**
 * Resolves a link and loads the post it points to.
 *
 * `slides` links go straight to the slides API. Other links try each share
 * page candidate and read its embedded `_ROUTER_DATA`; if none yields a post
 * the slides API is tried last.
 *
 * @param rawUrl Link as extracted from the message.
 * @param config Network settings (user agent, cookie, timeout).
 * @returns The parsed post.
 * @throws Error when no work ID is found, or when every source failed (the
 *         message lists each failure).
 */
async function fetchDouyinPost(rawUrl, config) {
    const parsed = parseDouyinUrl(await resolveDouyinLink(rawUrl, config));
    if (!parsed.id)
        throw new Error('未能识别抖音作品 ID。');
    if (parsed.type === 'slides')
        return fetchSlidesPost(parsed, config);
    const reasonOf = (error) => (error instanceof Error ? error.message : String(error));
    const errors = [];
    for (const url of buildPageCandidates(parsed)) {
        try {
            const html = await fetchText(url, config, IOS_HEADERS);
            const post = extractRouterDataPost(html, parsed, url);
            if (post)
                return post;
            errors.push(`${url}: 未找到 _ROUTER_DATA`);
        }
        catch (error) {
            errors.push(`${url}: ${reasonOf(error)}`);
        }
    }
    try {
        return await fetchSlidesPost(parsed, config);
    }
    catch (error) {
        errors.push(`slidesinfo: ${reasonOf(error)}`);
    }
    throw new Error(`抖音解析失败:${errors.join(';')}`);
}
83
/**
 * Renders a post as a list of `message` elements: one text message, then up
 * to `config.maxImages` images, then at most one video (the first animated
 * image if any, otherwise the first video URL).
 *
 * @param post Parsed post.
 * @param config Display settings.
 * @param session Optional sender info copied onto every message's attributes.
 * @returns Messages in sending order.
 */
function buildDouyinMessages(post, config, session) {
    const attrs = {
        userId: session?.userId,
        nickname: session?.author?.nickname || session?.username,
    };
    const wrap = (child) => (0, koishi_1.h)('message', attrs, child);
    const messages = [wrap(koishi_1.h.text(formatPostText(post, config)))];
    if (config.showImages) {
        const shown = post.imageUrls.slice(0, config.maxImages);
        messages.push(...shown.map((imageUrl) => wrap(koishi_1.h.image(imageUrl))));
    }
    if (config.showVideo) {
        const clips = [...post.dynamicImageUrls, ...post.videoUrls].slice(0, 1);
        messages.push(...clips.map((videoUrl) => wrap(koishi_1.h.video(videoUrl))));
    }
    return messages;
}
102
/**
 * Reads the `window._ROUTER_DATA` assignment embedded in a share page and
 * builds a post from its first item (video page first, then note page).
 *
 * @param html Page HTML.
 * @param parsed Parsed link the page was requested for.
 * @param sourceUrl URL the page was fetched from; defaults to `parsed.url`.
 * @returns The post, or `null` when the assignment or the item is missing.
 * @throws SyntaxError when the embedded payload is not valid JSON.
 */
function extractRouterDataPost(html, parsed = { id: '', type: 'unknown', url: '' }, sourceUrl = parsed.url) {
    const payload = /window\._ROUTER_DATA\s*=\s*([\s\S]*?)<\/script>/.exec(html)?.[1];
    if (!payload)
        return null;
    // Drop trailing semicolons left over from the script statement.
    const routerData = parseJsonLike(payload.trim().replace(/;+\s*$/, ''));
    const item = ['video_(id)/page', 'note_(id)/page']
        .map((page) => deepGet(routerData, ['loaderData', page, 'videoInfoRes', 'item_list', 0]))
        .find((candidate) => candidate != null);
    return item ? buildPostFromAweme(item, parsed, sourceUrl) : null;
}
112
/**
 * Splits a Douyin URL into its numeric work ID and content type.
 *
 * @param url Absolute or scheme-less URL.
 * @returns `{ url, id, type }`; `id` is `''` and `type` is `'unknown'` when
 *          the URL matches none of the supported shapes.
 */
function parseDouyinUrl(url) {
    const normalized = ensureProtocol(url);
    const idMatch = /(?:douyin\.com\/(?:video|note)|(?:iesdouyin|m\.douyin)\.com\/share\/(?:slides|video|note)|jingxuan\.douyin\.com\/m\/(?:slides|video|note))\/(\d+)/i.exec(normalized);
    const typeMatch = /douyin\.com\/(video|note)\/\d+|(?:iesdouyin|m\.douyin)\.com\/share\/(slides|video|note)\/\d+|jingxuan\.douyin\.com\/m\/(slides|video|note)\/\d+/i.exec(normalized);
    // Exactly one of the three capture groups is set, depending on the host shape.
    const [, plainType, shareType, jingxuanType] = typeMatch ?? [];
    const id = idMatch?.[1] || '';
    const type = plainType || shareType || jingxuanType || 'unknown';
    return { url: normalized, id, type };
}
122
/**
 * Lists the share-page URLs to try for a parsed link, in order.
 * An `unknown` type is requested as `video`.
 *
 * @param parsed Parsed link (`id`, `type`).
 * @returns Candidate page URLs.
 */
function buildPageCandidates(parsed) {
    const type = parsed.type === 'unknown' ? 'video' : parsed.type;
    const bases = ['https://m.douyin.com/share', 'https://www.iesdouyin.com/share'];
    return bases.map((base) => `${base}/${type}/${parsed.id}`);
}
129
/**
 * Loads a post through the `slidesinfo` web API.
 *
 * @param parsed Parsed link; only `id` and `url` are used.
 * @param config Network settings (user agent, cookie, timeout).
 * @returns The post, always typed as `slides`.
 * @throws Error on a non-OK HTTP status or when the response has no item.
 */
async function fetchSlidesPost(parsed, config) {
    const query = new URLSearchParams();
    query.set('aweme_ids', `[${parsed.id}]`);
    query.set('request_source', '200');
    const endpoint = `https://www.iesdouyin.com/web/api/v2/aweme/slidesinfo/?${query.toString()}`;
    const response = await fetchWithTimeout(endpoint, config, { redirect: 'follow', headers: ANDROID_HEADERS });
    if (!response.ok)
        throw new Error(`请求抖音图集接口失败:HTTP ${response.status}`);
    const payload = await response.json();
    const item = payload?.aweme_details?.[0];
    if (!item)
        throw new Error('图集接口未返回作品数据。');
    return buildPostFromAweme(item, { ...parsed, type: 'slides' }, parsed.url);
}
142
/**
 * Converts a raw item object (from page data or the slides API) into a post.
 *
 * Each image entry contributes either its animated clip (when it has a
 * `video.play_addr`) or its still URL. The item's own video URL is used only
 * when no image content was found.
 *
 * @param item Raw item object.
 * @param parsed Parsed link supplying the ID and type.
 * @param sourceUrl Fallback URL when `parsed` has no ID.
 * @returns The normalized post.
 */
function buildPostFromAweme(item, parsed, sourceUrl) {
    const video = item?.video;
    const stills = [];
    const animated = [];
    const rawImages = Array.isArray(item?.images) ? item.images : [];
    rawImages.forEach((image) => {
        const clip = pickUrl(image?.video?.play_addr?.url_list);
        if (clip) {
            animated.push(removeWatermark(clip));
            return;
        }
        const still = pickUrl(image?.url_list);
        if (still)
            stills.push(still);
    });
    const isImagePost = stills.length > 0 || animated.length > 0;
    const playUrl = isImagePost ? undefined : pickUrl(video?.play_addr?.url_list);
    const coverUrl = pickUrl(video?.cover?.url_list);
    const author = item?.author;
    const authorAvatar = pickUrl(author?.avatar_thumb?.url_list) || pickUrl(author?.avatar_medium?.url_list);
    // `duration` is divided by 1000 — presumably milliseconds to seconds; confirm against the API.
    const duration = typeof video?.duration === 'number' ? Math.round(video.duration / 1000) : undefined;
    return {
        id: parsed.id || String(item?.aweme_id || ''),
        url: canonicalUrl(parsed, sourceUrl),
        title: String(item?.desc || '抖音作品'),
        desc: String(item?.desc || ''),
        type: parsed.type,
        authorName: String(author?.nickname || '未知作者'),
        authorAvatar,
        createTime: numberOrUndefined(item?.create_time),
        duration,
        coverUrl,
        imageUrls: unique(stills),
        dynamicImageUrls: unique(animated),
        videoUrls: playUrl ? [removeWatermark(playUrl)] : [],
    };
}
177
+ function formatPostText(post, config) {
178
+ const lines = [`抖音:${post.title || '抖音作品'}`];
179
+ if (config.showAuthor)
180
+ lines.push(`作者:${post.authorName}`);
181
+ if (post.desc && post.desc !== post.title && config.maxDescLength > 0) {
182
+ lines.push(trimText(post.desc, config.maxDescLength, config.descTruncateSuffix));
183
+ }
184
+ if (config.showLink)
185
+ lines.push(post.url);
186
+ return lines.join('\n');
187
+ }
188
/**
 * Expands message content into every string that might contain a link:
 * the content itself, text and string attributes of its parsed elements,
 * raw `data="..."` attribute values, and HTML-decoded / JSON-derived variants
 * of all of those.
 *
 * @param content Raw message content.
 * @returns Candidate strings, de-duplicated in insertion order.
 */
function expandTextCandidates(content) {
    const values = new Set();
    values.add(content);
    try {
        koishi_1.h.parse(content).forEach((element) => collectElementText(element, values));
    }
    catch {
        // Some adapters deliver partial XML snippets; regex extraction below still handles them.
    }
    const dataAttribute = /\bdata=(?:"([^"]*)"|'([^']*)')/gi;
    for (const [, doubleQuoted, singleQuoted] of content.matchAll(dataAttribute)) {
        values.add(doubleQuoted || singleQuoted || '');
    }
    // Iterate a snapshot: the loop itself adds new entries to `values`.
    const snapshot = Array.from(values);
    for (const value of snapshot) {
        const decoded = decodeHtmlEntities(value);
        values.add(decoded);
        maybeCollectJsonValues(decoded, values);
    }
    return Array.from(values);
}
207
/**
 * Recursively collects text from an element tree: plain string nodes, string
 * attribute values, then the same for every child.
 *
 * @param element A string node or an element with optional `attrs` / `children`.
 * @param values Set receiving the collected strings.
 */
function collectElementText(element, values) {
    if (typeof element === 'string') {
        values.add(element);
        return;
    }
    Object.values(element.attrs || {})
        .filter((attr) => typeof attr === 'string')
        .forEach((attr) => values.add(attr));
    (element.children || []).forEach((child) => collectElementText(child, values));
}
220
/**
 * Treats `text` as JSON and collects every string inside it. When it is not
 * valid JSON, adds a copy with `\"` and `\/` unescaped instead (if that
 * changes anything).
 *
 * @param text Candidate text.
 * @param values Set receiving the collected strings.
 */
function maybeCollectJsonValues(text, values) {
    try {
        walkJson(JSON.parse(text), values);
    }
    catch {
        const loosened = text.split('\\"').join('"').split('\\/').join('/');
        if (loosened !== text)
            values.add(loosened);
    }
}
231
/**
 * Recursively adds every string found in a parsed JSON value to `values`.
 * Numbers, booleans and `null` are ignored.
 *
 * @param value Parsed JSON value.
 * @param values Set receiving the strings.
 */
function walkJson(value, values) {
    if (typeof value === 'string') {
        values.add(value);
        return;
    }
    if (value === null || typeof value !== 'object')
        return;
    const children = Array.isArray(value) ? value : Object.values(value);
    for (const child of children)
        walkJson(child, values);
}
246
/**
 * Prepares candidate text for link matching: decodes HTML entities, turns
 * `\/` into `/`, then percent-decodes the result when that is possible.
 *
 * @param text Candidate text.
 * @returns The normalized text.
 */
function normalizeText(text) {
    const unescaped = decodeHtmlEntities(text).replace(/\\\//g, '/');
    try {
        return decodeURIComponent(unescaped);
    }
    catch {
        // Keep the original if it is only partly percent-encoded.
        return unescaped;
    }
}
256
/**
 * Decodes the handful of HTML entities that appear in card payloads:
 * quotes, apostrophes, `&amp;`, `&lt;` and `&gt;`.
 *
 * Decoding is done in a single pass. The previous chain of replacements
 * decoded `&amp;` before `&lt;` / `&gt;`, so `&amp;lt;` was decoded twice and
 * became `<` instead of `&lt;`.
 *
 * Named entities are matched case-sensitively; the `x` of the hexadecimal
 * forms may be upper- or lowercase, as before.
 *
 * @param text Text that may contain entities.
 * @returns Text with one level of entity encoding removed.
 */
function decodeHtmlEntities(text) {
    const decoded = {
        '&quot;': '"',
        '&#34;': '"',
        '&#x22;': '"',
        '&apos;': "'",
        '&#39;': "'",
        '&#x27;': "'",
        '&amp;': '&',
        '&lt;': '<',
        '&gt;': '>',
    };
    return text.replace(/&(?:quot|#34|#[xX]22|apos|#39|#[xX]27|amp|lt|gt);/g, (entity) => decoded[entity.toLowerCase()]);
}
268
/**
 * Tidies a matched link: ensures a scheme, strips trailing punctuation and
 * decodes any remaining `&amp;`.
 *
 * @param url Matched link text.
 * @returns The cleaned link.
 */
function cleanUrl(url) {
    const absolute = ensureProtocol(url);
    const withoutTrailingPunctuation = absolute.replace(/[),,。;;!?!]+$/g, '');
    return withoutTrailingPunctuation.replace(/&amp;/g, '&');
}
273
/**
 * Returns `url` unchanged when it already starts with `http://` or
 * `https://` (any case); otherwise prefixes it with `https://`.
 *
 * @param url Absolute or scheme-less URL.
 * @returns A URL with a scheme.
 */
function ensureProtocol(url) {
    if (/^https?:\/\//i.test(url))
        return url;
    return `https://${url}`;
}
276
/**
 * Fetches a page (following redirects) and returns its body as text.
 *
 * @param url Page URL.
 * @param config Network settings (user agent, cookie, timeout).
 * @param headers Extra request headers.
 * @returns The response body.
 * @throws Error when the response status is not OK.
 */
async function fetchText(url, config, headers) {
    const response = await fetchWithTimeout(url, config, { redirect: 'follow', headers });
    if (response.ok)
        return response.text();
    throw new Error(`请求抖音页面失败:HTTP ${response.status}`);
}
282
/**
 * Calls `fetch` with the configured user agent and optional cookie, aborting
 * the request if no response arrives within `config.timeout` seconds.
 * Headers in `init.headers` override the configured ones.
 *
 * @param url Request URL.
 * @param config Network settings (`userAgent`, `cookie`, `timeout`).
 * @param init Extra `fetch` options.
 * @returns The response.
 */
async function fetchWithTimeout(url, config, init) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), config.timeout * 1000);
    try {
        const headers = Object.assign(
            { 'user-agent': config.userAgent },
            config.cookie ? { cookie: config.cookie } : {},
            init.headers || {},
        );
        const response = await fetch(url, { ...init, signal: controller.signal, headers });
        return response;
    }
    finally {
        // The timer is cleared as soon as `fetch` settles, whether it succeeded or not.
        clearTimeout(timer);
    }
}
300
/**
 * Parses JSON after removing control characters (everything below U+0020
 * except tab, line feed and carriage return, plus U+007F).
 *
 * @param payload JSON text.
 * @returns The parsed value.
 * @throws SyntaxError when the cleaned text is not valid JSON.
 */
function parseJsonLike(payload) {
    const controlChars = /[\u0000-\u0008\u000b\u000c\u000e-\u001f\u007f]/g;
    const sanitized = payload.replace(controlChars, '');
    return JSON.parse(sanitized);
}
303
/**
 * Follows `path` through nested objects/arrays.
 *
 * @param source Root value.
 * @param path Keys or indexes to follow, in order.
 * @returns The value at the path, or `undefined` as soon as a step is reached
 *          from `null` / `undefined`.
 */
function deepGet(source, path) {
    return path.reduce((cursor, key) => (cursor == null ? undefined : cursor[key]), source);
}
312
/**
 * Returns the first argument that is neither `null` nor `undefined`.
 *
 * @param values Candidates, in priority order.
 * @returns The first usable candidate, or `undefined` when there is none.
 */
function firstDefined(...values) {
    for (const candidate of values) {
        if (candidate != null)
            return candidate;
    }
    return undefined;
}
315
/**
 * Returns the first entry of a URL list when it is a string.
 *
 * @param value Expected to be an array of URL strings.
 * @returns The first entry, or `undefined` when `value` is not an array or
 *          its first entry is not a string.
 */
function pickUrl(value) {
    if (!Array.isArray(value))
        return undefined;
    const [head] = value;
    return typeof head === 'string' ? head : undefined;
}
318
/**
 * Rewrites the first `playwm` in a URL to `play`.
 *
 * @param url Media URL.
 * @returns The rewritten URL, or the input when it contains no `playwm`.
 */
function removeWatermark(url) {
    const marker = 'playwm';
    const at = url.indexOf(marker);
    if (at === -1)
        return url;
    return `${url.slice(0, at)}play${url.slice(at + marker.length)}`;
}
321
/**
 * Builds the `www.douyin.com` URL shown to users for a parsed link.
 * `slides` is shown under `/note/`, `unknown` under `/video/`.
 *
 * @param parsed Parsed link (`id`, `type`).
 * @param fallback URL returned when `parsed` has no ID.
 * @returns The canonical URL or the fallback.
 */
function canonicalUrl(parsed, fallback) {
    if (!parsed.id)
        return fallback;
    let segment;
    switch (parsed.type) {
        case 'slides':
            segment = 'note';
            break;
        case 'unknown':
            segment = 'video';
            break;
        default:
            segment = parsed.type;
    }
    return `https://www.douyin.com/${segment}/${parsed.id}`;
}
327
/**
 * Passes numbers through and maps every other value to `undefined`.
 *
 * @param value Any value.
 * @returns `value` when its type is `number`, otherwise `undefined`.
 */
function numberOrUndefined(value) {
    if (typeof value !== 'number')
        return undefined;
    return value;
}
330
/**
 * Drops falsy entries and duplicates, keeping first-seen order.
 *
 * @param values Input list.
 * @returns A new list of distinct truthy values.
 */
function unique(values) {
    const seen = new Set();
    for (const value of values) {
        if (value)
            seen.add(value);
    }
    return Array.from(seen);
}
333
/**
 * Truncates text to at most `maxLength` characters and appends `suffix` when
 * something was cut off.
 *
 * Length is counted in Unicode code points rather than UTF-16 units, so an
 * emoji or other astral character counts as one character and is never split
 * into a lone surrogate. Results for text without such characters are
 * unchanged.
 *
 * @param text Text to shorten.
 * @param maxLength Maximum number of characters to keep.
 * @param suffix Marker appended to truncated text.
 * @returns The original text when it fits, otherwise the shortened text plus suffix.
 */
function trimText(text, maxLength, suffix) {
    const characters = Array.from(text);
    if (characters.length <= maxLength)
        return text;
    return `${characters.slice(0, maxLength).join('')}${suffix}`;
}
package/package.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "name": "koishi-plugin-douyin-local-parser",
3
+ "version": "0.1.0",
4
+ "description": "Parse Douyin links and cards for Koishi.",
5
+ "main": "lib/index.js",
6
+ "typings": "lib/index.d.ts",
7
+ "files": [
8
+ "lib",
9
+ "src"
10
+ ],
11
+ "scripts": {
12
+ "build": "tsc -p tsconfig.json",
13
+ "test": "vitest run",
14
+ "dev": "tsx scripts/local-test.ts"
15
+ },
16
+ "keywords": [
17
+ "chatbot",
18
+ "koishi",
19
+ "plugin",
20
+ "douyin",
21
+ "parser"
22
+ ],
23
+ "license": "MIT",
24
+ "peerDependencies": {
25
+ "koishi": "^4.16.8"
26
+ },
27
+ "devDependencies": {
28
+ "@types/node": "^20.14.10",
29
+ "koishi": "^4.18.8",
30
+ "tsx": "^4.20.3",
31
+ "typescript": "^5.5.4",
32
+ "vitest": "^1.6.0"
33
+ }
34
+ }
package/src/index.ts ADDED
@@ -0,0 +1,156 @@
1
+ import { Context, Logger, Schema, h } from 'koishi'
2
+ import { buildDouyinMessages, extractDouyinLinks, fetchDouyinPost } from './parser'
3
+
4
+ export const name = 'douyin-parser'
5
+
6
+ const logger = new Logger(name)
7
+
8
/** Resolved plugin configuration; see the `Config` schema for defaults and limits. */
export interface Config {
  /** Master switch; when false, `apply` registers nothing. */
  enabled: boolean
  /** Message sources to parse: plain links and/or card messages. */
  parseMode: ('link' | 'card')[]
  /** Text sent (quoting the trigger) before parsing starts; `null` or empty sends nothing. */
  waitTip?: string | null
  /** Send all results as one `figure` element; only used on `onebot` / `red`. */
  useForward: boolean
  /** When not forwarding, make the first result quote the triggering message. */
  quote: boolean
  /** Passed as the second argument of `ctx.middleware`. */
  middleware: boolean
  /** Maximum number of links taken from a single message. */
  parseLimit: number
  /** De-duplication window per channel and link, in seconds; 0 disables it. */
  minimumInterval: number
  /** `user-agent` header sent with every request. */
  userAgent: string
  /** Optional `cookie` header; omitted when empty. */
  cookie?: string
  /** Request timeout in seconds. */
  timeout: number
  /** Send image messages for the post's images. */
  showImages: boolean
  /** Maximum number of images sent per post. */
  maxImages: number
  /** Maximum description length; 0 hides the description. */
  maxDescLength: number
  /** Marker appended to a truncated description. */
  descTruncateSuffix: string
  /** Send a video message for the post's clip or video. */
  showVideo: boolean
  /** Include the author line in the text message. */
  showAuthor: boolean
  /** Include the post URL in the text message. */
  showLink: boolean
  /** Report parse failures to the chat in addition to logging them. */
  showError: boolean
  /** Log each link before it is parsed. */
  loggerinfo: boolean
}
30
+
31
/** Console section "基础设置": master switch, accepted message sources and the optional wait tip. */
const basicSettings = Schema.object({
  enabled: Schema.boolean().default(true).description('开启抖音链接/卡片解析。'),
  parseMode: Schema.array(Schema.union([
    Schema.const('link').description('普通链接'),
    Schema.const('card').description('卡片消息'),
  ])).role('checkbox').default(['link', 'card']).description('选择解析来源。'),
  waitTip: Schema.union([
    Schema.const(null).description('不发送提示'),
    Schema.string().description('解析前发送提示语').default('正在解析抖音链接...'),
  ]).default(null).description('等待提示。'),
}).description('基础设置')

/** Console section "发送设置": forwarding, quoting, middleware mode and rate limits. */
const deliverySettings = Schema.object({
  useForward: Schema.boolean().default(false).description('开启合并转发。主要适用于 onebot / red 适配器。').experimental(),
  quote: Schema.boolean().default(true).description('普通发送时引用原消息。'),
  middleware: Schema.boolean().default(false).description('以前置中间件模式捕获消息。').experimental(),
  parseLimit: Schema.number().min(1).max(10).step(1).default(3).description('单条消息最多解析的链接数量。'),
  minimumInterval: Schema.number().min(0).max(3600).step(1).default(180).description('同频道同链接去重间隔,单位秒。0 表示不去重。'),
}).description('发送设置')

/** Console section "内容设置": which parts of a parsed post are rendered. */
const contentSettings = Schema.object({
  showImages: Schema.boolean().default(true).description('返回图片/动图。'),
  maxImages: Schema.number().min(0).max(18).step(1).default(9).description('单个作品最多发送图片数。'),
  maxDescLength: Schema.number().min(0).max(2000).step(10).default(160).description('描述最大字数。设为 0 时不展示描述。'),
  descTruncateSuffix: Schema.string().default('...(已截断)').description('描述超出最大字数时追加的截断标志。'),
  showVideo: Schema.boolean().default(true).description('返回视频元素。'),
  showAuthor: Schema.boolean().default(true).description('展示作者。'),
  showLink: Schema.boolean().default(true).description('展示原文链接。'),
}).description('内容设置')

/** Console section "网络与调试": request headers, timeout and diagnostics. */
const networkSettings = Schema.object({
  userAgent: Schema.string().default('Mozilla/5.0 (iPhone; CPU iPhone OS 18_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.5 Mobile/15E148 Safari/604.1').description('请求抖音页面时使用的 User-Agent。'),
  cookie: Schema.string().role('textarea').default('').description('可选 Cookie。遇到风控或无法读取页面数据时可填写。'),
  timeout: Schema.number().min(3).max(60).step(1).default(15).description('请求超时时间,单位秒。'),
  showError: Schema.boolean().default(false).description('解析失败时向聊天发送错误提示。'),
  loggerinfo: Schema.boolean().default(false).description('输出调试日志。').experimental(),
}).description('网络与调试')

/** Full plugin configuration schema: the four sections above, in display order. */
export const Config: Schema<Config> = Schema.intersect([
  basicSettings,
  deliverySettings,
  contentSettings,
  networkSettings,
])
67
+
68
/** Usage text exported by the plugin: a short hint plus example links. */
export const usage = [
  '',
  '发送抖音链接或平台卡片即可自动解析。',
  '',
  '支持示例:',
  '',
  '- https://v.douyin.com/_2ljF4AmKL8/',
  '- https://www.douyin.com/video/7521023890996514083',
  '- https://www.douyin.com/note/7469411074119322899',
  '- https://www.iesdouyin.com/share/video/7521023890996514083',
  '',
].join('\n')
78
+
79
/**
 * Plugin entry point: registers one middleware that looks for Douyin links in
 * each incoming message and hands them to `handleLinks`.
 *
 * Nothing is registered when `config.enabled` is false. The middleware always
 * continues the chain via `next()`; link handling runs in the background and
 * its failures are only logged.
 *
 * @param ctx Koishi context used to register the middleware.
 * @param config Resolved plugin configuration.
 */
export function apply(ctx: Context, config: Config) {
  if (!config.enabled) return

  // Timestamp of the last accepted occurrence per `${channel}:${link}`.
  const recent = new Map<string, number>()

  ctx.middleware(async (session, next) => {
    const content = session.content || session.stripped?.content || ''
    // Element-like content or a `data=` attribute is treated as a card message.
    const looksLikeCard = /^<\w+\s/i.test(content) || content.includes('data=')
    const requiredMode = looksLikeCard ? 'card' : 'link'
    if (!config.parseMode.includes(requiredMode)) return next()

    const links = extractDouyinLinks(content).slice(0, config.parseLimit)
    if (links.length === 0) return next()

    const scope = session.channelId || session.guildId || 'private'
    const targets: string[] = []
    for (const link of links) {
      if (shouldProcess(recent, scope, link, config.minimumInterval)) targets.push(link)
    }

    if (targets.length > 0) {
      // Deliberately not awaited so the middleware chain is not blocked.
      handleLinks(session, targets, config).catch((error) => {
        logger.warn(error)
      })
    }

    return next()
  }, config.middleware)
}
104
+
105
/**
 * Fetches every link, renders the posts as messages and sends them.
 *
 * Delivery is one of: a single `figure` element (forwarding enabled and the
 * platform is `onebot` or `red`), sequential messages with the first one
 * quoting the trigger (`config.quote`), or plain sequential messages.
 * Any error is logged and, when `config.showError` is set, reported to chat.
 * A previously sent wait tip is deleted at the end on a best-effort basis.
 *
 * @param session Session of the triggering message.
 * @param links Links to resolve, processed in order.
 * @param config Resolved plugin configuration.
 */
async function handleLinks(session: any, links: string[], config: Config) {
  let waitTipMessageId: string | undefined

  if (config.waitTip) {
    const sent = await session.send(`${h.quote(session.messageId)}${config.waitTip}`)
    waitTipMessageId = Array.isArray(sent) ? sent[0] : sent
  }

  try {
    const outgoing: h[] = []
    for (const link of links) {
      if (config.loggerinfo) logger.info(`parse ${link}`)
      const post = await fetchDouyinPost(link, config)
      for (const rendered of buildDouyinMessages(post, config, session)) outgoing.push(rendered)
    }
    if (outgoing.length === 0) return

    const forward = config.useForward && (session.platform === 'onebot' || session.platform === 'red')
    if (forward) {
      await session.send(h('figure', { children: outgoing }))
      return
    }

    let queue = outgoing
    if (config.quote) {
      // Only the first message carries the quote; the rest are sent unchanged.
      const [first, ...rest] = outgoing
      queue = [h('message', h.quote(session.messageId), first.children), ...rest]
    }
    for (const message of queue) await session.send(message)
  } catch (error) {
    logger.warn(error)
    if (config.showError) {
      const reason = error instanceof Error ? error.message : String(error)
      await session.send(`抖音解析失败:${reason}`)
    }
  } finally {
    if (waitTipMessageId) {
      await session.bot?.deleteMessage?.(session.channelId, waitTipMessageId).catch?.(() => undefined)
    }
  }
}
145
+
146
/**
 * Decides whether a link in a channel should be parsed now, de-duplicating
 * repeats inside a time window.
 *
 * When the link is accepted, its timestamp is recorded and every entry older
 * than the window is evicted, so `recent` stays bounded by the number of
 * distinct links seen within one window instead of growing forever.
 *
 * @param recent Map from `${channelId}:${link}` to the last accepted time (ms).
 * @param channelId Scope of the de-duplication.
 * @param link Link being considered.
 * @param seconds Window length in seconds; `<= 0` disables de-duplication.
 * @returns `true` when the link should be processed.
 */
function shouldProcess(recent: Map<string, number>, channelId: string, link: string, seconds: number): boolean {
  if (seconds <= 0) return true

  const windowMs = seconds * 1000
  const now = Date.now()
  const key = `${channelId}:${link}`
  const last = recent.get(key)
  if (last !== undefined && now - last < windowMs) return false

  // Evict expired entries; deleting while iterating a Map is well-defined.
  for (const [seenKey, seenAt] of recent) {
    if (now - seenAt >= windowMs) recent.delete(seenKey)
  }

  recent.set(key, now)
  return true
}
package/src/parser.ts ADDED
@@ -0,0 +1,401 @@
1
+ import { h } from 'koishi'
2
+
3
/** Subset of the plugin configuration that the parser functions in this file read. */
export interface DouyinConfigLike {
  /** Sent as the `user-agent` header on every request. */
  userAgent: string
  /** Request timeout; multiplied by 1000 before being handed to `setTimeout`. */
  timeout: number
  /** When true, a video message is appended to the built messages. */
  showVideo: boolean
  /** When true, image messages are appended to the built messages. */
  showImages: boolean
  /** Upper bound on the number of image messages. */
  maxImages: number
  /** Maximum description length; the description line is omitted when this is not positive. */
  maxDescLength: number
  /** Text appended to a description that had to be cut. */
  descTruncateSuffix: string
  /** When true, the author line is included in the text message. */
  showAuthor: boolean
  /** When true, the post URL is included in the text message. */
  showLink: boolean
  /** Sent as the `cookie` header when non-empty. */
  cookie?: string
}
15
+
16
/** Normalised view of one Douyin post as produced by `buildPostFromAweme`. */
export interface DouyinPost {
  /** Post id taken from the parsed URL, falling back to the item's `aweme_id`. */
  id: string
  /** Canonical `www.douyin.com` URL, or the source URL when no id is known. */
  url: string
  /** The item's `desc`, or a fixed placeholder when that is empty. */
  title: string
  /** The item's `desc`, or an empty string. */
  desc: string
  /** Kind of post as derived from the URL path. */
  type: 'video' | 'note' | 'slides' | 'unknown'
  /** Author nickname, or a fixed placeholder when missing. */
  authorName: string
  /** First URL of the author's thumb avatar, else of the medium avatar. */
  authorAvatar?: string
  /** The item's `create_time` when it is a number. */
  createTime?: number
  /** `video.duration / 1000`, rounded — presumably seconds; confirm against the upstream API. */
  duration?: number
  /** First URL of the video cover. */
  coverUrl?: string
  /** De-duplicated URLs of images that carry no video of their own. */
  imageUrls: string[]
  /** De-duplicated play URLs of images that carry a video. */
  dynamicImageUrls: string[]
  /** Play URL of the main video; empty when the post has image content. */
  videoUrls: string[]
}
31
+
32
/** Result of splitting a Douyin URL into its id and post kind. */
interface ParsedDouyinUrl {
  /** The URL that was analysed, with a protocol ensured. */
  url: string
  /** Numeric post id from the path, or an empty string when none was recognised. */
  id: string
  /** Path segment naming the post kind, or `'unknown'`. */
  type: 'video' | 'note' | 'slides' | 'unknown'
}
37
+
38
// Character class for the tail of a URL: stops at whitespace, quotes, angle brackets,
// backslashes, a few ASCII symbols and common full-width CJK punctuation.
const URL_BOUNDARY = '[^\\s"\'<>\\\\^`{|},。;!?、【】《》]+'

// One global, case-insensitive pattern per supported host/path prefix. The order is
// significant for callers that index into this list.
const LINK_PATTERNS = [
  'v\\.douyin\\.com/',
  'jx\\.douyin\\.com/',
  '(?:www\\.)?douyin\\.com/(?:video|note)/',
  '(?:www\\.)?iesdouyin\\.com/share/(?:slides|video|note)/',
  'm\\.douyin\\.com/share/(?:slides|video|note)/',
  'jingxuan\\.douyin\\.com/m/(?:slides|video|note)/',
].map((prefix) => new RegExp(`https?://${prefix}${URL_BOUNDARY}`, 'gi'))
47
+
48
// Both header sets send the same referer.
const DOUYIN_REFERER = 'https://www.douyin.com/'

// Headers used for HTML page requests.
const IOS_HEADERS = {
  'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
  'referer': DOUYIN_REFERER,
}

// Headers used for the JSON API request.
const ANDROID_HEADERS = {
  'accept': 'application/json,text/plain,*/*',
  'referer': DOUYIN_REFERER,
}
58
+
59
/**
 * Collect every Douyin link found in `content`.
 *
 * The raw text is first expanded into candidate strings (element attributes, decoded and
 * JSON-unwrapped variants); each candidate is normalised and scanned with all link patterns.
 *
 * @returns Cleaned links without duplicates, in order of first appearance.
 */
export function extractDouyinLinks(content: string): string[] {
  const found = new Set<string>()

  for (const candidate of expandTextCandidates(content)) {
    const text = normalizeText(candidate)
    for (const pattern of LINK_PATTERNS) {
      // The patterns are shared global regexes; start every scan from the beginning.
      pattern.lastIndex = 0
      for (const hit of text.matchAll(pattern)) found.add(cleanUrl(hit[0]))
    }
  }

  return Array.from(found)
}
76
+
77
/**
 * Turn a `v.douyin.com` / `jx.douyin.com` short link into the URL it redirects to.
 *
 * URLs on other hosts are returned as-is (with a protocol ensured). For short links the
 * `location` header of a non-following request is preferred; if there is none and the status
 * is not a 3xx, the request is repeated with redirects followed and the final URL is used.
 *
 * @returns The resolved URL, or the input URL when nothing better could be determined.
 */
export async function resolveDouyinLink(rawUrl: string, config: DouyinConfigLike): Promise<string> {
  const url = ensureProtocol(rawUrl)
  const isShortLink = /(?:v|jx)\.douyin\.com/i.test(url)
  if (!isShortLink) return url

  const probe = await fetchWithTimeout(url, config, { redirect: 'manual', headers: IOS_HEADERS })
  const target = probe.headers.get('location')
  // `location` may be relative, so resolve it against the short link.
  if (target) return new URL(target, url).toString()

  const isRedirectStatus = probe.status >= 300 && probe.status < 400
  if (isRedirectStatus) return url

  const landed = await fetchWithTimeout(url, config, { redirect: 'follow', headers: IOS_HEADERS })
  return landed.url || url
}
90
+
91
/**
 * Fetch and parse the post behind `rawUrl`.
 *
 * Slides links go straight to the slides API. Other links try each share-page candidate for
 * embedded router data and fall back to the slides API last.
 *
 * @throws Error when no post id can be recognised, or when every strategy fails (the message
 *         lists each failure).
 */
export async function fetchDouyinPost(rawUrl: string, config: DouyinConfigLike): Promise<DouyinPost> {
  const target = parseDouyinUrl(await resolveDouyinLink(rawUrl, config))
  if (!target.id) throw new Error('未能识别抖音作品 ID。')

  if (target.type === 'slides') return fetchSlidesPost(target, config)

  const failures: string[] = []
  const reasonOf = (error: unknown) => (error instanceof Error ? error.message : String(error))

  for (const pageUrl of buildPageCandidates(target)) {
    try {
      const page = await fetchText(pageUrl, config, IOS_HEADERS)
      const post = extractRouterDataPost(page, target, pageUrl)
      if (post) return post
      failures.push(`${pageUrl}: 未找到 _ROUTER_DATA`)
    } catch (error) {
      failures.push(`${pageUrl}: ${reasonOf(error)}`)
    }
  }

  // Last resort: the slides API is tried even for links not marked as slides.
  try {
    return await fetchSlidesPost(target, config)
  } catch (error) {
    failures.push(`slidesinfo: ${reasonOf(error)}`)
  }

  throw new Error(`抖音解析失败:${failures.join(';')}`)
}
118
+
119
/**
 * Convert a parsed post into `message` elements: one text message, then up to
 * `config.maxImages` image messages (when `config.showImages`), then at most one video
 * message (when `config.showVideo`), preferring the first dynamic-image video over the
 * main video.
 *
 * Every message carries the same `userId` / `nickname` attributes taken from `session`.
 */
export function buildDouyinMessages(post: DouyinPost, config: DouyinConfigLike, session?: { userId?: string, username?: string, author?: { nickname?: string } }) {
  const attrs = {
    userId: session?.userId,
    nickname: session?.author?.nickname || session?.username,
  }
  const wrap = (child: h) => h('message', attrs, child)

  const messages: h[] = [wrap(h.text(formatPostText(post, config)))]

  if (config.showImages) {
    const shown = post.imageUrls.slice(0, config.maxImages)
    shown.forEach((imageUrl) => messages.push(wrap(h.image(imageUrl))))
  }

  if (config.showVideo) {
    const playable = post.dynamicImageUrls.concat(post.videoUrls)
    // Only the first playable URL is sent.
    playable.slice(0, 1).forEach((videoUrl) => messages.push(wrap(h.video(videoUrl))))
  }

  return messages
}
142
+
143
/**
 * Extract a post from the `window._ROUTER_DATA = …` assignment embedded in a share page.
 *
 * @param html Page markup.
 * @param parsed Id and kind of the requested post; defaults to an empty, unknown target.
 * @param sourceUrl URL the page was loaded from; defaults to `parsed.url`.
 * @returns The post, or `null` when the assignment or the expected item is absent.
 *          Malformed JSON in the assignment makes the JSON parser throw.
 */
export function extractRouterDataPost(html: string, parsed: ParsedDouyinUrl = { id: '', type: 'unknown', url: '' }, sourceUrl = parsed.url): DouyinPost | null {
  const assignment = /window\._ROUTER_DATA\s*=\s*([\s\S]*?)<\/script>/.exec(html)
  const payload = assignment?.[1]
  if (!payload) return null

  // Strip trailing semicolons so the remainder is plain JSON.
  const routerData = parseJsonLike(payload.trim().replace(/;+\s*$/, ''))

  // The item lives under a video or a note loader depending on the page.
  const loaderKeys = ['video_(id)/page', 'note_(id)/page']
  const item = firstDefined(
    ...loaderKeys.map((loaderKey) => deepGet(routerData, ['loaderData', loaderKey, 'videoInfoRes', 'item_list', 0])),
  )
  if (!item) return null

  return buildPostFromAweme(item, parsed, sourceUrl)
}
156
+
157
/**
 * Split a Douyin URL into its numeric id and post kind.
 *
 * Recognised shapes: `douyin.com/(video|note)/<id>`,
 * `(iesdouyin|m.douyin).com/share/(slides|video|note)/<id>` and
 * `jingxuan.douyin.com/m/(slides|video|note)/<id>`. Anything else yields an empty id and
 * the kind `'unknown'`.
 */
function parseDouyinUrl(url: string): ParsedDouyinUrl {
  const normalized = ensureProtocol(url)

  // Groups 1-3 capture the kind for each URL shape; group 4 captures the id.
  const hit = /(?:douyin\.com\/(video|note)|(?:iesdouyin|m\.douyin)\.com\/share\/(slides|video|note)|jingxuan\.douyin\.com\/m\/(slides|video|note))\/(\d+)/i.exec(normalized)

  const id = hit?.[4] || ''
  const kind = hit?.[1] || hit?.[2] || hit?.[3] || 'unknown'

  return { url: normalized, id, type: kind as ParsedDouyinUrl['type'] }
}
167
+
168
/**
 * List the share-page URLs to try for a post, in order of preference.
 * An unknown kind is requested as a video.
 */
function buildPageCandidates(parsed: ParsedDouyinUrl) {
  const kind = parsed.type === 'unknown' ? 'video' : parsed.type
  const hosts = ['m.douyin.com', 'www.iesdouyin.com']
  return hosts.map((host) => `https://${host}/share/${kind}/${parsed.id}`)
}
175
+
176
/**
 * Load a post through the `slidesinfo` web API and convert its first `aweme_details` entry.
 * The resulting post is always typed as `'slides'`.
 *
 * @throws Error on a non-OK HTTP status or when the response carries no item.
 */
async function fetchSlidesPost(parsed: ParsedDouyinUrl, config: DouyinConfigLike): Promise<DouyinPost> {
  const endpoint = new URL('https://www.iesdouyin.com/web/api/v2/aweme/slidesinfo/')
  const query: Array<[string, string]> = [
    ['aweme_ids', `[${parsed.id}]`],
    ['request_source', '200'],
  ]
  for (const [name, value] of query) endpoint.searchParams.set(name, value)

  const response = await fetchWithTimeout(endpoint.toString(), config, { redirect: 'follow', headers: ANDROID_HEADERS })
  if (!response.ok) throw new Error(`请求抖音图集接口失败:HTTP ${response.status}`)

  const payload = await response.json() as any
  const first = payload?.aweme_details?.[0]
  if (!first) throw new Error('图集接口未返回作品数据。')

  return buildPostFromAweme(first, { ...parsed, type: 'slides' }, parsed.url)
}
190
+
191
/**
 * Map a raw item object from the page data or the slides API onto a `DouyinPost`.
 *
 * Each entry of `item.images` contributes either its own video play URL (collected as a
 * dynamic image, with the watermark marker removed) or its first still-image URL. The main
 * video URL is used only when the item has no image content at all.
 *
 * @param item Raw item; its shape is not validated, every access is optional.
 * @param parsed Id and kind derived from the URL.
 * @param sourceUrl Fallback for the post URL when `parsed.id` is empty.
 */
function buildPostFromAweme(item: any, parsed: ParsedDouyinUrl, sourceUrl: string): DouyinPost {
  const video = item?.video
  const stills: string[] = []
  const animated: string[] = []

  const gallery: any[] = Array.isArray(item?.images) ? item.images : []
  gallery.forEach((entry) => {
    const motionUrl = pickUrl(entry?.video?.play_addr?.url_list)
    if (motionUrl) {
      // An entry with its own video counts as a dynamic image, not as a still.
      animated.push(removeWatermark(motionUrl))
      return
    }
    const stillUrl = pickUrl(entry?.url_list)
    if (stillUrl) stills.push(stillUrl)
  })

  const isGalleryPost = stills.length > 0 || animated.length > 0
  const playUrl = isGalleryPost ? undefined : pickUrl(video?.play_addr?.url_list)
  const author = item?.author
  const avatar = pickUrl(author?.avatar_thumb?.url_list) || pickUrl(author?.avatar_medium?.url_list)

  let duration: number | undefined
  if (typeof video?.duration === 'number') duration = Math.round(video.duration / 1000)

  return {
    id: parsed.id || String(item?.aweme_id || ''),
    url: canonicalUrl(parsed, sourceUrl),
    title: String(item?.desc || '抖音作品'),
    desc: String(item?.desc || ''),
    type: parsed.type,
    authorName: String(author?.nickname || '未知作者'),
    authorAvatar: avatar,
    createTime: numberOrUndefined(item?.create_time),
    duration,
    coverUrl: pickUrl(video?.cover?.url_list),
    imageUrls: unique(stills),
    dynamicImageUrls: unique(animated),
    videoUrls: playUrl ? [removeWatermark(playUrl)] : [],
  }
}
228
+
229
/**
 * Build the text message for a post: a title line, then optionally the author, the
 * (possibly truncated) description and the post URL, joined with newlines.
 *
 * The description is left out when it is empty, equal to the title, or when
 * `config.maxDescLength` is not positive.
 */
function formatPostText(post: DouyinPost, config: DouyinConfigLike) {
  const heading = `抖音:${post.title || '抖音作品'}`
  const authorLine = config.showAuthor ? [`作者:${post.authorName}`] : []

  const wantsDesc = Boolean(post.desc) && post.desc !== post.title && config.maxDescLength > 0
  const descLine = wantsDesc ? [trimText(post.desc, config.maxDescLength, config.descTruncateSuffix)] : []

  const linkLine = config.showLink ? [post.url] : []

  return [heading, ...authorLine, ...descLine, ...linkLine].join('\n')
}
240
+
241
/**
 * Expand raw message content into every string that might contain a link: the content
 * itself, text and attribute values of parsed elements, raw `data="…"` attribute values,
 * and the entity-decoded / JSON-unwrapped forms of all of those.
 *
 * @returns Distinct candidates in insertion order.
 */
function expandTextCandidates(content: string): string[] {
  const pool = new Set<string>([content])

  try {
    for (const node of h.parse(content)) collectElementText(node, pool)
  } catch {
    // Some adapters deliver partial XML snippets; the regex pass below still covers them.
  }

  const dataAttribute = /\bdata=(?:"([^"]*)"|'([^']*)')/gi
  for (const [, doubleQuoted, singleQuoted] of content.matchAll(dataAttribute)) {
    pool.add(doubleQuoted || singleQuoted || '')
  }

  // Iterate over a snapshot: values added during this pass are not expanded again.
  Array.from(pool).forEach((entry) => {
    const decoded = decodeHtmlEntities(entry)
    pool.add(decoded)
    maybeCollectJsonValues(decoded, pool)
  })

  return Array.from(pool)
}
262
+
263
/**
 * Recursively add every string found in an element tree to `values`: bare string nodes,
 * string-valued attributes, and the same for all descendants.
 */
function collectElementText(element: h, values: Set<string>) {
  if (typeof element !== 'string') {
    const attributeValues = Object.values(element.attrs || {})
    attributeValues.forEach((value) => {
      if (typeof value === 'string') values.add(value)
    })

    const children = element.children || []
    children.forEach((child) => collectElementText(child as h, values))
    return
  }

  values.add(element)
}
277
+
278
/**
 * If `text` is valid JSON, add every string inside it to `values`. Otherwise add a copy of
 * `text` with `\"` and `\/` escapes undone, provided that changes anything.
 */
function maybeCollectJsonValues(text: string, values: Set<string>) {
  try {
    walkJson(JSON.parse(text), values)
  } catch {
    // Not JSON as a whole: it may still be an escaped JSON fragment.
    const withQuotes = text.split('\\"').join('"')
    const unescaped = withQuotes.split('\\/').join('/')
    if (unescaped !== text) values.add(unescaped)
  }
}
287
+
288
/**
 * Depth-first walk over a JSON value that adds every string it meets to `values`.
 * Numbers, booleans and `null` are ignored.
 */
function walkJson(value: unknown, values: Set<string>) {
  if (typeof value === 'string') {
    values.add(value)
  } else if (Array.isArray(value)) {
    value.forEach((entry) => walkJson(entry, values))
  } else if (typeof value === 'object' && value !== null) {
    Object.values(value).forEach((entry) => walkJson(entry, values))
  }
}
301
+
302
/**
 * Prepare a candidate string for link matching: decode HTML entities, undo `\/` escapes
 * and percent-decode the result when it is decodable as a whole.
 */
function normalizeText(text: string) {
  const unescaped = decodeHtmlEntities(text).replace(/\\\//g, '/')
  try {
    return decodeURIComponent(unescaped)
  } catch {
    // Malformed percent sequences: keep the text without percent-decoding.
    return unescaped
  }
}
311
+
312
// Entities understood by `decodeHtmlEntities`, keyed by their lower-case spelling.
const HTML_ENTITY_CHARS: Record<string, string> = {
  '&quot;': '"',
  '&#34;': '"',
  '&#x22;': '"',
  '&apos;': "'",
  '&#39;': "'",
  '&#x27;': "'",
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
}

/**
 * Decode one level of the HTML entities for `"`, `'`, `&`, `<` and `>`.
 *
 * All entities are replaced in a single pass, so text produced by decoding `&amp;` is never
 * decoded a second time (`&amp;lt;` becomes `&lt;`, not `<`). Named entities are matched
 * case-sensitively; the `x` of the hexadecimal forms may be upper- or lower-case.
 */
function decodeHtmlEntities(text: string) {
  return text.replace(
    /&(?:quot|apos|amp|lt|gt|#34|#39|#[xX]2[27]);/g,
    (entity) => HTML_ENTITY_CHARS[entity.toLowerCase()] ?? entity,
  )
}
324
+
325
/**
 * Tidy a matched link: ensure a protocol, strip trailing ASCII / full-width punctuation
 * and turn any remaining `&amp;` into `&`.
 */
function cleanUrl(url: string) {
  const withProtocol = ensureProtocol(url)
  const withoutTrailingPunctuation = withProtocol.replace(/[),,。;;!?!]+$/g, '')
  return withoutTrailingPunctuation.split('&amp;').join('&')
}
330
+
331
/** Prefix `https://` unless the string already starts with `http://` or `https://` (any case). */
function ensureProtocol(url: string) {
  const hasScheme = /^https?:\/\//i.test(url)
  if (hasScheme) return url
  return 'https://' + url
}
334
+
335
/**
 * GET `url` following redirects and return the response body as text.
 *
 * @throws Error carrying the HTTP status when the response is not OK.
 */
async function fetchText(url: string, config: DouyinConfigLike, headers: Record<string, string>) {
  const page = await fetchWithTimeout(url, config, { redirect: 'follow', headers })
  if (page.ok) return page.text()
  throw new Error(`请求抖音页面失败:HTTP ${page.status}`)
}
340
+
341
/**
 * `fetch` wrapper that aborts the request after `config.timeout` seconds and adds the
 * configured `user-agent` and, when set, `cookie` headers. Headers supplied through `init`
 * are applied last and therefore override the configured ones.
 *
 * The timer is cleared once `fetch` settles, so it covers the wait for the response only.
 */
async function fetchWithTimeout(url: string, config: DouyinConfigLike, init: RequestInit) {
  const aborter = new AbortController()
  const deadline = setTimeout(() => aborter.abort(), config.timeout * 1000)
  const cookieHeader = config.cookie ? { cookie: config.cookie } : {}

  try {
    return await fetch(url, {
      ...init,
      signal: aborter.signal,
      headers: {
        'user-agent': config.userAgent,
        ...cookieHeader,
        ...(init.headers || {}),
      },
    })
  } finally {
    clearTimeout(deadline)
  }
}
359
+
360
/**
 * Parse JSON after removing control characters (everything below U+0020 except tab, line
 * feed and carriage return, plus U+007F).
 *
 * @throws SyntaxError when the cleaned payload is still not valid JSON.
 */
function parseJsonLike(payload: string) {
  const controlChars = /[\u0000-\u0008\u000b\u000c\u000e-\u001f\u007f]/g
  const cleaned = payload.replace(controlChars, '')
  return JSON.parse(cleaned)
}
363
+
364
/**
 * Follow `path` through nested objects/arrays.
 *
 * @returns The value at the end of the path, or `undefined` as soon as a step would have to
 *          be taken on `null` / `undefined`. An empty path returns `source` itself.
 */
function deepGet(source: any, path: Array<string | number>) {
  return path.reduce(
    (node: any, key) => (node === null || node === undefined ? undefined : node[key]),
    source,
  )
}
372
+
373
/** Return the first argument that is neither `null` nor `undefined`, if there is one. */
function firstDefined<T>(...values: T[]): T | undefined {
  for (const candidate of values) {
    if (candidate !== null && candidate !== undefined) return candidate
  }
  return undefined
}
376
+
377
/** Return the first element of `value` when `value` is an array starting with a string. */
function pickUrl(value: unknown) {
  if (!Array.isArray(value)) return undefined
  const head = value[0]
  return typeof head === 'string' ? head : undefined
}
380
+
381
/** Replace the first occurrence of `playwm` in the URL with `play`. */
function removeWatermark(url: string) {
  const marker = 'playwm'
  const at = url.indexOf(marker)
  if (at < 0) return url
  return url.slice(0, at) + 'play' + url.slice(at + marker.length)
}
384
+
385
/**
 * Build the `www.douyin.com` URL for a post. Slides are addressed as notes and unknown
 * kinds as videos. Without an id, `fallback` is returned unchanged.
 */
function canonicalUrl(parsed: ParsedDouyinUrl, fallback: string) {
  if (parsed.id) {
    let segment: string = parsed.type
    if (segment === 'slides') segment = 'note'
    else if (segment === 'unknown') segment = 'video'
    return `https://www.douyin.com/${segment}/${parsed.id}`
  }
  return fallback
}
390
+
391
/** Pass numbers through (including `NaN`); map every other value to `undefined`. */
function numberOrUndefined(value: unknown) {
  if (typeof value !== 'number') return undefined
  return value
}
394
+
395
/** Drop empty strings and duplicates, keeping the order of first appearance. */
function unique(values: string[]) {
  const seen = new Set<string>()
  for (const value of values) {
    if (value) seen.add(value)
  }
  return Array.from(seen)
}
398
+
399
/**
 * Cut `text` to at most `maxLength` UTF-16 code units and append `suffix` when it was cut.
 *
 * If the cut would fall between the two halves of a surrogate pair (for example inside an
 * emoji), the dangling high surrogate is dropped as well so the result never ends in half a
 * character.
 *
 * @param text Text to shorten.
 * @param maxLength Maximum number of code units to keep.
 * @param suffix Marker appended only when truncation happened.
 * @returns `text` unchanged when it already fits, otherwise the shortened text plus `suffix`.
 */
function trimText(text: string, maxLength: number, suffix: string) {
  if (text.length <= maxLength) return text

  let end = maxLength
  const lastKept = text.charCodeAt(end - 1)
  const firstDropped = text.charCodeAt(end)
  const splitsPair =
    lastKept >= 0xd800 && lastKept <= 0xdbff && firstDropped >= 0xdc00 && firstDropped <= 0xdfff
  if (splitsPair) end -= 1

  return `${text.slice(0, end)}${suffix}`
}