@openclaw-china/shared 0.1.31 → 0.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,722 +1,722 @@
1
- /**
2
- * 媒体解析模块
3
- *
4
- * 提供统一的媒体路径提取、解析和规范化功能
5
- * 支持 Markdown 图片、HTML img 标签、MEDIA: 标记、本地路径等多种格式
6
- *
7
- * @module @openclaw-china/shared/media
8
- */
9
-
10
- import * as fs from "fs";
11
- import * as os from "os";
12
- import * as path from "path";
13
- import { fileURLToPath } from "url";
14
-
15
- // ============================================================================
16
- // 类型定义
17
- // ============================================================================
18
-
19
/** Broad category assigned to a media reference. */
export type MediaType =
  | "image"
  | "audio"
  | "video"
  | "file";

/** Syntax in which a media reference was written. */
export type MediaSourceKind =
  | "markdown"
  | "markdown-linked"
  | "html"
  | "bare";

/** One media reference extracted from text. */
export interface ExtractedMedia {
  /** Original path or URL. */
  source: string;
  /** Normalized local path (only meaningful for local files). */
  localPath?: string;
  /** Media category. */
  type: MediaType;
  /** Whether the reference is a local file. */
  isLocal: boolean;
  /** Whether the reference is an HTTP URL. */
  isHttp: boolean;
  /** File name. */
  fileName?: string;
  /** Origin of the reference: markdown / html / bare. */
  sourceKind?: MediaSourceKind;
}

/** Result of a media-parsing run. */
export interface MediaParseResult {
  /** Cleaned text (media markup removed). */
  text: string;
  /** Extracted images. */
  images: ExtractedMedia[];
  /** Extracted non-image files. */
  files: ExtractedMedia[];
  /** All media (images followed by files). */
  all: ExtractedMedia[];
}

/** Switches that control media parsing. */
export interface MediaParseOptions {
  /** Remove media markup from the text; default true. */
  removeFromText?: boolean;
  /** Check that local files exist; default false. */
  checkExists?: boolean;
  /** Existence-check function (for dependency injection). */
  existsSync?: (path: string) => boolean;
  /** Parse line-leading MEDIA: directives; default false. */
  parseMediaLines?: boolean;
  /** Parse Markdown images; default true. */
  parseMarkdownImages?: boolean;
  /** Parse HTML img tags; default true. */
  parseHtmlImages?: boolean;
  /** Parse bare local paths; default true. */
  parseBarePaths?: boolean;
  /** Parse files referenced by Markdown links; default true. */
  parseMarkdownLinks?: boolean;
}
84
-
85
- // ============================================================================
86
- // 常量定义
87
- // ============================================================================
88
-
89
/** Image file extensions (lowercase, no leading dot). */
export const IMAGE_EXTENSIONS = new Set([
  "png", "jpg", "jpeg", "gif", "webp", "bmp",
  "tiff", "tif", "heic", "heif", "svg", "ico",
]);

/** Audio file extensions (lowercase, no leading dot). */
export const AUDIO_EXTENSIONS = new Set([
  "mp3", "wav", "ogg", "m4a", "amr", "flac", "aac", "wma",
]);

/** Video file extensions (lowercase, no leading dot). */
export const VIDEO_EXTENSIONS = new Set([
  "mp4", "mov", "avi", "mkv", "webm", "flv", "wmv", "m4v",
]);

/** Non-image file extensions recognised during file extraction. */
export const NON_IMAGE_EXTENSIONS = new Set([
  // Documents
  "pdf", "doc", "docx", "xls", "xlsx", "csv", "ppt", "pptx",
  "txt", "md", "rtf", "odt", "ods",
  // Archives
  "zip", "rar", "7z", "tar", "gz", "tgz", "bz2",
  // Audio
  ...AUDIO_EXTENSIONS,
  // Video
  ...VIDEO_EXTENSIONS,
  // Data
  "json", "xml", "yaml", "yml",
]);

// ============================================================================
// Regular expressions
// ============================================================================

/**
 * Markdown image syntax: ![alt](path).
 * Captures the alt text and everything between the parentheses.
 */
const MARKDOWN_IMAGE_RE =
  /!\[([^\]]*)\]\(([^)]+)\)/g;

/** Image wrapped in a Markdown link: [![alt](img)](link). */
const MARKDOWN_LINKED_IMAGE_RE =
  /\[!\[([^\]]*)\]\(([^)]+)\)\]\(([^)]+)\)/g;

/** HTML img tag; `src` may be double-quoted, single-quoted or unquoted. */
const HTML_IMAGE_RE =
  /<img\b[^>]*\bsrc\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\s>]+))[^>]*>/gi;

/** Markdown link syntax: [label](path). */
const MARKDOWN_LINK_RE = /\[([^\]]*)\]\(([^)]+)\)/g;

/**
 * Bare local image path (not inside Markdown syntax), Unix or Windows style,
 * optionally wrapped in backticks.
 *
 * The extension list is generated from IMAGE_EXTENSIONS so the two cannot
 * drift apart. The trailing negative lookahead requires the extension to end
 * there, so a longer extension is never cut down to a shorter listed one.
 */
const IMAGE_EXT_PATTERN = Array.from(IMAGE_EXTENSIONS).join("|");
const BARE_IMAGE_PATH_RE = new RegExp(
  String.raw`\`?((?:\/(?:tmp|var|private|Users|home|root)\/[^\s\`'",)]+|[A-Za-z]:[\\/][^\s\`'",)]+)\.(?:${IMAGE_EXT_PATTERN})(?![A-Za-z0-9_]))\`?`,
  "gi"
);

/**
 * Bare local file path (non-image), optionally wrapped in backticks.
 * Generated dynamically from NON_IMAGE_EXTENSIONS.
 *
 * The negative lookahead after the extension makes `.docx` match as `docx`
 * rather than stopping at the earlier alternative `doc` (same for
 * xls/xlsx and ppt/pptx).
 */
const NON_IMAGE_EXT_PATTERN = Array.from(NON_IMAGE_EXTENSIONS).join("|");
// A Windows separator is either a doubled or a single backslash.
const WINDOWS_PATH_SEP = String.raw`(?:\\\\|\\)`;
const WINDOWS_FILE_PATH = String.raw`[A-Za-z]:${WINDOWS_PATH_SEP}(?:[^\\/:*?"<>|\r\n]+${WINDOWS_PATH_SEP})*[^\\/:*?"<>|\r\n]+`;
const UNIX_FILE_PATH = String.raw`\/(?:tmp|var|private|Users|home|root)\/[^\s'",)]+`;
const BARE_FILE_PATH_RE = new RegExp(
  String.raw`\`?((?:${UNIX_FILE_PATH}|${WINDOWS_FILE_PATH})\.(?:${NON_IMAGE_EXT_PATTERN})(?![A-Za-z0-9_]))\`?`,
  "gi"
);
219
-
220
// Helpers for parsing MEDIA: lines
const MEDIA_LINE_PREFIX = "MEDIA:";

/**
 * Removes one pair of matching quotes (", ' or `) that wraps the whole payload.
 *
 * @param value - payload text following the MEDIA: prefix
 * @returns the trimmed inner text, or `undefined` when the trimmed payload is
 *          shorter than two characters or is not wrapped in a matching pair
 */
function unwrapMediaLinePayload(value: string): string | undefined {
  const text = value.trim();
  if (text.length >= 2) {
    const quote = text.charAt(0);
    const isQuoteChar = quote === `"` || quote === "'" || quote === "`";
    if (isQuoteChar && text.charAt(text.length - 1) === quote) {
      return text.slice(1, -1).trim();
    }
  }
  return undefined;
}
232
-
233
/**
 * Strips wrapper punctuation from a MEDIA: line token: leading quotes and
 * opening brackets, then trailing quotes, closing brackets and `. , ;`.
 */
function cleanMediaLineCandidate(value: string): string {
  const withoutLeading = value.replace(/^[`"'[{(<]+/, "");
  return withoutLeading.replace(/[`"'\])}>.,;]+$/, "");
}
236
-
237
/**
 * Turns a MEDIA: line payload into candidate references.
 *
 * A payload wrapped in one pair of matching quotes with non-empty content is
 * a single candidate (so it may contain spaces); anything else is split on
 * whitespace.
 */
function splitMediaLineCandidates(payload: string): string[] {
  const quoted = unwrapMediaLinePayload(payload);
  return quoted ? [quoted] : payload.split(/\s+/).filter(Boolean);
}
242
-
243
- // ============================================================================
244
- // 路径处理函数
245
- // ============================================================================
246
-
247
/**
 * Returns true when the value starts with `http://` or `https://`
 * (case-insensitive).
 */
export function isHttpUrl(value: string): boolean {
  return /^https?:\/\//i.test(value);
}

/**
 * Returns true when the value starts with `file://` (case-insensitive).
 */
export function isFileUrl(value: string): boolean {
  return /^file:\/\//i.test(value);
}

/**
 * Returns true when the value is a local path reference: a `file://` URL,
 * a `MEDIA:` marker, an `attachment://` URL, a path starting with `/` or `~`,
 * or a Windows drive path such as `C:\` or `C:/`.
 *
 * The `file://` and `attachment://` schemes are matched case-insensitively,
 * in line with isFileUrl. The `MEDIA:` marker stays case-sensitive.
 * HTTP(S) URLs are never local.
 */
export function isLocalReference(raw: string): boolean {
  if (isHttpUrl(raw)) return false;
  return (
    isFileUrl(raw) ||
    raw.startsWith("MEDIA:") ||
    /^attachment:\/\//i.test(raw) ||
    raw.startsWith("/") ||
    raw.startsWith("~") ||
    /^[a-zA-Z]:[\\/]/.test(raw)
  );
}
276
-
277
/**
 * Normalizes a local path reference into an absolute filesystem path.
 *
 * Steps, in order:
 *  1. `file://` URLs are converted with `fileURLToPath`; if that throws, only
 *     the scheme is stripped.
 *  2. A leading `MEDIA:` or `attachment://` prefix is removed
 *     (case-insensitive), along with whitespace that followed it.
 *  3. Backslash-escaped spaces become plain spaces.
 *  4. URI decoding is attempted; on failure the text is kept as is.
 *  5. `~` and `~/...` expand to the current user's home directory.
 *     `~user` forms are not expanded.
 *  6. Anything still relative is resolved against `process.cwd()`.
 *
 * @param raw - reference as written in the source text
 * @returns absolute path
 */
export function normalizeLocalPath(raw: string): string {
  let p = raw.trim();

  // Same check as isFileUrl: scheme match is case-insensitive.
  if (/^file:\/\//i.test(p)) {
    try {
      return fileURLToPath(p);
    } catch {
      // Keep the root slash of POSIX paths; drop it only in front of a
      // Windows drive letter ("/C:/...").
      p = p.replace(/^file:\/\//i, "").replace(/^\/(?=[A-Za-z]:[\\/])/, "");
    }
  }

  // At most one prefix is removed. Trimming afterwards keeps "MEDIA: /abs"
  // from being treated as a relative path.
  p = p.replace(/^(?:MEDIA:|attachment:\/\/)/i, "").trim();

  // Unescape shell-style escaped spaces.
  p = p.replace(/\\ /g, " ");

  try {
    p = decodeURIComponent(p);
  } catch {
    // Not valid percent-encoding; keep the text unchanged.
  }

  if (p === "~" || p.startsWith("~/")) {
    p = path.join(os.homedir(), p.slice(1));
  }

  if (!path.isAbsolute(p)) {
    p = path.resolve(process.cwd(), p);
  }

  return p;
}
325
-
326
/**
 * Removes a trailing quoted Markdown title from a link destination
 * (`url "title"` or `url 'title'`) and trims surrounding whitespace.
 *
 * The title must be wrapped in one matching pair of quotes and may contain
 * the other quote character (e.g. `"it's"`). Input without such a title is
 * returned trimmed and otherwise unchanged, so paths containing spaces are
 * left alone.
 */
export function stripTitleFromUrl(value: string): string {
  const trimmed = value.trim();
  const match = trimmed.match(/^(\S+)\s+(?:"[^"]*"|'[^']*')\s*$/);
  return match?.[1] ?? trimmed;
}
335
-
336
/**
 * Returns the lowercase file extension without the leading dot, or an empty
 * string when `path.extname` finds none.
 */
export function getExtension(filePath: string): string {
  const dotted = path.extname(filePath).toLowerCase();
  return dotted.replace(/^\./, "");
}
343
-
344
/**
 * Returns true when the path's extension is listed in IMAGE_EXTENSIONS.
 */
export function isImagePath(filePath: string): boolean {
  const ext = getExtension(filePath);
  return ext !== "" && IMAGE_EXTENSIONS.has(ext);
}
351
-
352
/**
 * Returns true when the path's extension is listed in NON_IMAGE_EXTENSIONS.
 */
export function isNonImageFilePath(filePath: string): boolean {
  const ext = getExtension(filePath);
  return ext !== "" && NON_IMAGE_EXTENSIONS.has(ext);
}
359
-
360
/**
 * Classifies a path by its file extension.
 *
 * @returns "image", "audio" or "video" when the extension is in the matching
 *          set (checked in that order), otherwise "file"
 */
export function detectMediaType(filePath: string): MediaType {
  const ext = getExtension(filePath);
  const categories: Array<[Set<string>, MediaType]> = [
    [IMAGE_EXTENSIONS, "image"],
    [AUDIO_EXTENSIONS, "audio"],
    [VIDEO_EXTENSIONS, "video"],
  ];

  for (const [extensions, mediaType] of categories) {
    if (extensions.has(ext)) return mediaType;
  }
  return "file";
}
372
-
373
- // ============================================================================
374
- // 媒体提取函数
375
- // ============================================================================
376
-
377
/**
 * Builds an ExtractedMedia record for one reference.
 *
 * The reference is first cleaned with stripTitleFromUrl (whitespace trimmed,
 * quoted Markdown title removed). HTTP/local classification then runs on the
 * cleaned value, so destinations such as ` /tmp/a.png "title"` are still
 * recognised as local.
 *
 * @param source - reference exactly as captured from the text
 * @param sourceKind - syntax the reference was found in
 * @param options - parse options; accepted for call-site symmetry, not read here
 * @returns the media record; `localPath` is set only for local references and
 *          `fileName` only for local references or parseable HTTP URLs
 */
function createExtractedMedia(
  source: string,
  sourceKind: MediaSourceKind,
  options?: MediaParseOptions
): ExtractedMedia {
  const cleanSource = stripTitleFromUrl(source);
  const isHttp = isHttpUrl(cleanSource);
  const isLocal = !isHttp && isLocalReference(cleanSource);

  let localPath: string | undefined;
  let fileName: string | undefined;

  if (isLocal) {
    localPath = normalizeLocalPath(cleanSource);
    fileName = path.basename(localPath);
  } else if (isHttp) {
    try {
      // Use only the URL path so query strings and fragments do not end up
      // in the file name.
      fileName = path.basename(new URL(cleanSource).pathname) || undefined;
    } catch {
      // Unparseable URL: leave fileName unset.
    }
  }

  // Prefer the file name for type detection; fall back to the cleaned source.
  const type = detectMediaType(fileName || cleanSource);

  return {
    source: cleanSource,
    localPath,
    type,
    isLocal,
    isHttp,
    fileName,
    sourceKind,
  };
}
416
-
417
/**
 * Extracts every media reference from a piece of text.
 *
 * Passes run in a fixed order, each one on the text left by the previous pass:
 *   0. line-leading `MEDIA:` directives (only when `parseMediaLines` is set)
 *   1. images wrapped in Markdown links `[![alt](img)](link)`
 *   2. Markdown images `![alt](path)`
 *   3. HTML `<img>` tags
 *   4. Markdown links to local non-image files
 *   5. bare local image paths (needs `parseBarePaths` and `parseMarkdownImages`)
 *   6. bare local non-image paths (needs `parseBarePaths` and `parseMarkdownLinks`)
 *
 * Items are de-duplicated by `localPath`, falling back to `source`. With
 * `checkExists`, local files that do not exist are not collected.
 *
 * @param text - text to parse
 * @param options - parse options; see MediaParseOptions for defaults
 * @returns the cleaned text (unchanged when `removeFromText` is false) and the
 *          collected images and files
 */
export function extractMediaFromText(
  text: string,
  options: MediaParseOptions = {}
): MediaParseResult {
  const {
    removeFromText = true,
    checkExists = false,
    existsSync,
    parseMediaLines = false,
    parseMarkdownImages = true,
    parseHtmlImages = true,
    parseBarePaths = true,
    parseMarkdownLinks = true,
  } = options;

  const images: ExtractedMedia[] = [];
  const files: ExtractedMedia[] = [];
  const seenSources = new Set<string>();
  let result = text;

  // Adds a media item unless it is a duplicate or (with checkExists) a
  // missing local file. Returns whether the item was added.
  const addMedia = (media: ExtractedMedia): boolean => {
    const key = media.localPath || media.source;
    if (seenSources.has(key)) return false;

    if (checkExists && media.isLocal && media.localPath) {
      const exists = existsSync
        ? existsSync(media.localPath)
        : fs.existsSync(media.localPath);
      if (!exists) return false;
    }

    seenSources.add(key);
    (media.type === "image" ? images : files).push(media);
    return true;
  };

  // 0. Line-leading MEDIA: directives
  if (parseMediaLines) {
    const keptLines: string[] = [];
    for (const line of result.split("\n")) {
      const trimmedStart = line.trimStart();
      if (!trimmedStart.startsWith(MEDIA_LINE_PREFIX)) {
        keptLines.push(line);
        continue;
      }

      const payload = trimmedStart.slice(MEDIA_LINE_PREFIX.length).trim();
      if (!payload) {
        keptLines.push(line);
        continue;
      }

      let addedAny = false;
      for (const raw of splitMediaLineCandidates(payload)) {
        const candidate = stripTitleFromUrl(cleanMediaLineCandidate(raw));
        if (!candidate) continue;
        if (!isHttpUrl(candidate) && !isLocalReference(candidate)) continue;
        if (addMedia(createExtractedMedia(candidate, "bare", options))) {
          addedAny = true;
        }
      }

      // Drop the line only when it yielded media and removal is enabled.
      if (!addedAny || !removeFromText) {
        keptLines.push(line);
      }
    }

    if (removeFromText) {
      result = keptLines.join("\n");
    }
  }

  // Edits are queued per pass and applied together so match offsets stay valid.
  type Replacement = { start: number; end: number; replacement: string };
  const replacements: Replacement[] = [];

  // Queues replacement of a whole match; a no-op when removal is disabled.
  const queueReplacement = (match: RegExpMatchArray, replacement: string): void => {
    if (!removeFromText || match.index === undefined) return;
    replacements.push({
      start: match.index,
      end: match.index + match[0].length,
      replacement,
    });
  };

  // Applies queued edits from the end of the text backwards, so earlier
  // offsets are not shifted by later edits.
  const applyReplacements = (): void => {
    if (replacements.length === 0) return;
    replacements.sort((a, b) => b.start - a.start);
    for (const { start, end, replacement } of replacements) {
      result = result.slice(0, start) + replacement + result.slice(end);
    }
    replacements.length = 0;
  };

  // Shared by the image passes: collect the reference when it is an image and
  // queue its markup for removal. Markup is queued even when addMedia rejects
  // the item, as in the original per-pass code.
  const collectImage = (
    match: RegExpMatchArray,
    src: string,
    kind: MediaSourceKind
  ): void => {
    const media = createExtractedMedia(src, kind, options);
    if (media.type !== "image") return;
    addMedia(media);
    queueReplacement(match, "");
  };

  // 1. Images wrapped in Markdown links: [![alt](img)](link)
  if (parseMarkdownImages) {
    // Match against `result`, not the original `text`: pass 0 may already have
    // removed lines, and the offsets recorded here are applied to `result`.
    for (const match of [...result.matchAll(MARKDOWN_LINKED_IMAGE_RE)]) {
      const [, , imgSrc] = match;
      collectImage(match, imgSrc, "markdown");
    }
    applyReplacements();
  }

  // 2. Markdown images: ![alt](path)
  if (parseMarkdownImages) {
    for (const match of [...result.matchAll(MARKDOWN_IMAGE_RE)]) {
      const [, , src] = match;
      collectImage(match, src, "markdown");
    }
    applyReplacements();
  }

  // 3. HTML img tags
  if (parseHtmlImages) {
    for (const match of [...result.matchAll(HTML_IMAGE_RE)]) {
      // One capture group per quoting style; only one of them is set.
      const [, src1, src2, src3] = match;
      const src = src1 || src2 || src3;
      if (src) {
        collectImage(match, src, "html");
      }
    }
    applyReplacements();
  }

  // 4. Files referenced by Markdown links: [label](path)
  if (parseMarkdownLinks) {
    MARKDOWN_LINK_RE.lastIndex = 0;
    for (const match of [...result.matchAll(MARKDOWN_LINK_RE)]) {
      const [, , rawPath] = match;
      const idx = match.index ?? 0;

      // Skip image syntax ![...](...): the preceding character is "!".
      if (idx > 0 && result[idx - 1] === "!") continue;

      // Only local references are extracted.
      if (!isLocalReference(rawPath)) continue;

      const media = createExtractedMedia(rawPath, "markdown", options);

      // Only known non-image file types are handled here.
      if (media.type === "image") continue;
      if (!isNonImageFilePath(media.localPath || rawPath)) continue;

      if (addMedia(media)) {
        const fileName = media.fileName || path.basename(rawPath);
        queueReplacement(match, `[文件: ${fileName}]`);
      }
    }
    applyReplacements();
  }

  // 5. Bare local image paths
  if (parseBarePaths && parseMarkdownImages) {
    BARE_IMAGE_PATH_RE.lastIndex = 0;
    // Ignore paths that directly follow "](": those are inside Markdown
    // syntax left in place by earlier passes.
    const bareImageMatches = [...result.matchAll(BARE_IMAGE_PATH_RE)].filter((m) => {
      const idx = m.index ?? 0;
      return !result.slice(Math.max(0, idx - 10), idx).includes("](");
    });

    for (const match of bareImageMatches) {
      const [, rawPath] = match;
      collectImage(match, rawPath, "bare");
    }
    applyReplacements();
  }

  // 6. Bare local file paths (non-image)
  if (parseBarePaths && parseMarkdownLinks) {
    BARE_FILE_PATH_RE.lastIndex = 0;
    for (const match of [...result.matchAll(BARE_FILE_PATH_RE)]) {
      const [, rawPath] = match;
      const media = createExtractedMedia(rawPath, "bare", options);
      if (media.type === "image") continue;

      if (addMedia(media)) {
        const fileName = media.fileName || path.basename(rawPath);
        queueReplacement(match, `[文件: ${fileName}]`);
      }
    }
    applyReplacements();
  }

  // Collapse runs of blank lines left behind by removals.
  if (removeFromText) {
    result = result.replace(/\n{3,}/g, "\n\n").trim();
  }

  return {
    text: result,
    images,
    files,
    all: [...images, ...files],
  };
}
688
-
689
/**
 * Convenience wrapper around extractMediaFromText that disables Markdown-link
 * file parsing and returns only the cleaned text and the images.
 */
export function extractImagesFromText(
  text: string,
  options: Omit<MediaParseOptions, "parseMarkdownLinks"> = {}
): { text: string; images: ExtractedMedia[] } {
  const { text: cleanedText, images } = extractMediaFromText(text, {
    ...options,
    parseMarkdownLinks: false,
  });
  return { text: cleanedText, images };
}
705
-
706
/**
 * Convenience wrapper around extractMediaFromText that disables Markdown and
 * HTML image parsing and returns only the cleaned text and the files.
 */
export function extractFilesFromText(
  text: string,
  options: Omit<MediaParseOptions, "parseMarkdownImages" | "parseHtmlImages"> = {}
): { text: string; files: ExtractedMedia[] } {
  const { text: cleanedText, files } = extractMediaFromText(text, {
    ...options,
    parseMarkdownImages: false,
    parseHtmlImages: false,
  });
  return { text: cleanedText, files };
}
1
+ /**
2
+ * 媒体解析模块
3
+ *
4
+ * 提供统一的媒体路径提取、解析和规范化功能
5
+ * 支持 Markdown 图片、HTML img 标签、MEDIA: 标记、本地路径等多种格式
6
+ *
7
+ * @module @openclaw-china/shared/media
8
+ */
9
+
10
+ import * as fs from "fs";
11
+ import * as os from "os";
12
+ import * as path from "path";
13
+ import { fileURLToPath } from "url";
14
+
15
+ // ============================================================================
16
+ // 类型定义
17
+ // ============================================================================
18
+
19
/** Broad category assigned to a media reference. */
export type MediaType =
  | "image"
  | "audio"
  | "video"
  | "file";

/** Syntax in which a media reference was written. */
export type MediaSourceKind =
  | "markdown"
  | "markdown-linked"
  | "html"
  | "bare";

/** One media reference extracted from text. */
export interface ExtractedMedia {
  /** Original path or URL. */
  source: string;
  /** Normalized local path (only meaningful for local files). */
  localPath?: string;
  /** Media category. */
  type: MediaType;
  /** Whether the reference is a local file. */
  isLocal: boolean;
  /** Whether the reference is an HTTP URL. */
  isHttp: boolean;
  /** File name. */
  fileName?: string;
  /** Origin of the reference: markdown / html / bare. */
  sourceKind?: MediaSourceKind;
}

/** Result of a media-parsing run. */
export interface MediaParseResult {
  /** Cleaned text (media markup removed). */
  text: string;
  /** Extracted images. */
  images: ExtractedMedia[];
  /** Extracted non-image files. */
  files: ExtractedMedia[];
  /** All media (images followed by files). */
  all: ExtractedMedia[];
}

/** Switches that control media parsing. */
export interface MediaParseOptions {
  /** Remove media markup from the text; default true. */
  removeFromText?: boolean;
  /** Check that local files exist; default false. */
  checkExists?: boolean;
  /** Existence-check function (for dependency injection). */
  existsSync?: (path: string) => boolean;
  /** Parse line-leading MEDIA: directives; default false. */
  parseMediaLines?: boolean;
  /** Parse Markdown images; default true. */
  parseMarkdownImages?: boolean;
  /** Parse HTML img tags; default true. */
  parseHtmlImages?: boolean;
  /** Parse bare local paths; default true. */
  parseBarePaths?: boolean;
  /** Parse files referenced by Markdown links; default true. */
  parseMarkdownLinks?: boolean;
}
84
+
85
+ // ============================================================================
86
+ // 常量定义
87
+ // ============================================================================
88
+
89
/** Image file extensions (lowercase, no leading dot). */
export const IMAGE_EXTENSIONS = new Set([
  "png", "jpg", "jpeg", "gif", "webp", "bmp",
  "tiff", "tif", "heic", "heif", "svg", "ico",
]);

/** Audio file extensions (lowercase, no leading dot). */
export const AUDIO_EXTENSIONS = new Set([
  "mp3", "wav", "ogg", "m4a", "amr", "flac", "aac", "wma",
]);

/** Video file extensions (lowercase, no leading dot). */
export const VIDEO_EXTENSIONS = new Set([
  "mp4", "mov", "avi", "mkv", "webm", "flv", "wmv", "m4v",
]);

/** Non-image file extensions recognised during file extraction. */
export const NON_IMAGE_EXTENSIONS = new Set([
  // Documents
  "pdf", "doc", "docx", "xls", "xlsx", "csv", "ppt", "pptx",
  "txt", "md", "rtf", "odt", "ods",
  // Archives
  "zip", "rar", "7z", "tar", "gz", "tgz", "bz2",
  // Audio
  ...AUDIO_EXTENSIONS,
  // Video
  ...VIDEO_EXTENSIONS,
  // Data
  "json", "xml", "yaml", "yml",
]);

// ============================================================================
// Regular expressions
// ============================================================================

/**
 * Markdown image syntax: ![alt](path).
 * Captures the alt text and everything between the parentheses.
 */
const MARKDOWN_IMAGE_RE =
  /!\[([^\]]*)\]\(([^)]+)\)/g;

/** Image wrapped in a Markdown link: [![alt](img)](link). */
const MARKDOWN_LINKED_IMAGE_RE =
  /\[!\[([^\]]*)\]\(([^)]+)\)\]\(([^)]+)\)/g;

/** HTML img tag; `src` may be double-quoted, single-quoted or unquoted. */
const HTML_IMAGE_RE =
  /<img\b[^>]*\bsrc\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\s>]+))[^>]*>/gi;

/** Markdown link syntax: [label](path). */
const MARKDOWN_LINK_RE = /\[([^\]]*)\]\(([^)]+)\)/g;

/**
 * Bare local image path (not inside Markdown syntax), Unix or Windows style,
 * optionally wrapped in backticks.
 *
 * The extension list is generated from IMAGE_EXTENSIONS so the two cannot
 * drift apart. The trailing negative lookahead requires the extension to end
 * there, so a longer extension is never cut down to a shorter listed one.
 */
const IMAGE_EXT_PATTERN = Array.from(IMAGE_EXTENSIONS).join("|");
const BARE_IMAGE_PATH_RE = new RegExp(
  String.raw`\`?((?:\/(?:tmp|var|private|Users|home|root)\/[^\s\`'",)]+|[A-Za-z]:[\\/][^\s\`'",)]+)\.(?:${IMAGE_EXT_PATTERN})(?![A-Za-z0-9_]))\`?`,
  "gi"
);

/**
 * Bare local file path (non-image), optionally wrapped in backticks.
 * Generated dynamically from NON_IMAGE_EXTENSIONS.
 *
 * The negative lookahead after the extension makes `.docx` match as `docx`
 * rather than stopping at the earlier alternative `doc` (same for
 * xls/xlsx and ppt/pptx).
 */
const NON_IMAGE_EXT_PATTERN = Array.from(NON_IMAGE_EXTENSIONS).join("|");
// A Windows separator is either a doubled or a single backslash.
const WINDOWS_PATH_SEP = String.raw`(?:\\\\|\\)`;
const WINDOWS_FILE_PATH = String.raw`[A-Za-z]:${WINDOWS_PATH_SEP}(?:[^\\/:*?"<>|\r\n]+${WINDOWS_PATH_SEP})*[^\\/:*?"<>|\r\n]+`;
const UNIX_FILE_PATH = String.raw`\/(?:tmp|var|private|Users|home|root)\/[^\s'",)]+`;
const BARE_FILE_PATH_RE = new RegExp(
  String.raw`\`?((?:${UNIX_FILE_PATH}|${WINDOWS_FILE_PATH})\.(?:${NON_IMAGE_EXT_PATTERN})(?![A-Za-z0-9_]))\`?`,
  "gi"
);
219
+
220
// Helpers for parsing MEDIA: lines
const MEDIA_LINE_PREFIX = "MEDIA:";

/**
 * Removes one pair of matching quotes (", ' or `) that wraps the whole payload.
 *
 * @param value - payload text following the MEDIA: prefix
 * @returns the trimmed inner text, or `undefined` when the trimmed payload is
 *          shorter than two characters or is not wrapped in a matching pair
 */
function unwrapMediaLinePayload(value: string): string | undefined {
  const text = value.trim();
  if (text.length >= 2) {
    const quote = text.charAt(0);
    const isQuoteChar = quote === `"` || quote === "'" || quote === "`";
    if (isQuoteChar && text.charAt(text.length - 1) === quote) {
      return text.slice(1, -1).trim();
    }
  }
  return undefined;
}
232
+
233
/**
 * Strips wrapper punctuation from a MEDIA: line token: leading quotes and
 * opening brackets, then trailing quotes, closing brackets and `. , ;`.
 */
function cleanMediaLineCandidate(value: string): string {
  const withoutLeading = value.replace(/^[`"'[{(<]+/, "");
  return withoutLeading.replace(/[`"'\])}>.,;]+$/, "");
}
236
+
237
/**
 * Turns a MEDIA: line payload into candidate references.
 *
 * A payload wrapped in one pair of matching quotes with non-empty content is
 * a single candidate (so it may contain spaces); anything else is split on
 * whitespace.
 */
function splitMediaLineCandidates(payload: string): string[] {
  const quoted = unwrapMediaLinePayload(payload);
  return quoted ? [quoted] : payload.split(/\s+/).filter(Boolean);
}
242
+
243
+ // ============================================================================
244
+ // 路径处理函数
245
+ // ============================================================================
246
+
247
/**
 * Returns true when the value starts with `http://` or `https://`
 * (case-insensitive).
 */
export function isHttpUrl(value: string): boolean {
  return /^https?:\/\//i.test(value);
}

/**
 * Returns true when the value starts with `file://` (case-insensitive).
 */
export function isFileUrl(value: string): boolean {
  return /^file:\/\//i.test(value);
}

/**
 * Returns true when the value is a local path reference: a `file://` URL,
 * a `MEDIA:` marker, an `attachment://` URL, a path starting with `/` or `~`,
 * or a Windows drive path such as `C:\` or `C:/`.
 *
 * The `file://` and `attachment://` schemes are matched case-insensitively,
 * in line with isFileUrl. The `MEDIA:` marker stays case-sensitive.
 * HTTP(S) URLs are never local.
 */
export function isLocalReference(raw: string): boolean {
  if (isHttpUrl(raw)) return false;
  return (
    isFileUrl(raw) ||
    raw.startsWith("MEDIA:") ||
    /^attachment:\/\//i.test(raw) ||
    raw.startsWith("/") ||
    raw.startsWith("~") ||
    /^[a-zA-Z]:[\\/]/.test(raw)
  );
}
276
+
277
/**
 * Normalizes a local path reference into an absolute filesystem path.
 *
 * Steps, in order:
 *  1. `file://` URLs are converted with `fileURLToPath`; if that throws, only
 *     the scheme is stripped.
 *  2. A leading `MEDIA:` or `attachment://` prefix is removed
 *     (case-insensitive), along with whitespace that followed it.
 *  3. Backslash-escaped spaces become plain spaces.
 *  4. URI decoding is attempted; on failure the text is kept as is.
 *  5. `~` and `~/...` expand to the current user's home directory.
 *     `~user` forms are not expanded.
 *  6. Anything still relative is resolved against `process.cwd()`.
 *
 * @param raw - reference as written in the source text
 * @returns absolute path
 */
export function normalizeLocalPath(raw: string): string {
  let p = raw.trim();

  // Same check as isFileUrl: scheme match is case-insensitive.
  if (/^file:\/\//i.test(p)) {
    try {
      return fileURLToPath(p);
    } catch {
      // Keep the root slash of POSIX paths; drop it only in front of a
      // Windows drive letter ("/C:/...").
      p = p.replace(/^file:\/\//i, "").replace(/^\/(?=[A-Za-z]:[\\/])/, "");
    }
  }

  // At most one prefix is removed. Trimming afterwards keeps "MEDIA: /abs"
  // from being treated as a relative path.
  p = p.replace(/^(?:MEDIA:|attachment:\/\/)/i, "").trim();

  // Unescape shell-style escaped spaces.
  p = p.replace(/\\ /g, " ");

  try {
    p = decodeURIComponent(p);
  } catch {
    // Not valid percent-encoding; keep the text unchanged.
  }

  if (p === "~" || p.startsWith("~/")) {
    p = path.join(os.homedir(), p.slice(1));
  }

  if (!path.isAbsolute(p)) {
    p = path.resolve(process.cwd(), p);
  }

  return p;
}
325
+
326
/**
 * Removes a trailing quoted Markdown title from a link destination
 * (`url "title"` or `url 'title'`) and trims surrounding whitespace.
 *
 * The title must be wrapped in one matching pair of quotes and may contain
 * the other quote character (e.g. `"it's"`). Input without such a title is
 * returned trimmed and otherwise unchanged, so paths containing spaces are
 * left alone.
 */
export function stripTitleFromUrl(value: string): string {
  const trimmed = value.trim();
  const match = trimmed.match(/^(\S+)\s+(?:"[^"]*"|'[^']*')\s*$/);
  return match?.[1] ?? trimmed;
}
335
+
336
/**
 * Returns the lowercase file extension without the leading dot, or an empty
 * string when `path.extname` finds none.
 */
export function getExtension(filePath: string): string {
  const dotted = path.extname(filePath).toLowerCase();
  return dotted.replace(/^\./, "");
}
343
+
344
/**
 * Returns true when the path's extension is listed in IMAGE_EXTENSIONS.
 */
export function isImagePath(filePath: string): boolean {
  const ext = getExtension(filePath);
  return ext !== "" && IMAGE_EXTENSIONS.has(ext);
}
351
+
352
/**
 * Returns true when the path's extension is listed in NON_IMAGE_EXTENSIONS.
 */
export function isNonImageFilePath(filePath: string): boolean {
  const ext = getExtension(filePath);
  return ext !== "" && NON_IMAGE_EXTENSIONS.has(ext);
}
359
+
360
/**
 * Classifies a path by its file extension.
 *
 * @returns "image", "audio" or "video" when the extension is in the matching
 *          set (checked in that order), otherwise "file"
 */
export function detectMediaType(filePath: string): MediaType {
  const ext = getExtension(filePath);
  const categories: Array<[Set<string>, MediaType]> = [
    [IMAGE_EXTENSIONS, "image"],
    [AUDIO_EXTENSIONS, "audio"],
    [VIDEO_EXTENSIONS, "video"],
  ];

  for (const [extensions, mediaType] of categories) {
    if (extensions.has(ext)) return mediaType;
  }
  return "file";
}
372
+
373
+ // ============================================================================
374
+ // 媒体提取函数
375
+ // ============================================================================
376
+
377
/**
 * Builds an ExtractedMedia record for one reference.
 *
 * The reference is first cleaned with stripTitleFromUrl (whitespace trimmed,
 * quoted Markdown title removed). HTTP/local classification then runs on the
 * cleaned value, so destinations such as ` /tmp/a.png "title"` are still
 * recognised as local.
 *
 * @param source - reference exactly as captured from the text
 * @param sourceKind - syntax the reference was found in
 * @param options - parse options; accepted for call-site symmetry, not read here
 * @returns the media record; `localPath` is set only for local references and
 *          `fileName` only for local references or parseable HTTP URLs
 */
function createExtractedMedia(
  source: string,
  sourceKind: MediaSourceKind,
  options?: MediaParseOptions
): ExtractedMedia {
  const cleanSource = stripTitleFromUrl(source);
  const isHttp = isHttpUrl(cleanSource);
  const isLocal = !isHttp && isLocalReference(cleanSource);

  let localPath: string | undefined;
  let fileName: string | undefined;

  if (isLocal) {
    localPath = normalizeLocalPath(cleanSource);
    fileName = path.basename(localPath);
  } else if (isHttp) {
    try {
      // Use only the URL path so query strings and fragments do not end up
      // in the file name.
      fileName = path.basename(new URL(cleanSource).pathname) || undefined;
    } catch {
      // Unparseable URL: leave fileName unset.
    }
  }

  // Prefer the file name for type detection; fall back to the cleaned source.
  const type = detectMediaType(fileName || cleanSource);

  return {
    source: cleanSource,
    localPath,
    type,
    isLocal,
    isHttp,
    fileName,
    sourceKind,
  };
}
416
+
417
/**
 * Extracts every media reference found in a piece of text.
 *
 * The passes run in a fixed order, and each pass scans the text left over by
 * the previous one:
 *   0. line-leading MEDIA directives (opt-in via `parseMediaLines`)
 *   1. Markdown images wrapped in a link: `[![alt](img)](link)`
 *   2. Markdown images: `![alt](path)`
 *   3. HTML img tags
 *   4. Markdown links to local non-image files: `[label](path)`
 *   5. bare local image paths
 *   6. bare local non-image file paths
 *
 * Media items are de-duplicated by `localPath`, falling back to `source`.
 *
 * @param text - The text to parse.
 * @param options - Parse options. Defaults: `removeFromText` true,
 *   `checkExists` false, `parseMediaLines` false, and all other `parse*`
 *   switches true. A custom `existsSync` replaces `fs.existsSync` when given.
 * @returns The (optionally cleaned) text plus the extracted images and files;
 *   `all` is the images followed by the files.
 */
export function extractMediaFromText(
  text: string,
  options: MediaParseOptions = {}
): MediaParseResult {
  const {
    removeFromText = true,
    checkExists = false,
    existsSync,
    parseMediaLines = false,
    parseMarkdownImages = true,
    parseHtmlImages = true,
    parseBarePaths = true,
    parseMarkdownLinks = true,
  } = options;

  const images: ExtractedMedia[] = [];
  const files: ExtractedMedia[] = [];
  const seenSources = new Set<string>();
  let result = text;

  // Registers a media item unless it is a duplicate or (with `checkExists`)
  // a local file that does not exist. Returns whether the item was stored.
  const addMedia = (media: ExtractedMedia): boolean => {
    const key = media.localPath || media.source;
    if (seenSources.has(key)) return false;

    if (checkExists && media.isLocal && media.localPath) {
      const exists = existsSync
        ? existsSync(media.localPath)
        : fs.existsSync(media.localPath);
      if (!exists) return false;
    }

    seenSources.add(key);

    if (media.type === "image") {
      images.push(media);
    } else {
      files.push(media);
    }
    return true;
  };

  // 0. Lines that start (after leading whitespace) with MEDIA_LINE_PREFIX.
  if (parseMediaLines) {
    const lines = result.split("\n");
    const keptLines: string[] = [];
    for (const line of lines) {
      const trimmedStart = line.trimStart();
      if (!trimmedStart.startsWith(MEDIA_LINE_PREFIX)) {
        keptLines.push(line);
        continue;
      }

      const payload = trimmedStart.slice(MEDIA_LINE_PREFIX.length).trim();
      if (!payload) {
        // A directive with no payload is ordinary text.
        keptLines.push(line);
        continue;
      }

      const candidates = splitMediaLineCandidates(payload);
      let addedAny = false;
      for (const raw of candidates) {
        const candidate = stripTitleFromUrl(cleanMediaLineCandidate(raw));
        if (!candidate) continue;
        if (!isHttpUrl(candidate) && !isLocalReference(candidate)) {
          continue;
        }
        const media = createExtractedMedia(candidate, "bare", options);
        if (addMedia(media)) {
          addedAny = true;
        }
      }

      // The line is dropped only when it yielded media AND removal is enabled.
      if (!addedAny || !removeFromText) {
        keptLines.push(line);
      }
    }

    if (removeFromText) {
      result = keptLines.join("\n");
    }
  }

  // Pending edits against `result`, expressed as offsets into `result`.
  type Replacement = { start: number; end: number; replacement: string };
  const replacements: Replacement[] = [];

  // Queues an edit for the matched span; a no-op when removal is disabled or
  // the match carries no index.
  const queueReplacement = (
    index: number | undefined,
    length: number,
    replacement: string
  ): void => {
    if (!removeFromText || index === undefined) return;
    replacements.push({ start: index, end: index + length, replacement });
  };

  // Applies the queued edits back-to-front so earlier offsets stay valid,
  // then empties the queue.
  const applyReplacements = (): void => {
    if (replacements.length === 0) return;
    replacements.sort((a, b) => b.start - a.start);
    for (const { start, end, replacement } of replacements) {
      result = result.slice(0, start) + replacement + result.slice(end);
    }
    replacements.length = 0;
  };

  // NOTE: in the image passes (1, 2, 3, 5) the matched markup is removed even
  // when `addMedia` rejects the item (duplicate, or missing file with
  // `checkExists`). The file passes (4, 6) only rewrite the text when the
  // item was actually stored.

  // 1. Markdown images wrapped in a link: [![alt](img)](link)
  if (parseMarkdownImages) {
    // Scan `result`, not the original `text`: pass 0 may already have removed
    // lines, and the replacement offsets must refer to the string being edited.
    MARKDOWN_LINKED_IMAGE_RE.lastIndex = 0;
    const linkedMatches = [...result.matchAll(MARKDOWN_LINKED_IMAGE_RE)];
    for (const match of linkedMatches) {
      const [fullMatch, _alt, imgSrc] = match;
      const media = createExtractedMedia(imgSrc, "markdown", options);
      if (media.type === "image") {
        addMedia(media);
        queueReplacement(match.index, fullMatch.length, "");
      }
    }
    applyReplacements();
  }

  // 2. Markdown images: ![alt](path)
  if (parseMarkdownImages) {
    MARKDOWN_IMAGE_RE.lastIndex = 0;
    const mdMatches = [...result.matchAll(MARKDOWN_IMAGE_RE)];
    for (const match of mdMatches) {
      const [fullMatch, _alt, src] = match;
      const media = createExtractedMedia(src, "markdown", options);
      if (media.type === "image") {
        addMedia(media);
        queueReplacement(match.index, fullMatch.length, "");
      }
    }
    applyReplacements();
  }

  // 3. HTML img tags
  if (parseHtmlImages) {
    HTML_IMAGE_RE.lastIndex = 0;
    const htmlMatches = [...result.matchAll(HTML_IMAGE_RE)];
    for (const match of htmlMatches) {
      const [fullMatch, src1, src2, src3] = match;
      // The source is whichever of the three capture groups matched.
      const src = src1 || src2 || src3;
      if (!src) continue;

      const media = createExtractedMedia(src, "html", options);
      if (media.type === "image") {
        addMedia(media);
        queueReplacement(match.index, fullMatch.length, "");
      }
    }
    applyReplacements();
  }

  // 4. Markdown links to local non-image files: [label](path)
  if (parseMarkdownLinks) {
    MARKDOWN_LINK_RE.lastIndex = 0;
    const linkMatches = [...result.matchAll(MARKDOWN_LINK_RE)];
    for (const match of linkMatches) {
      const [fullMatch, _label, rawPath] = match;
      const idx = match.index ?? 0;

      // Skip image syntax ![...](...): the preceding character is "!".
      if (idx > 0 && result[idx - 1] === "!") continue;

      // Only local references are handled here.
      if (!isLocalReference(rawPath)) continue;

      const media = createExtractedMedia(rawPath, "markdown", options);

      // Only non-image files are handled here.
      if (media.type !== "image" && isNonImageFilePath(media.localPath || rawPath)) {
        if (addMedia(media)) {
          const fileName = media.fileName || path.basename(rawPath);
          queueReplacement(match.index, fullMatch.length, `[文件: ${fileName}]`);
        }
      }
    }
    applyReplacements();
  }

  // 5. Bare local image paths
  if (parseBarePaths && parseMarkdownImages) {
    BARE_IMAGE_PATH_RE.lastIndex = 0;
    const bareImageMatches = [...result.matchAll(BARE_IMAGE_PATH_RE)];

    // Drop paths that sit inside Markdown syntax: "](" appears within the
    // 10 characters before the match.
    const newBareImageMatches = bareImageMatches.filter((m) => {
      const idx = m.index ?? 0;
      const before = result.slice(Math.max(0, idx - 10), idx);
      return !before.includes("](");
    });

    for (const match of newBareImageMatches) {
      const [fullMatch, rawPath] = match;
      const media = createExtractedMedia(rawPath, "bare", options);
      if (media.type === "image") {
        addMedia(media);
        queueReplacement(match.index, fullMatch.length, "");
      }
    }
    applyReplacements();
  }

  // 6. Bare local file paths (non-image)
  if (parseBarePaths && parseMarkdownLinks) {
    BARE_FILE_PATH_RE.lastIndex = 0;
    const bareFileMatches = [...result.matchAll(BARE_FILE_PATH_RE)];

    for (const match of bareFileMatches) {
      const [fullMatch, rawPath] = match;
      const media = createExtractedMedia(rawPath, "bare", options);

      if (media.type !== "image") {
        if (addMedia(media)) {
          const fileName = media.fileName || path.basename(rawPath);
          queueReplacement(match.index, fullMatch.length, `[文件: ${fileName}]`);
        }
      }
    }
    applyReplacements();
  }

  // Collapse runs of three or more newlines left behind by removals.
  if (removeFromText) {
    result = result.replace(/\n{3,}/g, "\n\n").trim();
  }

  return {
    text: result,
    images,
    files,
    all: [...images, ...files],
  };
}
688
+
689
/**
 * Convenience wrapper that extracts images only.
 *
 * Delegates to `extractMediaFromText` with `parseMarkdownLinks` forced to
 * `false` and returns just the resulting text and image list.
 *
 * @param text - The text to parse.
 * @param options - Parse options (without `parseMarkdownLinks`).
 * @returns The resulting text and the extracted images.
 */
export function extractImagesFromText(
  text: string,
  options: Omit<MediaParseOptions, "parseMarkdownLinks"> = {}
): { text: string; images: ExtractedMedia[] } {
  const { text: cleanedText, images } = extractMediaFromText(text, {
    ...options,
    parseMarkdownLinks: false,
  });
  return { text: cleanedText, images };
}
705
+
706
/**
 * Convenience wrapper that extracts non-image files only.
 *
 * Delegates to `extractMediaFromText` with `parseMarkdownImages` and
 * `parseHtmlImages` forced to `false` and returns just the resulting text and
 * file list.
 *
 * @param text - The text to parse.
 * @param options - Parse options (without the two image switches).
 * @returns The resulting text and the extracted files.
 */
export function extractFilesFromText(
  text: string,
  options: Omit<MediaParseOptions, "parseMarkdownImages" | "parseHtmlImages"> = {}
): { text: string; files: ExtractedMedia[] } {
  const { text: cleanedText, files } = extractMediaFromText(text, {
    ...options,
    parseMarkdownImages: false,
    parseHtmlImages: false,
  });
  return { text: cleanedText, files };
}