deepfish-ai 1.0.21 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,6 +8,7 @@ const mammoth = require('mammoth')
8
8
  const docx = require('docx')
9
9
  const PizZip = require('pizzip')
10
10
  const Docxtemplater = require('docxtemplater')
11
+ const cheerio = require('cheerio')
11
12
 
12
13
  // ─── 统一返回结构 ─────────────────────────────────────────────────────────────
13
14
 
@@ -23,6 +24,204 @@ function resolvePath(filePath) {
23
24
  return path.resolve(process.cwd(), filePath)
24
25
  }
25
26
 
27
+ // ─── 格式转换辅助函数 ────────────────────────────────────────────────────────
28
+
29
/**
 * Render an HTML string to a PDF file using puppeteer.
 *
 * puppeteer is required lazily so the rest of the module keeps working when
 * it is not installed; only this conversion fails.
 *
 * @param {string} html - HTML markup to render.
 * @param {string} outputPath - Destination path of the PDF. Its parent
 *   directory is created when missing.
 * @returns {Promise<void>} Resolves once the PDF has been written and the
 *   browser has been closed.
 * @throws {Error} When puppeteer cannot be loaded (the underlying failure is
 *   available on `error.cause`), or when launching/rendering fails.
 */
async function htmlStringToPdf(html, outputPath) {
  let puppeteer
  try {
    puppeteer = require('puppeteer')
  } catch (err) {
    // Keep the original failure reachable: a broken install and a missing
    // install need different remedies, and the message alone cannot tell.
    throw new Error('puppeteer 未安装,请先执行 npm install puppeteer', { cause: err })
  }

  // Create the target directory before starting Chromium so an unusable
  // output path fails without paying for a browser launch.
  fs.ensureDirSync(path.dirname(outputPath))

  const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'] })
  try {
    const page = await browser.newPage()
    // 'networkidle0' waits until the page has no in-flight network requests.
    await page.setContent(html, { waitUntil: 'networkidle0' })
    await page.pdf({ path: outputPath, format: 'A4', printBackground: true })
  } finally {
    // Always release the browser process, even when rendering throws.
    await browser.close()
  }
}
49
+
50
/**
 * Escape the characters that are markup-significant in HTML/XML text content.
 *
 * `&` is escaped first so that the entities produced for `<` and `>` are not
 * escaped a second time.
 *
 * @param {string} str - Raw text.
 * @returns {string} Text safe to embed between tags.
 */
function escapeHtml(str) {
  return str
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
}
56
+
57
/**
 * Decode the five predefined XML entities (`&amp;`, `&lt;`, `&gt;`,
 * `&quot;`, `&apos;`) in a string.
 *
 * Decoding happens in a single pass so that text produced by decoding is
 * never decoded again: `&amp;lt;` yields the literal text `&lt;`, not `<`.
 *
 * @param {string} str - XML-escaped text.
 * @returns {string} The decoded text.
 */
function unescapeXml(str) {
  const entities = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" }
  return str.replace(/&(amp|lt|gt|quot|apos);/g, (_, name) => entities[name])
}
65
+
66
/**
 * Build the XML of a single WordprocessingML run (`<w:r>`) holding `text`.
 *
 * @param {string} rPr - Run-properties XML placed verbatim at the start of
 *   the run (may be an empty string).
 * @param {string} text - Unescaped run text.
 * @returns {string} The run XML, or '' when `text` is empty/falsy.
 */
function buildRunXml(rPr, text) {
  if (!text) {
    return ''
  }
  // Text with leading or trailing whitespace is flagged with
  // xml:space="preserve" on its <w:t> element.
  const hasEdgeWhitespace = /^\s|\s$/.test(text)
  const spaceAttr = hasEdgeWhitespace ? ' xml:space="preserve"' : ''
  const textElement = `<w:t${spaceAttr}>${escapeHtml(text)}</w:t>`
  return `<w:r>${rPr}${textElement}</w:r>`
}
71
+
72
/**
 * Convert Markdown text into a standalone HTML document string.
 *
 * This is a regex-based converter, not a full Markdown parser. It handles
 * fenced code blocks, inline code, ATX headings, horizontal rules, bold /
 * italic / strikethrough, images, links, list items and single-line
 * blockquotes. Ordered and unordered items are both wrapped in `<ul>`.
 *
 * Rendered code (fenced blocks and inline spans) is parked behind
 * placeholders while the remaining passes run, then restored at the end.
 * Without that, the emphasis and paragraph passes rewrite code content:
 * `x_y_z` would gain `<em>` tags and the inner lines of a code block would
 * be wrapped in `<p>`.
 *
 * @param {string} md - Markdown source.
 * @returns {string} A complete HTML document with a small embedded stylesheet.
 */
function markdownToHtmlString(md) {
  const parked = []
  // kind is 'B' for block-level code and 'I' for inline code. NUL
  // delimiters keep the placeholder inert for every later pattern.
  const park = (kind, rendered) => {
    parked.push(rendered)
    return `\u0000${kind}${parked.length - 1}\u0000`
  }

  let html = md
    .replace(/```(\w*)\n([\s\S]*?)```/g, (_, lang, code) =>
      park('B', `<pre><code class="language-${lang}">${escapeHtml(code.trimEnd())}</code></pre>`))
    .replace(/`([^`]+)`/g, (_, c) => park('I', `<code>${escapeHtml(c)}</code>`))
    // Longest heading marker first so '######' is not consumed by '#'.
    .replace(/^###### (.+)$/gm, '<h6>$1</h6>')
    .replace(/^##### (.+)$/gm, '<h5>$1</h5>')
    .replace(/^#### (.+)$/gm, '<h4>$1</h4>')
    .replace(/^### (.+)$/gm, '<h3>$1</h3>')
    .replace(/^## (.+)$/gm, '<h2>$1</h2>')
    .replace(/^# (.+)$/gm, '<h1>$1</h1>')
    .replace(/^[-*_]{3,}$/gm, '<hr>')
    // Strongest emphasis first so '***' is not consumed by '**' or '*'.
    .replace(/\*\*\*(.+?)\*\*\*/g, '<strong><em>$1</em></strong>')
    .replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>')
    .replace(/__(.+?)__/g, '<strong>$1</strong>')
    .replace(/\*(.+?)\*/g, '<em>$1</em>')
    .replace(/_(.+?)_/g, '<em>$1</em>')
    .replace(/~~(.+?)~~/g, '<del>$1</del>')
    // Images before links: image syntax is link syntax with a leading '!'.
    .replace(/!\[([^\]]*)\]\(([^)]+)\)/g, '<img alt="$1" src="$2">')
    .replace(/\[([^\]]+)\]\(([^)]+)\)/g, '<a href="$2">$1</a>')
    .replace(/^[ \t]*[-*+] (.+)$/gm, '<li>$1</li>')
    .replace(/^[ \t]*\d+\. (.+)$/gm, '<li>$1</li>')
    .replace(/^> (.+)$/gm, '<blockquote>$1</blockquote>')

  // Wrap each run of consecutive <li> lines in a single <ul>.
  html = html.replace(/(<li>[\s\S]+?<\/li>)(\n(?!<li>)|$)/g, (_, items) => `<ul>${items}</ul>`)
  // Any remaining non-empty line that does not already start with a tag (or
  // with a parked code block) becomes a paragraph.
  html = html.replace(/^(?!<[a-z]|\u0000B|$)(.+)$/gm, '<p>$1</p>')
  // Restore parked code. Unknown indices are left untouched so stray NUL
  // sequences in the input can never turn into the text "undefined".
  html = html.replace(/\u0000[BI](\d+)\u0000/g, (token, index) => {
    const rendered = parked[Number(index)]
    return rendered === undefined ? token : rendered
  })

  return [
    '<!DOCTYPE html><html><head><meta charset="utf-8"><style>',
    'body{font-family:sans-serif;line-height:1.7;max-width:900px;margin:40px auto;padding:0 20px;color:#333}',
    'h1,h2,h3,h4,h5,h6{margin-top:1.2em}',
    'pre{background:#f5f5f5;padding:12px;border-radius:4px;overflow:auto}',
    'code{background:#f0f0f0;padding:2px 4px;border-radius:3px}',
    'blockquote{border-left:4px solid #ddd;margin:0;padding-left:1em;color:#666}',
    'table{border-collapse:collapse;width:100%}td,th{border:1px solid #ddd;padding:6px 10px}',
    `</style></head><body>${html}</body></html>`,
  ].join('\n')
}
109
+
110
/**
 * Convert an HTML string into Markdown text.
 *
 * The document is parsed with cheerio and walked recursively. Headings,
 * paragraphs, line breaks, rules, emphasis, strikethrough, inline code,
 * code blocks, blockquotes, links, images, lists and tables are mapped to
 * Markdown; `head`, `style` and `script` are dropped; any other element
 * contributes only the Markdown of its children.
 *
 * Items of an `<ol>` are numbered by their position among sibling `<li>`
 * elements; all other list items are emitted as `- ` bullets.
 *
 * @param {string} html - HTML markup (fragment or full document).
 * @returns {string} Markdown with runs of 3+ newlines collapsed to one
 *   blank line and surrounding whitespace trimmed.
 */
function htmlStringToMarkdown(html) {
  const $ = cheerio.load(html)
  const body = $('body').length ? $('body') : $.root()

  function nodeToMd(el) {
    if (el.type === 'text') return el.data || ''

    const node = $(el)
    const tag = (el.name || '').toLowerCase()
    // Lazily render children so branches that ignore them do no work.
    const inner = () => node.contents().toArray().map(nodeToMd).join('')

    switch (tag) {
      case 'h1': return `# ${inner()}\n\n`
      case 'h2': return `## ${inner()}\n\n`
      case 'h3': return `### ${inner()}\n\n`
      case 'h4': return `#### ${inner()}\n\n`
      case 'h5': return `##### ${inner()}\n\n`
      case 'h6': return `###### ${inner()}\n\n`
      case 'p': return `${inner()}\n\n`
      case 'br': return '\n'
      case 'hr': return '---\n\n'
      case 'strong':
      case 'b': return `**${inner()}**`
      case 'em':
      case 'i': return `*${inner()}*`
      case 'del':
      case 's': return `~~${inner()}~~`
      case 'code': return `\`${inner()}\``
      case 'pre': {
        // Use the raw text so markup inside the block is not re-rendered.
        const codeEl = node.find('code')
        const lang = (codeEl.attr('class') || '').replace('language-', '')
        const content = codeEl.length ? codeEl.text() : node.text()
        return `\`\`\`${lang}\n${content}\n\`\`\`\n\n`
      }
      case 'blockquote':
        return inner().split('\n').map(l => l ? `> ${l}` : '').join('\n') + '\n\n'
      case 'a': return `[${inner()}](${node.attr('href') || ''})`
      case 'img': return `![${node.attr('alt') || ''}](${node.attr('src') || ''})`
      case 'ul':
      case 'ol': return inner() + '\n'
      case 'li': {
        if (node.parent().is('ol')) {
          // 1-based position among the preceding <li> siblings.
          const position = node.prevAll('li').length + 1
          return `${position}. ${inner()}\n`
        }
        return `- ${inner()}\n`
      }
      case 'table': {
        const rows = node.find('tr').toArray()
        if (!rows.length) return ''
        return rows.map((row, i) => {
          const cells = $(row).find('th,td').toArray().map(c => $(c).text().trim())
          const line = `| ${cells.join(' | ')} |`
          // The first row is treated as the header and followed by the
          // separator row.
          return i === 0 ? `${line}\n| ${cells.map(() => '---').join(' | ')} |` : line
        }).join('\n') + '\n\n'
      }
      case 'head':
      case 'style':
      case 'script': return ''
      default: return inner()
    }
  }

  return body.contents().toArray().map(nodeToMd).join('').replace(/\n{3,}/g, '\n\n').trim()
}
169
+
170
/**
 * Parse an HTML string into the `sections` description array consumed by
 * the docx builder in this file.
 *
 * Emitted entries:
 *   { type: 'heading', level, text }      for h1-h6
 *   { type: 'paragraph', text }           for p, pre, blockquote content and
 *                                         loose inline content
 *   { type: 'list', items }               for ul
 *   { type: 'numberedList', items }       for ol
 *   { type: 'table', rows }               for table (rows of cell strings)
 *   { type: 'horizontalRule' }            for hr
 *
 * Text that is not wrapped in one of the handled block tags (for example
 * `<strong>x</strong> rest` directly under `<body>`, or `<div>text</div>`)
 * is collected into paragraphs instead of being discarded. Consecutive text
 * nodes and inline elements form one paragraph, and any non-inline element
 * ends it.
 *
 * @param {string} html - HTML markup (fragment or full document).
 * @returns {Array<object>} Section descriptors in document order.
 */
function htmlStringToDocxSections(html) {
  const $ = cheerio.load(html)
  const sections = []

  // Tags that get their own section entry.
  const BLOCK_SELECTOR = 'h1,h2,h3,h4,h5,h6,p,ul,ol,table,hr,pre,blockquote'
  // Elements whose text flows into the surrounding paragraph.
  const INLINE_TAGS = new Set([
    'a', 'abbr', 'b', 'bdi', 'bdo', 'cite', 'code', 'data', 'del', 'dfn',
    'em', 'font', 'i', 'img', 'ins', 'kbd', 'label', 'mark', 'q', 's',
    'samp', 'small', 'span', 'strong', 'sub', 'sup', 'time', 'u', 'var',
  ])

  const trimmedText = (el) => $(el).text().trim()

  // Emit a paragraph for loose inline content, with whitespace collapsed the
  // way HTML rendering would collapse it.
  function pushLooseParagraph(raw) {
    const text = raw.replace(/\s+/g, ' ').trim()
    if (text) sections.push({ type: 'paragraph', text })
  }

  // Walk a sibling list, buffering inline content between block elements.
  function walk(nodes) {
    let pending = ''
    const flush = () => {
      pushLooseParagraph(pending)
      pending = ''
    }
    for (const el of nodes) {
      if (el.type === 'text') {
        pending += el.data || ''
        continue
      }
      // Skips comments, directives and script/style nodes.
      if (el.type !== 'tag') continue

      const tag = (el.name || '').toLowerCase()
      if (tag === 'br') {
        pending += ' '
        continue
      }
      // An inline element wrapping block content (e.g. <a><h2>..</h2></a>)
      // is treated as a container so the block is still recognised.
      if (INLINE_TAGS.has(tag) && $(el).find(BLOCK_SELECTOR).length === 0) {
        pending += $(el).text()
        continue
      }
      flush()
      processNode(el)
    }
    flush()
  }

  function processNode(el) {
    const node = $(el)
    const tag = (el.name || '').toLowerCase()
    const headingMatch = tag.match(/^h([1-6])$/)
    if (headingMatch) {
      sections.push({ type: 'heading', level: Number(headingMatch[1]), text: node.text().trim() })
      return
    }
    switch (tag) {
      case 'p':
        if (node.text().trim()) sections.push({ type: 'paragraph', text: node.text().trim() })
        break
      case 'ul':
        sections.push({ type: 'list', items: node.find('li').toArray().map(trimmedText) })
        break
      case 'ol':
        sections.push({ type: 'numberedList', items: node.find('li').toArray().map(trimmedText) })
        break
      case 'table': {
        const rows = node.find('tr').toArray().map(row =>
          $(row).find('th,td').toArray().map(trimmedText))
        if (rows.length) sections.push({ type: 'table', rows })
        break
      }
      case 'hr':
        sections.push({ type: 'horizontalRule' })
        break
      case 'pre':
        // Not trimmed: whitespace inside preformatted text is kept as-is.
        sections.push({ type: 'paragraph', text: node.text() })
        break
      case 'blockquote':
        // One "> "-prefixed paragraph per inner <p>, or a single one for a
        // blockquote that holds bare text.
        node.find('p').each((_, pEl) => {
          const t = $(pEl).text().trim()
          if (t) sections.push({ type: 'paragraph', text: `> ${t}` })
        })
        if (!node.find('p').length && node.text().trim()) {
          sections.push({ type: 'paragraph', text: `> ${node.text().trim()}` })
        }
        break
      default:
        // Generic container: descend, including its text nodes.
        walk(node.contents().toArray())
    }
  }

  walk($('body').contents().toArray())
  return sections
}
224
+
26
225
  // ─── 内部辅助:将 sections 描述转换为 docx children ──────────────────────────
27
226
  /**
28
227
  * sections 数组每项结构:
@@ -439,6 +638,242 @@ async function overwriteDocx(filePath, sections = []) {
439
638
  }
440
639
  }
441
640
 
641
+ // ─── Word 格式转换函数 ───────────────────────────────────────────────────────
642
+
643
/**
 * Convert a Word document (.docx) to a PDF file.
 *
 * The document is converted to HTML with mammoth, wrapped in a minimal
 * styled page and rendered through htmlStringToPdf.
 *
 * @param {string} inputPath - Source .docx path (resolved with resolvePath).
 * @param {string} outputPath - Destination .pdf path (resolved with resolvePath).
 * @returns {Promise<object>} `ok({ inputPath, outputPath })` with the resolved
 *   paths, or a `fail(...)` result when the input is missing or any step throws.
 */
async function wordToPdf(inputPath, outputPath) {
  try {
    const source = resolvePath(inputPath)
    const target = resolvePath(outputPath)

    const sourceExists = fs.existsSync(source)
    if (!sourceExists) {
      return fail(`File does not exist: ${source}`, { inputPath: source })
    }

    const converted = await mammoth.convertToHtml({ path: source })
    const page = [
      '<!DOCTYPE html><html><head><meta charset="utf-8"><style>',
      'body{font-family:sans-serif;line-height:1.7;margin:40px;color:#333}',
      'table{border-collapse:collapse;width:100%}td,th{border:1px solid #ddd;padding:6px 10px}',
      `</style></head><body>${converted.value}</body></html>`,
    ].join('\n')

    await htmlStringToPdf(page, target)
    return ok({ inputPath: source, outputPath: target })
  } catch (error) {
    // Report the caller-supplied (unresolved) paths on unexpected failures.
    return fail(error, { inputPath, outputPath })
  }
}
664
+
665
/**
 * Convert a Word document (.docx) to a standalone HTML file.
 *
 * @param {string} inputPath - Source .docx path (resolved with resolvePath).
 * @param {string} outputPath - Destination .html path; its parent directory
 *   is created when missing.
 * @returns {Promise<object>} `ok({ inputPath, outputPath, messages })` where
 *   `messages` is the `messages` value returned by mammoth, or a `fail(...)`
 *   result when the input is missing or any step throws.
 */
async function wordToHtml(inputPath, outputPath) {
  try {
    const source = resolvePath(inputPath)
    const target = resolvePath(outputPath)

    const sourceExists = fs.existsSync(source)
    if (!sourceExists) {
      return fail(`File does not exist: ${source}`, { inputPath: source })
    }

    const converted = await mammoth.convertToHtml({ path: source })
    const page = [
      '<!DOCTYPE html><html><head><meta charset="utf-8"><style>',
      'body{font-family:sans-serif;line-height:1.7;max-width:900px;margin:40px auto;padding:0 20px;color:#333}',
      'table{border-collapse:collapse;width:100%}td,th{border:1px solid #ddd;padding:6px 10px}',
      `</style></head><body>${converted.value}</body></html>`,
    ].join('\n')

    fs.ensureDirSync(path.dirname(target))
    fs.writeFileSync(target, page, 'utf8')
    return ok({ inputPath: source, outputPath: target, messages: converted.messages })
  } catch (error) {
    // Report the caller-supplied (unresolved) paths on unexpected failures.
    return fail(error, { inputPath, outputPath })
  }
}
687
+
688
/**
 * Convert a Word document (.docx) to a Markdown file.
 *
 * mammoth produces HTML, which htmlStringToMarkdown then turns into Markdown.
 *
 * @param {string} inputPath - Source .docx path (resolved with resolvePath).
 * @param {string} outputPath - Destination .md path; its parent directory is
 *   created when missing.
 * @returns {Promise<object>} `ok({ inputPath, outputPath })` with the resolved
 *   paths, or a `fail(...)` result when the input is missing or any step throws.
 */
async function wordToMarkdown(inputPath, outputPath) {
  try {
    const source = resolvePath(inputPath)
    const target = resolvePath(outputPath)

    const sourceExists = fs.existsSync(source)
    if (!sourceExists) {
      return fail(`File does not exist: ${source}`, { inputPath: source })
    }

    const converted = await mammoth.convertToHtml({ path: source })
    const markdown = htmlStringToMarkdown(converted.value)

    fs.ensureDirSync(path.dirname(target))
    fs.writeFileSync(target, markdown, 'utf8')
    return ok({ inputPath: source, outputPath: target })
  } catch (error) {
    // Report the caller-supplied (unresolved) paths on unexpected failures.
    return fail(error, { inputPath, outputPath })
  }
}
707
+
708
/**
 * Convert a Markdown file to a Word document (.docx).
 *
 * Pipeline: Markdown -> HTML (markdownToHtmlString) -> section descriptors
 * (htmlStringToDocxSections) -> docx children (buildChildren) -> .docx buffer.
 *
 * @param {string} inputPath - Source .md path (resolved with resolvePath).
 * @param {string} outputPath - Destination .docx path; its parent directory
 *   is created when missing.
 * @returns {Promise<object>} `ok({ inputPath, outputPath, sectionCount })`, or
 *   a `fail(...)` result when the input is missing or any step throws.
 */
async function markdownToWord(inputPath, outputPath) {
  try {
    const source = resolvePath(inputPath)
    const target = resolvePath(outputPath)

    const sourceExists = fs.existsSync(source)
    if (!sourceExists) {
      return fail(`File does not exist: ${source}`, { inputPath: source })
    }

    const markdown = fs.readFileSync(source, 'utf8')
    const sections = htmlStringToDocxSections(markdownToHtmlString(markdown))

    fs.ensureDirSync(path.dirname(target))
    const wordDocument = new docx.Document({
      sections: [{ properties: {}, children: buildChildren(sections, docx) }],
    })
    const packed = await docx.Packer.toBuffer(wordDocument)
    fs.writeFileSync(target, packed)

    return ok({ inputPath: source, outputPath: target, sectionCount: sections.length })
  } catch (error) {
    // Report the caller-supplied (unresolved) paths on unexpected failures.
    return fail(error, { inputPath, outputPath })
  }
}
732
+
733
/**
 * Convert an HTML file to a Word document (.docx).
 *
 * Pipeline: HTML -> section descriptors (htmlStringToDocxSections) -> docx
 * children (buildChildren) -> .docx buffer.
 *
 * @param {string} inputPath - Source .html path (resolved with resolvePath).
 * @param {string} outputPath - Destination .docx path; its parent directory
 *   is created when missing.
 * @returns {Promise<object>} `ok({ inputPath, outputPath, sectionCount })`, or
 *   a `fail(...)` result when the input is missing or any step throws.
 */
async function htmlToWord(inputPath, outputPath) {
  try {
    const source = resolvePath(inputPath)
    const target = resolvePath(outputPath)

    const sourceExists = fs.existsSync(source)
    if (!sourceExists) {
      return fail(`File does not exist: ${source}`, { inputPath: source })
    }

    const markup = fs.readFileSync(source, 'utf8')
    const sections = htmlStringToDocxSections(markup)

    fs.ensureDirSync(path.dirname(target))
    const wordDocument = new docx.Document({
      sections: [{ properties: {}, children: buildChildren(sections, docx) }],
    })
    const packed = await docx.Packer.toBuffer(wordDocument)
    fs.writeFileSync(target, packed)

    return ok({ inputPath: source, outputPath: target, sectionCount: sections.length })
  } catch (error) {
    // Report the caller-supplied (unresolved) paths on unexpected failures.
    return fail(error, { inputPath, outputPath })
  }
}
756
+
757
+ // ─── Format-preserving text replacement ──────────────────────────────────────
758
+
759
/**
 * Replace `search` with `replacement` inside one paragraph's XML, editing
 * only the contents of the affected `<w:t>` elements.
 *
 * The paragraph text is the concatenation of the first `<w:t>` of every run,
 * so a match may span several runs. The replacement text is written into the
 * run where the match starts (inheriting that run's formatting), and the
 * matched characters are removed from the following runs. Everything else in
 * the paragraph - run properties, non-text runs such as tabs, breaks or
 * drawings, and runs the match does not touch - is left byte-for-byte as is.
 *
 * @param {string} para - XML of a single `<w:p>` element.
 * @param {string} search - Non-empty text to find.
 * @param {string} replacement - Text to insert for every match.
 * @returns {{xml: string, count: number}} Patched XML and number of matches.
 */
function patchParagraphXml(para, search, replacement) {
  const runRe = /<w:r(?:\s[^>]*)?>[\s\S]*?<\/w:r>/g
  const textRe = /<w:t(?:\s[^>]*)?>([^<]*)<\/w:t>/

  // Locate the <w:t> element of every text-bearing run within `para`.
  const runs = []
  let runMatch
  while ((runMatch = runRe.exec(para)) !== null) {
    const textMatch = textRe.exec(runMatch[0])
    if (!textMatch) continue // run without text (e.g. field characters)
    const start = runMatch.index + textMatch.index
    runs.push({
      start,
      end: start + textMatch[0].length,
      text: unescapeXml(textMatch[1]),
    })
  }
  if (!runs.length) return { xml: para, count: 0 }

  const fullText = runs.map(r => r.text).join('')
  if (!fullText.includes(search)) return { xml: para, count: 0 }

  // Character range each run covers inside fullText.
  let offset = 0
  const ranges = runs.map(r => {
    const begin = offset
    offset += r.text.length
    return { begin, end: offset }
  })

  // All non-overlapping match positions, left to right.
  const occurrences = []
  let from = 0
  while (true) {
    const pos = fullText.indexOf(search, from)
    if (pos === -1) break
    occurrences.push(pos)
    from = pos + search.length
  }

  // Redistribute the text: unmatched characters stay in their own run, each
  // replacement goes to the run that owns the first matched character.
  const newTexts = runs.map(() => '')
  let ri = 0
  let pos = 0
  const copyUntil = (limit) => {
    while (pos < limit) {
      while (ranges[ri].end <= pos) ri++
      const stop = Math.min(ranges[ri].end, limit)
      newTexts[ri] += fullText.slice(pos, stop)
      pos = stop
    }
  }
  for (const matchPos of occurrences) {
    copyUntil(matchPos)
    while (ranges[ri].end <= matchPos) ri++
    newTexts[ri] += replacement
    pos = matchPos + search.length
  }
  copyUntil(fullText.length)

  // Splice back to front so earlier offsets stay valid.
  let xml = para
  for (let i = runs.length - 1; i >= 0; i--) {
    if (newTexts[i] === runs[i].text) continue
    const text = newTexts[i]
    // Leading/trailing whitespace needs xml:space="preserve" to survive.
    const spaceAttr = /^\s|\s$/.test(text) ? ' xml:space="preserve"' : ''
    xml = xml.slice(0, runs[i].start) +
      `<w:t${spaceAttr}>${escapeHtml(text)}</w:t>` +
      xml.slice(runs[i].end)
  }
  return { xml, count: occurrences.length }
}

/**
 * Replace text in a Word document while keeping its existing formatting.
 *
 * Matches may span several `<w:r>` runs. Only the text of the affected runs
 * is rewritten; the document structure is not rebuilt. The file is rewritten
 * in place, and only when at least one replacement was made.
 *
 * @param {string} filePath - Path of the .docx file (resolved with resolvePath).
 * @param {Array<{search:string, replace:string}>} replacements - Rules applied
 *   in order; rules with an empty `search` are skipped, and a missing
 *   `replace` deletes the matched text.
 * @returns {Promise<object>} `ok({ filePath, totalReplacements })`, or a
 *   `fail(...)` result when the file or `word/document.xml` is missing or
 *   any step throws.
 */
async function patchDocxText(filePath, replacements) {
  try {
    const fullPath = resolvePath(filePath)
    if (!fs.existsSync(fullPath)) {
      return fail(`File does not exist: ${fullPath}`, { filePath: fullPath })
    }
    const content = fs.readFileSync(fullPath, 'binary')
    const zip = new PizZip(content)
    const docFile = zip.file('word/document.xml')
    if (!docFile) return fail('word/document.xml not found', { filePath: fullPath })
    let xml = docFile.asText()
    let totalCount = 0

    for (const { search, replace: replaceStr } of (replacements || [])) {
      if (!search) continue
      const replacement = replaceStr == null ? '' : String(replaceStr)

      xml = xml.replace(/<w:p(?:\s[^>]*)?>[\s\S]*?<\/w:p>/g, (para) => {
        const patched = patchParagraphXml(para, search, replacement)
        totalCount += patched.count
        return patched.xml
      })
    }

    // Leave the file untouched when nothing matched.
    if (totalCount > 0) {
      zip.file('word/document.xml', xml)
      const buf = zip.generate({ type: 'nodebuffer', compression: 'DEFLATE' })
      fs.writeFileSync(fullPath, buf)
    }
    return ok({ filePath: fullPath, totalReplacements: totalCount })
  } catch (error) {
    return fail(error, { filePath })
  }
}
876
+
442
877
  // ─── 工具描述 ─────────────────────────────────────────────────────────────────
443
878
 
444
879
  const descriptions = [
@@ -676,6 +1111,114 @@ const descriptions = [
676
1111
  },
677
1112
  },
678
1113
  },
1114
+ {
1115
+ type: 'function',
1116
+ function: {
1117
+ name: 'wordToPdf',
1118
+ description:
1119
+ '将 Word 文档(.docx)转换为 PDF 文件。依赖 puppeteer,转换时保留基础格式。参数:inputPath 为源 .docx 路径;outputPath 为输出 .pdf 路径。返回值:对象,包含 success、data(含 inputPath、outputPath)、error。',
1120
+ parameters: {
1121
+ type: 'object',
1122
+ properties: {
1123
+ inputPath: { type: 'string', description: '源 .docx 文件路径。' },
1124
+ outputPath: { type: 'string', description: '输出 .pdf 文件路径。' },
1125
+ },
1126
+ required: ['inputPath', 'outputPath'],
1127
+ },
1128
+ },
1129
+ },
1130
+ {
1131
+ type: 'function',
1132
+ function: {
1133
+ name: 'wordToHtml',
1134
+ description:
1135
+ '将 Word 文档(.docx)转换为 HTML 文件,保留格式信息(粗体、斜体、表格等)。参数:inputPath 为源 .docx 路径;outputPath 为输出 .html 路径。返回值:对象,包含 success、data(含 inputPath、outputPath、messages)、error。',
1136
+ parameters: {
1137
+ type: 'object',
1138
+ properties: {
1139
+ inputPath: { type: 'string', description: '源 .docx 文件路径。' },
1140
+ outputPath: { type: 'string', description: '输出 .html 文件路径。' },
1141
+ },
1142
+ required: ['inputPath', 'outputPath'],
1143
+ },
1144
+ },
1145
+ },
1146
+ {
1147
+ type: 'function',
1148
+ function: {
1149
+ name: 'wordToMarkdown',
1150
+ description:
1151
+ '将 Word 文档(.docx)转换为 Markdown 文件(.md)。通过 HTML 中间格式进行转换,保留标题、段落、表格、链接等结构。参数:inputPath 为源 .docx 路径;outputPath 为输出 .md 路径。返回值:对象,包含 success、data(含 inputPath、outputPath)、error。',
1152
+ parameters: {
1153
+ type: 'object',
1154
+ properties: {
1155
+ inputPath: { type: 'string', description: '源 .docx 文件路径。' },
1156
+ outputPath: { type: 'string', description: '输出 .md 文件路径。' },
1157
+ },
1158
+ required: ['inputPath', 'outputPath'],
1159
+ },
1160
+ },
1161
+ },
1162
+ {
1163
+ type: 'function',
1164
+ function: {
1165
+ name: 'markdownToWord',
1166
+ description:
1167
+ '将 Markdown 文件(.md)转换为 Word 文档(.docx)。支持标题、段落、表格、列表、代码块、引用等元素。参数:inputPath 为源 .md 路径;outputPath 为输出 .docx 路径。返回值:对象,包含 success、data(含 inputPath、outputPath、sectionCount)、error。',
1168
+ parameters: {
1169
+ type: 'object',
1170
+ properties: {
1171
+ inputPath: { type: 'string', description: '源 .md 文件路径。' },
1172
+ outputPath: { type: 'string', description: '输出 .docx 文件路径。' },
1173
+ },
1174
+ required: ['inputPath', 'outputPath'],
1175
+ },
1176
+ },
1177
+ },
1178
+ {
1179
+ type: 'function',
1180
+ function: {
1181
+ name: 'htmlToWord',
1182
+ description:
1183
+ '将 HTML 文件转换为 Word 文档(.docx),解析标题、段落、表格、列表等元素。参数:inputPath 为源 .html 路径;outputPath 为输出 .docx 路径。返回值:对象,包含 success、data(含 inputPath、outputPath、sectionCount)、error。',
1184
+ parameters: {
1185
+ type: 'object',
1186
+ properties: {
1187
+ inputPath: { type: 'string', description: '源 .html 文件路径。' },
1188
+ outputPath: { type: 'string', description: '输出 .docx 文件路径。' },
1189
+ },
1190
+ required: ['inputPath', 'outputPath'],
1191
+ },
1192
+ },
1193
+ },
1194
+ {
1195
+ type: 'function',
1196
+ function: {
1197
+ name: 'patchDocxText',
1198
+ description: `保留原格式修改 Word 文档内容(支持跨 run 文本替换)。与 replaceDocxText 的区别:本函数在替换时智能处理 Word XML 的 run 拆分问题,替换文本会沿用匹配位置的原有字符样式(字体、字号、颜色等),不重建文档结构,最大程度保留原有格式。
1199
+ 参数:filePath 为目标 .docx 路径;replacements 为替换规则数组,每项格式 { search: '查找文本', replace: '替换文本' }。
1200
+ 返回值:对象,包含 success、data(含 filePath、totalReplacements)、error。`,
1201
+ parameters: {
1202
+ type: 'object',
1203
+ properties: {
1204
+ filePath: { type: 'string', description: '目标 .docx 文件路径。' },
1205
+ replacements: {
1206
+ type: 'array',
1207
+ description: '替换规则数组,每项包含 search(查找文本)和 replace(替换文本)。',
1208
+ items: {
1209
+ type: 'object',
1210
+ properties: {
1211
+ search: { type: 'string', description: '要查找的文本。' },
1212
+ replace: { type: 'string', description: '替换后的文本。' },
1213
+ },
1214
+ required: ['search', 'replace'],
1215
+ },
1216
+ },
1217
+ },
1218
+ required: ['filePath', 'replacements'],
1219
+ },
1220
+ },
1221
+ },
679
1222
  ]
680
1223
 
681
1224
  // ─── 导出 ──────────────────────────────────────────────────────────────────────
@@ -695,11 +1238,18 @@ const functions = {
695
1238
  mergeDocx,
696
1239
  extractDocxLinks,
697
1240
  getDocxParagraphStats,
1241
+ wordToPdf,
1242
+ wordToHtml,
1243
+ wordToMarkdown,
1244
+ markdownToWord,
1245
+ htmlToWord,
1246
+ patchDocxText,
698
1247
  }
699
1248
 
700
1249
  const DocxTool = {
701
1250
  name: 'DocxTool',
702
- description: '提供 Word 文档(.docx)的创建、读取、搜索、替换、模板填充、格式转换、合并等全面处理能力',
1251
+ description: '提供 Word 文档(.docx)的创建、读取、搜索、替换、模板填充、格式转换、合并,以及 Word/Markdown/HTML/PDF 格式互转等全面处理能力',
1252
+ // 格式转换:wordToPdf、wordToHtml、wordToMarkdown、markdownToWord、htmlToWord
703
1253
  platform: 'all',
704
1254
  descriptions,
705
1255
  functions,