getmdfromleetcode 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +186 -0
- package/check_solution.js +20 -0
- package/clean_next_data.json +1 -0
- package/index.js +74 -0
- package/index.js.backup +355 -0
- package/lib/markdownFormatter.js +269 -0
- package/lib/problemDataFetcher.js +250 -0
- package/next_data.json +1 -0
- package/package.json +29 -0
- package/problem_page.html +93 -0
- package/solution.html +89 -0
- package/solution_data.json +2604 -0
- package/solution_page.html +1 -0
- package/solution_page_full.html +1 -0
- package/specific_solution.html +89 -0
- package/test_format.js +20 -0
- package/test_subscript.js +52 -0
package/index.js.backup
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
// markdownFormatter.js - Markdown formatting module
|
|
2
|
+
|
|
3
|
+
import * as cheerio from 'cheerio';
|
|
4
|
+
|
|
5
|
+
/**
 * Flatten a DOM node to plain text while keeping superscripts visible.
 *
 * Text nodes contribute their raw data. A `<sup>` element contributes `^`
 * followed by its flattened content, so `10<sup>4</sup>` becomes `10^4` at any
 * nesting depth instead of collapsing to `104`.
 *
 * @param {Function} $ cheerio instance bound to the parsed document
 * @param {Object} node DOM node to flatten
 * @returns {string} flattened text
 */
function plainTextWithSuperscripts($, node) {
  if (node.type === 'text') {
    return node.data;
  }
  let text = node.tagName === 'sup' ? '^' : '';
  $(node).contents().each((index, child) => {
    text += plainTextWithSuperscripts($, child);
  });
  return text;
}

/**
 * Convert the direct children of a `<p>` element to inline Markdown.
 *
 * `<strong>`, `<em>` and `<code>` children are wrapped in `**`, `*` and
 * backticks; every other child is flattened to text.
 *
 * @param {Function} $ cheerio instance bound to the parsed document
 * @param {Object} paragraph the `<p>` element
 * @returns {string} trimmed Markdown for the paragraph
 */
function renderParagraph($, paragraph) {
  let text = '';
  $(paragraph).contents().each((index, child) => {
    const inner = plainTextWithSuperscripts($, child);
    switch (child.tagName) {
      case 'strong':
        text += `**${inner}**`;
        break;
      case 'em':
        text += `*${inner}*`;
        break;
      case 'code':
        text += `\`${inner}\``;
        break;
      default:
        text += inner;
    }
  });
  return text.trim();
}

/**
 * Apply the textual clean-up passes to the collected description.
 *
 * @param {string} description Markdown gathered from the HTML body
 * @returns {string} description with headings, math markup and spacing fixed
 */
function polishDescription(description) {
  return description
    // One pass per label. Each pattern also consumes an existing "## " prefix
    // and surrounding bold markers, so a label that was already turned into a
    // heading is not wrapped a second time (which left empty "## " headings).
    // Both the ASCII and the full-width colon are accepted and re-emitted as-is.
    .replace(/(?:##[ \t]*)?(?:\*\*)?示例\s*(\d+)\s*([::])(?:\*\*)?/g, '\n\n## 示例 $1$2\n\n')
    .replace(/(?:\*\*)?解释:(?:\*\*)?/g, '\n**解释:** ')
    .replace(/(?:##[ \t]*)?(?:\*\*)?提示([::])(?:\*\*)?/g, '\n\n## 提示$1\n')
    .replace(/(?:\*\*)?进阶:(?:\*\*)?/g, '\n\n**进阶:**\n')
    .replace(/\n \* /g, '\n- ')
    // Literal (still escaped) <sup> markup that survived in the text.
    .replace(/(\d+)\\u003csup\\u003e(\d+)\\u003c\/sup\\u003e/g, '$1^$2')
    // Wrap number^number in `$…$`. A replacer function is used because in a
    // replacement *string* "$$1" means "literal $ followed by 1", which
    // silently replaced every base with the digit 1.
    .replace(/(\d+)\s*\^\s*(\d+)/g, (match, base, exponent) => `$${base}^${exponent}$`)
    .replace(/\n\s*\n\s*\n/g, '\n\n')
    .replace(/^ +/gm, '');
}

/**
 * Format a LeetCode question as Markdown.
 *
 * @param {Object} question question data; reads `translatedTitle`/`title`,
 *   `difficulty` and `translatedContent`/`content` (HTML)
 * @param {boolean} rawOutput when true, return the original HTML prefixed with
 *   an `<h1>` title and a difficulty paragraph instead of Markdown
 * @returns {string} the formatted Markdown (or raw HTML) text
 */
export function formatAsMarkdown(question, rawOutput = false) {
  const title = question.translatedTitle || question.title;
  const difficulty = question.difficulty;
  // A missing body is treated as empty so the literal text "undefined" or
  // "null" never reaches the output.
  const content = question.translatedContent || question.content || '';

  if (rawOutput) {
    let rawResult = '';
    if (title) {
      rawResult += `<h1>${title}</h1>\n`;
    }
    if (difficulty) {
      rawResult += `<p><strong>Difficulty:</strong> ${difficulty}</p>\n`;
    }
    return rawResult + content;
  }

  const $ = cheerio.load(content, { decodeEntities: false });

  // Walk the top-level elements in document order.
  let description = '';
  $('body').children().each((index, elem) => {
    if (elem.tagName === 'p') {
      const text = renderParagraph($, elem);
      if (!text) {
        return;
      }
      if (text.startsWith('示例')) {
        description += `\n\n## ${text}\n\n`;
      } else if (text.startsWith('提示')) {
        description += '\n\n## 提示:\n';
      } else {
        description += `${text}\n\n`;
      }
    } else if (elem.tagName === 'pre') {
      const codeText = $(elem).text().trim();
      if (codeText) {
        description += '```\n' + codeText + '\n```\n\n';
      }
    } else if (elem.tagName === 'ul' || elem.tagName === 'ol') {
      $(elem).children('li').each((liIndex, li) => {
        // Superscript-aware flattening keeps "10^4" intact, which removes the
        // need to guess that a literal "104" was meant to be a power of ten.
        const text = plainTextWithSuperscripts($, li).trim();
        if (text) {
          description += `- ${text}\n`;
        }
      });
      description += '\n';
    }
  });

  let markdown = '';
  if (title) {
    markdown += `# ${title}\n\n`;
  }
  markdown += `**Difficulty:** ${difficulty || '未找到'}\n\n`;

  if (description) {
    markdown += `## Description\n\n${polishDescription(description)}\n`;
  } else {
    markdown += '## Description\n\n未能提取到题目描述\n\n';
  }

  return markdown;
}
|
|
140
|
+
// urlUtils.js - URL处理工具模块
|
|
141
|
+
|
|
142
|
+
/**
 * Extract the problem slug from a LeetCode problem URL.
 *
 * The slug is the path segment that follows `/problems/`. It ends at the next
 * `/`, `?` or `#`, so a query string or fragment is never part of the slug.
 *
 * @param {string} url LeetCode problem URL
 * @returns {string|null} the slug, or null when `url` is not a string or has
 *   no `/problems/<slug>` segment
 */
export function getProblemSlug(url) {
  if (typeof url !== 'string') {
    return null;
  }
  const match = url.match(/\/problems\/([^/?#]+)/);
  return match ? match[1] : null;
}
|
|
151
|
+
|
|
152
|
+
/**
 * Check whether a value is a LeetCode problem URL.
 *
 * Only `https://leetcode.cn/problems/…` and `https://leetcode.com/problems/…`
 * are accepted; the check is a prefix match.
 *
 * @param {string} url value to check
 * @returns {boolean} true when `url` is a string with one of the supported
 *   prefixes; false otherwise (including for non-string input)
 */
export function isValidLeetCodeUrl(url) {
  if (typeof url !== 'string') {
    return false;
  }
  const supportedPrefixes = [
    'https://leetcode.cn/problems/',
    'https://leetcode.com/problems/',
  ];
  return supportedPrefixes.some((prefix) => url.startsWith(prefix));
}
|
|
161
|
+
// problemDataFetcher.js - LeetCode问题数据获取模块
|
|
162
|
+
|
|
163
|
+
// 导入依赖
|
|
164
|
+
import fetch from 'node-fetch';
|
|
165
|
+
import { getProblemSlug } from './urlUtils.js';
|
|
166
|
+
|
|
167
|
+
/**
 * Fetch LeetCode question data through the site's GraphQL endpoint.
 *
 * The first argument is normally a bare slug (for example `two-sum`), in which
 * case leetcode.com is queried. A full problem URL is also accepted: the slug
 * is taken from its `/problems/<slug>` segment and a URL containing
 * `leetcode.cn` is sent to the leetcode.cn endpoint instead.
 *
 * @param {string} slug problem slug, or a full LeetCode problem URL
 * @param {string} language language type ('chinese' or 'english'); not used by
 *   this function at present, kept for interface compatibility
 * @returns {Promise<Object>} the `question` object from the GraphQL response
 * @throws {Error} when the HTTP status is not OK, when the response carries
 *   GraphQL errors, or when it contains no question
 */
export async function fetchProblemDataViaGraphQL(slug, language = 'chinese') {
  const origin = slug.includes('leetcode.cn')
    ? 'https://leetcode.cn'
    : 'https://leetcode.com';

  // The site check above can only succeed for a URL, so a URL must be reduced
  // to its slug before it is used as the `titleSlug` variable.
  const urlMatch = slug.match(/\/problems\/([^/?#]+)/);
  const titleSlug = urlMatch ? urlMatch[1] : slug;

  const query = `
    query questionData($titleSlug: String!) {
      question(titleSlug: $titleSlug) {
        questionId
        questionFrontendId
        title
        titleSlug
        content
        translatedContent
        difficulty
        topicTags {
          name
          slug
          translatedName
        }
        hints
      }
    }
  `;

  const response = await fetch(`${origin}/graphql`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
      // Keep the Referer on the same site as the endpoint being called.
      'Referer': `${origin}/problems/${titleSlug}/`
    },
    body: JSON.stringify({
      query,
      variables: { titleSlug }
    })
  });

  if (!response.ok) {
    throw new Error(`GraphQL request failed with status ${response.status}`);
  }

  const data = await response.json();

  if (data.errors) {
    throw new Error(`GraphQL errors: ${JSON.stringify(data.errors)}`);
  }

  // A response without a question is reported as a failure rather than
  // returned, so callers never receive null in place of question data.
  const question = data.data && data.data.question;
  if (!question) {
    throw new Error(`GraphQL response contained no question for "${titleSlug}" (problem not found)`);
  }

  return question;
}
|
|
224
|
+
|
|
225
|
+
/**
 * Fetch a problem page and extract the question from its `__NEXT_DATA__` JSON.
 *
 * The page is requested with browser-like headers. The question is read from
 * the dehydrated query whose `queryKey[0]` is `'questionDetail'`.
 *
 * @param {string} url problem URL
 * @param {string} language language type ('chinese' or 'english'); not used by
 *   this function at present, kept for interface compatibility
 * @returns {Promise<Object>} the question object found in the page data
 * @throws {Error} when the response status is not 200, when the page has no
 *   `__NEXT_DATA__` script, or when the JSON cannot be parsed or holds no
 *   question
 */
export async function fetchProblemDataFromPage(url, language = 'chinese') {
  const response = await fetch(url, {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
      'Accept-Encoding': 'gzip, deflate',
      'Connection': 'keep-alive',
      'Upgrade-Insecure-Requests': '1',
    }
  });

  if (response.status !== 200) {
    throw new Error(`Failed to fetch URL: status code ${response.status}`);
  }

  const html = await response.text();

  // Match on the id alone so extra or reordered attributes on the script tag
  // (for example a nonce) do not hide the payload.
  const nextDataMatch = html.match(/<script\b[^>]*?\sid="__NEXT_DATA__"[^>]*>(.+?)<\/script>/s);
  if (!nextDataMatch) {
    throw new Error('Could not find __NEXT_DATA__ in page');
  }

  try {
    const nextData = JSON.parse(nextDataMatch[1]);

    // Walk the nesting defensively: a missing layer should be reported as
    // "no question data", not as an opaque property-access TypeError.
    const pageProps = nextData && nextData.props && nextData.props.pageProps;
    const dehydratedState = pageProps && pageProps.dehydratedState;
    const queries = dehydratedState && dehydratedState.queries;
    const questionQuery = Array.isArray(queries)
      ? queries.find((query) => query && query.queryKey && query.queryKey[0] === 'questionDetail')
      : undefined;
    const queryData = questionQuery && questionQuery.state && questionQuery.state.data;
    const question = queryData && queryData.question;

    if (!question) {
      throw new Error('Could not find question data in __NEXT_DATA__');
    }

    return question;
  } catch (e) {
    // Keep the original error reachable for debugging.
    throw new Error(`Failed to parse __NEXT_DATA__: ${e.message}`, { cause: e });
  }
}
|
|
271
|
+
|
|
272
|
+
/**
 * Fetch LeetCode question data for a problem URL.
 *
 * The GraphQL API is tried first. If it throws, or yields no question, the
 * data is extracted from the problem page instead.
 *
 * @param {string} problemURL problem URL
 * @param {string} language language type ('chinese' or 'english'), passed
 *   through to both fetchers
 * @returns {Promise<Object>} question data
 * @throws {Error} when no slug can be extracted from `problemURL`, or when
 *   both the GraphQL request and the page extraction fail
 */
export async function fetchProblemData(problemURL, language = 'chinese') {
  const slug = getProblemSlug(problemURL);
  if (!slug) {
    throw new Error('Invalid LeetCode problem URL');
  }

  let graphqlError;
  try {
    const question = await fetchProblemDataViaGraphQL(slug, language);
    if (question) {
      return question;
    }
    // An empty result is a failure too: returning it would skip the page
    // fallback and hand callers a value they cannot format.
    graphqlError = new Error('GraphQL API returned no question data');
  } catch (error) {
    graphqlError = error;
  }

  console.error('GraphQL API failed, trying to extract from page...', graphqlError.message);

  try {
    return await fetchProblemDataFromPage(problemURL, language);
  } catch (pageError) {
    throw new Error(
      `Failed to fetch problem data via both methods: ${pageError.message}`,
      { cause: pageError },
    );
  }
}
|
|
301
|
+
#!/usr/bin/env node
|
|
302
|
+
|
|
303
|
+
import yargs from 'yargs';
|
|
304
|
+
import { hideBin } from 'yargs/helpers';
|
|
305
|
+
import { fetchProblemData } from './lib/problemDataFetcher.js';
|
|
306
|
+
import { formatAsMarkdown } from './lib/markdownFormatter.js';
|
|
307
|
+
|
|
308
|
+
// Command-line options, declared separately so the parser chain stays short.

// -u / --url: the LeetCode problem URL (required).
const urlOption = {
  alias: 'u',
  describe: 'LeetCode problem URL',
  type: 'string',
  demandOption: true
};

// -r / --raw: print the raw content instead of Markdown.
const rawOption = {
  alias: 'r',
  describe: 'Output raw content without Markdown formatting',
  type: 'boolean',
  default: false
};

// -e / --english: fetch English content instead of Chinese.
const englishOption = {
  alias: 'e',
  describe: 'Fetch English content instead of Chinese',
  type: 'boolean',
  default: false
};

// Parse the command-line arguments.
const argv = yargs(hideBin(process.argv))
  .usage('Usage: $0 -u [url]')
  .option('url', urlOption)
  .option('raw', rawOption)
  .option('english', englishOption)
  .argv;
|
|
330
|
+
|
|
331
|
+
/**
 * CLI entry point: validate the URL argument, fetch the problem and print it.
 *
 * Reads `url`, `raw` and `english` from the parsed arguments. Exits with
 * status 1 when the URL is not a supported LeetCode problem URL or when
 * fetching or formatting fails.
 */
async function main() {
  const { url, raw, english } = argv;

  const supportedPrefixes = [
    'https://leetcode.cn/problems/',
    'https://leetcode.com/problems/',
  ];
  const isSupported = supportedPrefixes.some((prefix) => url.startsWith(prefix));
  if (!isSupported) {
    console.error('Error: Only LeetCode problem URLs are supported (e.g., https://leetcode.cn/problems/two-sum/ or https://leetcode.com/problems/two-sum/)');
    process.exit(1);
  }

  try {
    // The --english flag selects which language is requested.
    const language = english ? 'english' : 'chinese';
    const question = await fetchProblemData(url, language);
    console.log(formatAsMarkdown(question, raw));
  } catch (error) {
    console.error('Error:', error.message);
    process.exit(1);
  }
}
|
|
353
|
+
|
|
354
|
+
// 执行主函数
|
|
355
|
+
main();
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Markdown格式化模块
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import * as cheerio from 'cheerio';
|
|
6
|
+
|
|
7
|
+
// Fenced code blocks: spans whose layout must be left untouched.
const FENCED_CODE = /(```[\s\S]*?```)/;

// Spans that must not receive math markup: fenced code and existing LaTeX.
const CODE_OR_LATEX = /(```[\s\S]*?```|\$\$[\s\S]*?\$\$|\$[^$\n]+\$)/;

/**
 * Apply `transform` to the parts of `text` that lie outside protected spans.
 *
 * @param {string} text text to process
 * @param {RegExp} protectedSpan pattern with exactly one capture group around
 *   the whole protected span
 * @param {Function} transform function applied to each unprotected segment
 * @returns {string} text with protected spans copied through unchanged
 */
function transformOutside(text, protectedSpan, transform) {
  // Splitting on a pattern with one capture group yields alternating
  // segments: even indexes are unprotected, odd indexes are protected spans.
  return text
    .split(protectedSpan)
    .map((segment, index) => (index % 2 === 0 ? transform(segment) : segment))
    .join('');
}

/**
 * Flatten a DOM node to plain text while keeping super- and subscripts.
 *
 * `<sup>` content is prefixed with `^` and `<sub>` content with `_`, at any
 * nesting depth, so `10<sup>4</sup>` becomes `10^4` instead of `104`.
 *
 * @param {Function} $ cheerio instance bound to the parsed document
 * @param {Object} node DOM node to flatten
 * @returns {string} flattened text
 */
function flattenInline($, node) {
  if (node.type === 'text') {
    return node.data;
  }
  let text = '';
  if (node.tagName === 'sup') {
    text = '^';
  } else if (node.tagName === 'sub') {
    text = '_';
  }
  $(node).contents().each((index, child) => {
    text += flattenInline($, child);
  });
  return text;
}

/**
 * Convert the direct children of a `<p>` element to inline Markdown.
 *
 * @param {Function} $ cheerio instance bound to the parsed document
 * @param {Object} paragraph the `<p>` element
 * @returns {string} trimmed Markdown for the paragraph
 */
function paragraphToMarkdown($, paragraph) {
  let text = '';
  $(paragraph).contents().each((index, child) => {
    const inner = flattenInline($, child);
    switch (child.tagName) {
      case 'strong':
        text += `**${inner}**`;
        break;
      case 'em':
        text += `*${inner}*`;
        break;
      case 'code':
        text += `\`${inner}\``;
        break;
      default:
        text += inner;
    }
  });
  return text.trim();
}

/**
 * Wrap `number^number` and `letter_number` expressions in `$…$`.
 *
 * Fenced code blocks and spans that are already LaTeX are skipped, so code is
 * not rewritten and existing math is not wrapped a second time.
 *
 * @param {string} text Markdown text
 * @returns {string} text with math markup added
 */
function wrapMath(text) {
  return transformOutside(text, CODE_OR_LATEX, (prose) => prose
    // Literal (still escaped) <sup>/<sub> markup that survived in the text.
    .replace(/(\d+)\\u003csup\\u003e(\d+)\\u003c\/sup\\u003e/g, '$1^$2')
    .replace(/(\d+)\\u003csub\\u003e(\d+)\\u003c\/sub\\u003e/g, '$1_$2')
    // Replacer functions are used because "$" is special in replacement
    // strings. One pass covers every base, so "10^40" is not split in two.
    .replace(/(\d+)\s*\^\s*(\d+)/g, (match, base, exponent) => `$${base}^${exponent}$`)
    // Accept `_2` or `_{2}`; a lone closing brace is not part of the index.
    .replace(/([A-Za-z])_(\{[0-9]+\}|[0-9]+)/g, (match, symbol, index) => {
      const braced = index.startsWith('{') ? index : `{${index}}`;
      return `$${symbol}_${braced}$`;
    }));
}

/**
 * Collect Markdown for the top-level elements of the question body.
 *
 * @param {Function} $ cheerio instance loaded with the question HTML
 * @returns {string} unpolished Markdown description
 */
function extractDescription($) {
  let description = '';
  $('body').children().each((index, elem) => {
    if (elem.tagName === 'p') {
      const text = paragraphToMarkdown($, elem);
      if (!text) {
        return;
      }
      if (text.startsWith('示例') || text.startsWith('Example')) {
        description += `\n\n## ${text}\n\n`;
      } else if (text.startsWith('提示') || text.startsWith('Hint')) {
        description += '\n\n## 提示:\n';
      } else {
        description += `${text}\n\n`;
      }
    } else if (elem.tagName === 'pre') {
      // The text is not trimmed so its layout survives, but the closing
      // fence must still start on a line of its own.
      const codeText = $(elem).text();
      if (codeText) {
        const lineBreak = codeText.endsWith('\n') ? '' : '\n';
        description += '```\n' + codeText + lineBreak + '```\n\n';
      }
    } else if (elem.tagName === 'ul' || elem.tagName === 'ol') {
      $(elem).children('li').each((liIndex, li) => {
        const text = flattenInline($, li).trim();
        if (text) {
          description += `- ${text}\n`;
        }
      });
      description += '\n';
    }
  });
  return description;
}

/**
 * Apply the textual clean-up passes to the collected description.
 *
 * @param {string} description Markdown gathered from the HTML body
 * @returns {string} description with headings, math markup and spacing fixed
 */
function tidyDescription(description) {
  const withHeadings = description
    // One pass per label. Each pattern also consumes an existing "## " prefix
    // and surrounding bold markers, so a label that is already a heading is
    // not wrapped again (which left empty "## " headings and stray "**").
    // Both the ASCII and the full-width colon are accepted and re-emitted.
    .replace(/(?:##[ \t]*)?(?:\*\*)?(示例|Example)\s*(\d+)\s*([::])(?:\*\*)?/g, '\n\n## $1 $2$3\n\n')
    .replace(/(?:\*\*)?解释:(?:\*\*)?/g, '\n**解释:** ')
    .replace(/(?:\*\*)?Explanation:(?:\*\*)?/g, '\n**Explanation:** ')
    .replace(/(?:##[ \t]*)?(?:\*\*)?提示([::])(?:\*\*)?/g, '\n\n## 提示$1\n')
    .replace(/(?:\*\*)?进阶:(?:\*\*)?/g, '\n\n**进阶:**\n')
    .replace(/(?:\*\*)?Follow up:(?:\*\*)?/g, '\n\n**Follow up:**\n')
    .replace(/\n \* /g, '\n- ');

  const collapsed = wrapMath(withHeadings).replace(/\n\s*\n\s*\n/g, '\n\n');

  // Leading spaces are stripped from prose only; inside fences they are part
  // of the example's layout.
  return transformOutside(collapsed, FENCED_CODE, (prose) => prose.replace(/^ +/gm, ''));
}

/**
 * Render the solution content as Markdown.
 *
 * The body text is emitted line by line, with heading lines (starting with
 * `#`) set apart by blank lines, followed by one fenced block per `<pre>`.
 *
 * @param {string} solutionContent solution content
 * @returns {string} Markdown for the solution section body
 */
function renderSolution(solutionContent) {
  const solution$ = cheerio.load(solutionContent, { decodeEntities: false });

  let prose = '';
  let insideFence = false;
  solution$('body').text().split('\n').forEach((line) => {
    const trimmed = line.trim();
    if (trimmed.startsWith('```')) {
      insideFence = !insideFence;
      prose += `${line}\n`;
    } else if (!insideFence && trimmed.startsWith('#')) {
      // Heading line. Inside a fence a leading "#" is a code comment and is
      // kept verbatim by the branch below.
      prose += `\n\n${trimmed}\n\n`;
    } else {
      prose += trimmed ? `${line}\n` : '\n';
    }
  });

  // NOTE(review): the text of any <pre> element is already part of the body
  // text above, so such code appears twice in the output — confirm whether
  // that is intended.
  let fencedBlocks = '';
  solution$('pre').each((index, elem) => {
    const $pre = solution$(elem);
    // The language is read from a bracketed token in the class attribute.
    const langMatch = ($pre.attr('class') || '').match(/\[([^\]]+)\]/);
    const lang = langMatch ? langMatch[1].split('-')[0] : '';
    const codeText = $pre.text();
    if (codeText) {
      fencedBlocks += '```' + lang + '\n' + codeText + '```\n\n';
    }
  });

  // Math markup goes on the prose only; the appended code stays untouched.
  return (wrapMath(prose) + fencedBlocks).replace(/\n\s*\n\s*\n/g, '\n\n');
}

/**
 * Format a LeetCode question, and its solution if present, as Markdown.
 *
 * @param {Object} question question data; reads `displayTitle`, `difficulty`,
 *   `displayContent` (HTML), `topicTags` and `solution.content`
 * @param {boolean} rawOutput when true, return the original HTML prefixed with
 *   an `<h1>` title and a difficulty paragraph instead of Markdown
 * @returns {string} the formatted Markdown (or raw HTML) text
 */
function formatAsMarkdown(question, rawOutput = false) {
  const title = question.displayTitle;
  const difficulty = question.difficulty;
  // A missing body is treated as empty so the literal text "undefined" or
  // "null" never reaches the output.
  const content = question.displayContent || '';
  const topicTags = question.topicTags || [];
  const solution = question.solution;
  const hasSolution = Boolean(solution && solution.content);

  if (rawOutput) {
    let rawResult = '';
    if (title) {
      rawResult += `<h1>${title}</h1>\n`;
    }
    if (difficulty) {
      rawResult += `<p><strong>Difficulty:</strong> ${difficulty}</p>\n`;
    }
    rawResult += content;
    rawResult += '\n\n<h2>题解</h2>\n';
    rawResult += hasSolution ? solution.content : '<p>没有找到题解</p>\n';
    return rawResult;
  }

  const $ = cheerio.load(content, { decodeEntities: false });
  const description = extractDescription($);

  let markdown = '';
  if (title) {
    markdown += `# ${title}\n\n`;
  }
  markdown += `**Difficulty:** ${difficulty || '未找到'}\n\n`;

  if (topicTags.length > 0) {
    const tags = topicTags.map((tag) => tag.translatedName || tag.name).join(', ');
    markdown += `**Tags:** ${tags}\n\n`;
  }

  if (description) {
    markdown += `## Description\n\n${tidyDescription(description)}\n`;
  } else {
    markdown += '## Description\n\n未能提取到题目描述\n\n';
  }

  markdown += '\n## 题解\n\n';
  markdown += hasSolution ? renderSolution(solution.content) : '没有找到题解\n\n';

  return markdown;
}
|
|
268
|
+
|
|
269
|
+
export { formatAsMarkdown };
|