getmdfromleetcode 1.1.0 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +25 -7
- package/index.js +80 -25
- package/lib/markdownFormatter.js +108 -152
- package/lib/problemDataFetcher.js +50 -12
- package/package.json +14 -6
- package/check_solution.js +0 -20
- package/clean_next_data.json +0 -1
- package/index.js.backup +0 -355
- package/next_data.json +0 -1
- package/problem_page.html +0 -93
- package/solution.html +0 -89
- package/solution_data.json +0 -2604
- package/solution_page.html +0 -1
- package/solution_page_full.html +0 -1
- package/specific_solution.html +0 -89
- package/test_format.js +0 -20
- package/test_subscript.js +0 -52
package/index.js.backup
DELETED
|
@@ -1,355 +0,0 @@
|
|
|
1
|
-
// markdownFormatter.js - Markdown formatting module
|
|
2
|
-
|
|
3
|
-
import * as cheerio from 'cheerio';
|
|
4
|
-
|
|
5
|
-
/**
 * Format a LeetCode question as a Markdown document.
 *
 * @param {Object} question Question data. Reads `translatedTitle` / `title`,
 *   `difficulty`, and `translatedContent` / `content` (an HTML string).
 * @param {boolean} rawOutput When true, skip the Markdown conversion and
 *   return the HTML content prefixed with an `<h1>` title and a difficulty
 *   paragraph.
 * @returns {string} The Markdown document, or the raw HTML when `rawOutput`
 *   is true.
 */
export function formatAsMarkdown(question, rawOutput = false) {
  const title = question.translatedTitle || question.title;
  const difficulty = question.difficulty;
  // Fall back to an empty string so a missing body is reported as
  // "no description" instead of being rendered as the text "undefined".
  const content = question.translatedContent || question.content || '';

  // Raw mode: return the original HTML with a title/difficulty header.
  if (rawOutput) {
    let rawResult = '';
    if (title) {
      rawResult += `<h1>${title}</h1>\n`;
    }
    if (difficulty) {
      rawResult += `<p><strong>Difficulty:</strong> ${difficulty}</p>\n`;
    }
    rawResult += content;
    return rawResult;
  }

  const $ = cheerio.load(content, { decodeEntities: false });
  let description = '';

  // Walk the top-level elements in document order.
  $('body').children().each((i, elem) => {
    if (elem.tagName === 'p') {
      description += renderParagraph($, elem);
    } else if (elem.tagName === 'pre') {
      const codeText = textWithSuperscripts($, elem).trim();
      if (codeText) {
        description += '```\n' + codeText + '\n```\n\n';
      }
    } else if (elem.tagName === 'ul' || elem.tagName === 'ol') {
      $(elem).children('li').each((j, li) => {
        const itemText = textWithSuperscripts($, li).trim();
        if (itemText) {
          description += `- ${itemText}\n`;
        }
      });
      description += '\n';
    }
  });

  let markdown = '';
  if (title) {
    markdown += `# ${title}\n\n`;
  }

  if (difficulty) {
    markdown += `**Difficulty:** ${difficulty}\n\n`;
  } else {
    markdown += `**Difficulty:** 未找到\n\n`;
  }

  if (description) {
    markdown += `## Description\n\n${polishDescription(description)}\n`;
  } else {
    markdown += '## Description\n\n未能提取到题目描述\n\n';
  }

  return markdown;
}

/**
 * Flatten a node to plain text, writing every `<sup>` as `^text` so that
 * exponents survive inside list items and nested inline tags.
 */
function textWithSuperscripts($, node) {
  let out = '';
  $(node).contents().each((i, child) => {
    if (child.type === 'text') {
      out += child.data;
    } else if (child.tagName === 'sup') {
      out += `^${textWithSuperscripts($, child)}`;
    } else {
      out += textWithSuperscripts($, child);
    }
  });
  return out;
}

/**
 * Render one `<p>` element as a Markdown chunk: a heading for paragraphs that
 * start with "示例" / "提示", otherwise the inline-formatted text.
 */
function renderParagraph($, elem) {
  // Decide on headings from the unformatted text so that a bold label such
  // as <strong>示例 1:</strong> is still recognised.
  const plain = textWithSuperscripts($, elem).trim();
  if (!plain) {
    return '';
  }
  if (plain.startsWith('示例')) {
    return `\n\n## ${plain}\n\n`;
  }
  if (plain.startsWith('提示')) {
    // Keep whatever follows the label instead of silently dropping it.
    const rest = plain.replace(/^提示\s*[::]?\s*/, '');
    return rest ? `\n\n## 提示:\n\n${rest}\n\n` : '\n\n## 提示:\n';
  }

  let text = '';
  $(elem).contents().each((i, child) => {
    if (child.type === 'text') {
      text += child.data;
      return;
    }
    const inner = textWithSuperscripts($, child);
    switch (child.tagName) {
      case 'strong':
        text += `**${inner}**`;
        break;
      case 'em':
        text += `*${inner}*`;
        break;
      case 'code':
        text += `\`${inner}\``;
        break;
      case 'sup':
        text += `^${inner}`;
        break;
      default:
        text += inner;
    }
  });
  return `${text.trim()}\n\n`;
}

/**
 * Regex post-processing of the collected description: section headings,
 * list bullets, LaTeX-wrapped powers and whitespace cleanup.
 */
function polishDescription(description) {
  return description
    // Section labels. The `(?<!## )` lookbehind skips labels that were
    // already turned into headings, so no stray "## " lines are produced.
    // Both ASCII and full-width colons are accepted and preserved.
    .replace(/(?<!## )示例\s*(\d+)\s*([::])/g, '\n\n## 示例 $1$2\n\n')
    .replace(/解释([::])/g, '\n**解释$1** ')
    .replace(/(?<!## )提示([::])/g, '\n\n## 提示$1\n')
    .replace(/进阶([::])/g, '\n\n**进阶$1**\n')
    // List items.
    .replace(/\n \* /g, '\n- ')
    // Escaped <sup> sequences and loose spacing around "^".
    .replace(/(\d+)\\u003csup\\u003e(\d+)\\u003c\/sup\\u003e/g, '$1^$2')
    .replace(/(\d+)\s*\^\s*(\d+)/g, '$1^$2')
    // Wrap digit^digit in "$...$". A replacer function is used because "$"
    // is special in replacement strings ("$$1" would emit a literal "$1").
    .replace(/(\d+)\^(\d+)/g, (power) => '$' + power + '$')
    // Collapse runs of blank lines and strip leading spaces.
    .replace(/\n\s*\n\s*\n/g, '\n\n')
    .replace(/^ +/gm, '');
}
|
|
140
|
-
// urlUtils.js - URL handling utilities module
|
|
141
|
-
|
|
142
|
-
/**
 * Extract the problem slug from a LeetCode problem URL.
 *
 * The slug is the path segment that follows `/problems/`; any trailing path,
 * query string (`?...`) or fragment (`#...`) is excluded.
 *
 * @param {string} url LeetCode problem URL
 * @returns {string|null} The slug, or null when `url` is not a string or has
 *   no `/problems/<slug>` segment
 */
export function getProblemSlug(url) {
  if (typeof url !== 'string') {
    return null;
  }
  // Stop at "?" and "#" too, so "/problems/two-sum?envType=x" yields "two-sum".
  const match = url.match(/\/problems\/([^/?#]+)/);
  return match ? match[1] : null;
}
|
|
151
|
-
|
|
152
|
-
/**
 * Check whether a value is a LeetCode problem URL.
 *
 * Accepts `https://leetcode.cn/problems/<slug>` and
 * `https://leetcode.com/problems/<slug>` (anything may follow the slug).
 *
 * @param {string} url URL to check
 * @returns {boolean} True when `url` is a string with one of the two
 *   supported prefixes followed by a non-empty slug
 */
export function isValidLeetCodeUrl(url) {
  if (typeof url !== 'string') {
    return false;
  }
  // Require at least one slug character so ".../problems/" alone is rejected.
  return /^https:\/\/leetcode\.(?:cn|com)\/problems\/[^/?#]+/.test(url);
}
|
|
161
|
-
// problemDataFetcher.js - LeetCode problem data fetching module
|
|
162
|
-
|
|
163
|
-
// 导入依赖
|
|
164
|
-
import fetch from 'node-fetch';
|
|
165
|
-
import { getProblemSlug } from './urlUtils.js';
|
|
166
|
-
|
|
167
|
-
/**
 * Fetch LeetCode problem data through the GraphQL API.
 *
 * @param {string} slug Problem slug, or a full problem URL. When a URL is
 *   given, the slug is taken from its `/problems/<slug>` segment and a
 *   `leetcode.cn` URL selects the leetcode.cn endpoint. A bare slug is sent
 *   to leetcode.com.
 * @param {string} language Language type ('chinese' or 'english'). Accepted
 *   for signature parity with the other fetchers; not read by this function.
 * @returns {Promise<Object>} The `question` object from the response
 * @throws {Error} On a non-OK HTTP status, GraphQL errors, or a response
 *   that contains no question
 */
export async function fetchProblemDataViaGraphQL(slug, language = 'chinese') {
  const host = slug.includes('leetcode.cn')
    ? 'https://leetcode.cn'
    : 'https://leetcode.com';

  // A bare slug cannot contain "/", so a match here means a URL was passed.
  const urlMatch = slug.match(/\/problems\/([^/?#]+)/);
  const titleSlug = urlMatch ? urlMatch[1] : slug;

  const query = `
    query questionData($titleSlug: String!) {
      question(titleSlug: $titleSlug) {
        questionId
        questionFrontendId
        title
        titleSlug
        content
        translatedContent
        difficulty
        topicTags {
          name
          slug
          translatedName
        }
        hints
      }
    }
  `;

  const response = await fetch(`${host}/graphql`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36',
      // Keep the Referer on the same site as the endpoint being called.
      'Referer': `${host}/problems/${titleSlug}/`
    },
    body: JSON.stringify({
      query,
      variables: { titleSlug }
    })
  });

  if (!response.ok) {
    throw new Error(`GraphQL request failed with status ${response.status}`);
  }

  const data = await response.json();

  if (data.errors) {
    throw new Error(`GraphQL errors: ${JSON.stringify(data.errors)}`);
  }

  // Fail here rather than returning null, which callers would dereference.
  const question = data.data && data.data.question;
  if (!question) {
    throw new Error(`GraphQL response contained no question for slug "${titleSlug}"`);
  }

  return question;
}
|
|
224
|
-
|
|
225
|
-
/**
 * Extract problem data from the `__NEXT_DATA__` script embedded in the
 * problem page.
 *
 * @param {string} url Problem URL
 * @param {string} language Language type ('chinese' or 'english'). Accepted
 *   for signature parity with the other fetchers; not read by this function.
 * @returns {Promise<Object>} The `question` object of the 'questionDetail'
 *   query found in the page's dehydrated state
 * @throws {Error} When the page cannot be fetched, has no `__NEXT_DATA__`
 *   script, the script is not valid JSON, or it holds no question data
 */
export async function fetchProblemDataFromPage(url, language = 'chinese') {
  const response = await fetch(url, {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
      'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
      'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
      'Accept-Encoding': 'gzip, deflate',
      'Connection': 'keep-alive',
      'Upgrade-Insecure-Requests': '1',
    }
  });

  if (response.status !== 200) {
    throw new Error(`Failed to fetch URL: status code ${response.status}`);
  }

  const html = await response.text();

  // Locate the __NEXT_DATA__ script tag.
  const nextDataMatch = html.match(/<script id="__NEXT_DATA__" type="application\/json">(.+?)<\/script>/s);
  if (!nextDataMatch) {
    throw new Error('Could not find __NEXT_DATA__ in page');
  }

  // Only the JSON parse is wrapped, so a missing question is not reported
  // as a parse failure.
  let nextData;
  try {
    nextData = JSON.parse(nextDataMatch[1]);
  } catch (e) {
    throw new Error(`Failed to parse __NEXT_DATA__: ${e.message}`);
  }

  // Walk props.pageProps.dehydratedState.queries, tolerating missing levels.
  const queries = ['props', 'pageProps', 'dehydratedState', 'queries'].reduce(
    (node, key) => (node == null ? undefined : node[key]),
    nextData
  );
  const detailQuery = Array.isArray(queries)
    ? queries.find(
        (entry) => entry && entry.queryKey && entry.queryKey[0] === 'questionDetail'
      )
    : undefined;
  const question = detailQuery &&
    detailQuery.state &&
    detailQuery.state.data &&
    detailQuery.state.data.question;

  if (!question) {
    throw new Error('Could not find question data in __NEXT_DATA__');
  }

  return question;
}
|
|
271
|
-
|
|
272
|
-
/**
 * Fetch LeetCode problem data, trying the GraphQL API first and falling back
 * to scraping the problem page.
 *
 * @param {string} problemURL Problem URL
 * @param {string} language Language type ('chinese' or 'english'); forwarded
 *   to both fetchers
 * @returns {Promise<Object>} Problem data
 * @throws {Error} When no slug can be extracted from `problemURL`, or when
 *   both fetch methods fail (the message carries both failure reasons)
 */
export async function fetchProblemData(problemURL, language = 'chinese') {
  const slug = getProblemSlug(problemURL);
  if (!slug) {
    throw new Error('Invalid LeetCode problem URL');
  }

  try {
    // First attempt: GraphQL API.
    return await fetchProblemDataViaGraphQL(slug, language);
  } catch (graphqlError) {
    console.error('GraphQL API failed, trying to extract from page...', graphqlError.message);
    try {
      // Fallback: extract the data from the problem page.
      return await fetchProblemDataFromPage(problemURL, language);
    } catch (pageError) {
      // Report both reasons; the GraphQL one was previously dropped here.
      throw new Error(
        `Failed to fetch problem data via both methods: GraphQL: ${graphqlError.message}; page: ${pageError.message}`
      );
    }
  }
}
|
|
301
|
-
#!/usr/bin/env node
|
|
302
|
-
|
|
303
|
-
import yargs from 'yargs';
|
|
304
|
-
import { hideBin } from 'yargs/helpers';
|
|
305
|
-
import { fetchProblemData } from './lib/problemDataFetcher.js';
|
|
306
|
-
import { formatAsMarkdown } from './lib/markdownFormatter.js';
|
|
307
|
-
|
|
308
|
-
// Command-line options, declared separately and then registered with yargs.
const urlOption = {
  alias: 'u',
  describe: 'LeetCode problem URL',
  type: 'string',
  demandOption: true
};

const rawOption = {
  alias: 'r',
  describe: 'Output raw content without Markdown formatting',
  type: 'boolean',
  default: false
};

const englishOption = {
  alias: 'e',
  describe: 'Fetch English content instead of Chinese',
  type: 'boolean',
  default: false
};

// Parse the command-line arguments.
const argv = yargs(hideBin(process.argv))
  .usage('Usage: $0 -u [url]')
  .option('url', urlOption)
  .option('raw', rawOption)
  .option('english', englishOption)
  .argv;
|
|
330
|
-
|
|
331
|
-
/**
 * CLI entry point: check the URL, fetch the problem and print it.
 *
 * Reads `url`, `raw` and `english` from the parsed `argv`. Logs an error and
 * exits with status 1 when the URL has an unsupported prefix or when
 * fetching/formatting throws.
 */
async function main() {
  const { url, raw, english } = argv;

  const supportedPrefixes = [
    'https://leetcode.cn/problems/',
    'https://leetcode.com/problems/'
  ];
  const isSupported = supportedPrefixes.some((prefix) => url.startsWith(prefix));
  if (!isSupported) {
    console.error('Error: Only LeetCode problem URLs are supported (e.g., https://leetcode.cn/problems/two-sum/ or https://leetcode.com/problems/two-sum/)');
    process.exit(1);
  }

  try {
    // The --english flag selects English content; Chinese is the default.
    let language = 'chinese';
    if (english) {
      language = 'english';
    }
    const question = await fetchProblemData(url, language);
    console.log(formatAsMarkdown(question, raw));
  } catch (error) {
    console.error('Error:', error.message);
    process.exit(1);
  }
}
|
|
353
|
-
|
|
354
|
-
// Run the main function
|
|
355
|
-
main();
|