hos-m2f 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hos_m2f/converters/md_to_docx.py +51 -6
- hos_m2f/converters/md_to_epub.py +37 -65
- hos_m2f/converters/md_to_latex.py +63 -0
- hos_m2f/converters/pdf_to_md.py +120 -0
- {hos_m2f-0.5.4.dist-info → hos_m2f-0.5.5.dist-info}/METADATA +1 -1
- {hos_m2f-0.5.4.dist-info → hos_m2f-0.5.5.dist-info}/RECORD +10 -7
- tests/test_latex.py +182 -0
- {hos_m2f-0.5.4.dist-info → hos_m2f-0.5.5.dist-info}/WHEEL +0 -0
- {hos_m2f-0.5.4.dist-info → hos_m2f-0.5.5.dist-info}/entry_points.txt +0 -0
- {hos_m2f-0.5.4.dist-info → hos_m2f-0.5.5.dist-info}/top_level.txt +0 -0
hos_m2f/converters/md_to_docx.py
CHANGED
|
@@ -95,10 +95,28 @@ class MDToDOCXConverter(BaseConverter):
|
|
|
95
95
|
# 填充表头
|
|
96
96
|
header_row = table.rows[0]
|
|
97
97
|
for i, cell_text in enumerate(header_cells):
|
|
98
|
-
header_row.cells[i]
|
|
98
|
+
cell = header_row.cells[i]
|
|
99
|
+
cell.text = cell_text
|
|
100
|
+
# 设置表头样式
|
|
101
|
+
for paragraph in cell.paragraphs:
|
|
102
|
+
for run in paragraph.runs:
|
|
103
|
+
run.bold = True
|
|
104
|
+
run.font.size = Pt(11)
|
|
105
|
+
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
99
106
|
|
|
100
|
-
#
|
|
107
|
+
# 跳过分隔线行,并解析对齐方式
|
|
108
|
+
alignments = []
|
|
101
109
|
if len(rows) > 1 and '---' in rows[1]:
|
|
110
|
+
# 解析对齐方式
|
|
111
|
+
alignment_row = rows[1]
|
|
112
|
+
alignment_cells = [cell.strip() for cell in alignment_row.split('|') if cell.strip()]
|
|
113
|
+
for cell in alignment_cells:
|
|
114
|
+
if cell.startswith(':') and cell.endswith(':'):
|
|
115
|
+
alignments.append(WD_ALIGN_PARAGRAPH.CENTER)
|
|
116
|
+
elif cell.endswith(':'):
|
|
117
|
+
alignments.append(WD_ALIGN_PARAGRAPH.RIGHT)
|
|
118
|
+
else:
|
|
119
|
+
alignments.append(WD_ALIGN_PARAGRAPH.LEFT)
|
|
102
120
|
data_rows = rows[2:]
|
|
103
121
|
else:
|
|
104
122
|
data_rows = rows[1:]
|
|
@@ -110,7 +128,12 @@ class MDToDOCXConverter(BaseConverter):
|
|
|
110
128
|
new_row = table.add_row()
|
|
111
129
|
for i, cell_text in enumerate(cells):
|
|
112
130
|
if i < len(new_row.cells):
|
|
113
|
-
new_row.cells[i]
|
|
131
|
+
cell = new_row.cells[i]
|
|
132
|
+
cell.text = cell_text
|
|
133
|
+
# 设置对齐方式
|
|
134
|
+
if i < len(alignments):
|
|
135
|
+
for paragraph in cell.paragraphs:
|
|
136
|
+
paragraph.alignment = alignments[i]
|
|
114
137
|
except Exception as e:
|
|
115
138
|
# 如果解析失败,回退到简单处理
|
|
116
139
|
self.doc.add_paragraph('Table: ' + text[:100] + '...')
|
|
@@ -217,9 +240,31 @@ class MDToDOCXConverter(BaseConverter):
|
|
|
217
240
|
|
|
218
241
|
def _render_mermaid(self, mermaid_code):
    """Render a Mermaid diagram to an image through the mermaid.ink web service.

    Args:
        mermaid_code: Mermaid diagram source text.

    Returns:
        An ``io.BytesIO`` holding the response body when the service answers
        with HTTP 200; ``None`` for any other status or when any step fails
        (missing ``requests``, network error, timeout, ...).
    """
    try:
        import base64
        from io import BytesIO

        import requests

        # mermaid.ink takes the diagram source as URL-safe base64 in the URL
        # path; a percent-encoded diagram is not a valid payload for /img/.
        encoded_code = base64.urlsafe_b64encode(
            mermaid_code.encode('utf-8')
        ).decode('ascii')

        url = f"https://mermaid.ink/img/{encoded_code}"

        # Bounded wait so a slow service cannot stall the whole conversion.
        response = requests.get(url, timeout=10)

        if response.status_code == 200:
            # Hand the image back as an in-memory stream.
            return BytesIO(response.content)

        # Any non-200 answer is treated as "no image available".
        return None
    except Exception as e:
        # Best effort: rendering problems must not abort the conversion.
        print(f"Error rendering Mermaid chart: {e}")
        return None
|
|
223
268
|
|
|
224
269
|
# 渲染Markdown
|
|
225
270
|
renderer = DOCXRenderer(doc)
|
hos_m2f/converters/md_to_epub.py
CHANGED
|
@@ -2,14 +2,18 @@
|
|
|
2
2
|
|
|
3
3
|
from typing import Any, Optional, Dict
|
|
4
4
|
from hos_m2f.converters.base_converter import BaseConverter
|
|
5
|
-
import
|
|
6
|
-
from
|
|
7
|
-
import mistune
|
|
5
|
+
from hos_m2f.renderers.epub_renderer import EPUBRenderer
|
|
6
|
+
from hos_m2f.structure.book_parser import BookParser
|
|
8
7
|
|
|
9
8
|
|
|
10
9
|
class MDToEPUBConverter(BaseConverter):
|
|
11
10
|
"""Markdown到EPUB格式转换器"""
|
|
12
11
|
|
|
12
|
+
def __init__(self):
    """Create the EPUB renderer and the book parser used by this converter."""
    renderer = EPUBRenderer()
    self.renderer = renderer
    book_parser = BookParser()
    self.book_parser = book_parser
|
|
16
|
+
|
|
13
17
|
def convert(self, input_content: str, options: Optional[Dict[str, Any]] = None) -> bytes:
|
|
14
18
|
"""将Markdown转换为EPUB
|
|
15
19
|
|
|
@@ -23,73 +27,41 @@ class MDToEPUBConverter(BaseConverter):
|
|
|
23
27
|
if options is None:
|
|
24
28
|
options = {}
|
|
25
29
|
|
|
26
|
-
#
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
# 设置元数据
|
|
30
|
-
book.set_identifier('id12345')
|
|
31
|
-
book.set_title(options.get('title', 'Untitled'))
|
|
32
|
-
book.set_language(options.get('language', 'zh'))
|
|
33
|
-
book.add_author(options.get('author', 'Unknown'))
|
|
34
|
-
|
|
35
|
-
# 添加封面
|
|
36
|
-
if 'cover' in options:
|
|
37
|
-
cover_image = epub.EpubItem(
|
|
38
|
-
uid='cover-image',
|
|
39
|
-
file_name='images/cover.jpg',
|
|
40
|
-
media_type='image/jpeg',
|
|
41
|
-
content=options['cover']
|
|
42
|
-
)
|
|
43
|
-
book.add_item(cover_image)
|
|
44
|
-
book.set_cover('images/cover.jpg', cover_image)
|
|
45
|
-
|
|
46
|
-
# 解析Markdown
|
|
47
|
-
markdown = mistune.create_markdown()
|
|
48
|
-
|
|
30
|
+
# 使用BookParser解析Markdown内容
|
|
31
|
+
parsed_content = self.book_parser.parse(input_content, options)
|
|
49
32
|
|
|
50
|
-
#
|
|
51
|
-
|
|
33
|
+
# 增强解析结果
|
|
34
|
+
parsed_content = self._enhance_parsed_content(parsed_content, options)
|
|
52
35
|
|
|
53
|
-
#
|
|
54
|
-
|
|
55
|
-
title=options.get('title', 'Chapter 1'),
|
|
56
|
-
file_name='chapter1.xhtml',
|
|
57
|
-
lang='zh'
|
|
58
|
-
)
|
|
59
|
-
chapter.content = f'''
|
|
60
|
-
<!DOCTYPE html>
|
|
61
|
-
<html>
|
|
62
|
-
<head>
|
|
63
|
-
<title>{options.get('title', 'Untitled')}</title>
|
|
64
|
-
<meta charset="utf-8" />
|
|
65
|
-
</head>
|
|
66
|
-
<body>
|
|
67
|
-
<h1>{options.get('title', 'Untitled')}</h1>
|
|
68
|
-
{html_content}
|
|
69
|
-
</body>
|
|
70
|
-
</html>
|
|
71
|
-
'''
|
|
36
|
+
# 使用EPUBRenderer渲染EPUB文件
|
|
37
|
+
epub_content = self.renderer.render(parsed_content, options)
|
|
72
38
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
39
|
+
return epub_content
|
|
40
|
+
|
|
41
|
+
def _enhance_parsed_content(self, parsed_content: Dict[str, Any], options: Dict[str, Any]) -> Dict[str, Any]:
|
|
42
|
+
"""增强解析结果"""
|
|
43
|
+
# 添加选项中的元数据
|
|
44
|
+
if 'title' in options:
|
|
45
|
+
parsed_content.setdefault('book_metadata', {})['title'] = options['title']
|
|
46
|
+
if 'author' in options:
|
|
47
|
+
parsed_content.setdefault('book_metadata', {})['author'] = options['author']
|
|
48
|
+
if 'language' in options:
|
|
49
|
+
parsed_content.setdefault('book_metadata', {})['language'] = options['language']
|
|
50
|
+
if 'publisher' in options:
|
|
51
|
+
parsed_content.setdefault('book_metadata', {})['publisher'] = options['publisher']
|
|
52
|
+
if 'publish_date' in options:
|
|
53
|
+
parsed_content.setdefault('book_metadata', {})['publish_date'] = options['publish_date']
|
|
54
|
+
if 'description' in options:
|
|
55
|
+
parsed_content.setdefault('book_metadata', {})['description'] = options['description']
|
|
85
56
|
|
|
86
|
-
#
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
57
|
+
# 添加封面信息
|
|
58
|
+
if 'cover' in options:
|
|
59
|
+
parsed_content['cover'] = {
|
|
60
|
+
'src': options['cover'],
|
|
61
|
+
'type': 'image'
|
|
62
|
+
}
|
|
91
63
|
|
|
92
|
-
return
|
|
64
|
+
return parsed_content
|
|
93
65
|
|
|
94
66
|
def get_supported_formats(self) -> tuple:
|
|
95
67
|
"""获取支持的格式"""
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Markdown到LaTeX格式转换器"""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional, Dict
|
|
4
|
+
from hos_m2f.converters.base_converter import BaseConverter
|
|
5
|
+
from hos_m2f.renderers.latex_renderer import LaTeXRenderer
|
|
6
|
+
from hos_m2f.structure.semantic_parser import SemanticParser
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class MDToLaTeXConverter(BaseConverter):
|
|
10
|
+
"""Markdown到LaTeX格式转换器"""
|
|
11
|
+
|
|
12
|
+
def __init__(self):
    """Create the LaTeX renderer and the semantic parser used by this converter."""
    renderer = LaTeXRenderer()
    self.renderer = renderer
    parser = SemanticParser()
    self.parser = parser
|
|
16
|
+
|
|
17
|
+
def convert(self, input_content: str, options: Optional[Dict[str, Any]] = None) -> bytes:
    """Convert Markdown text to LaTeX.

    Args:
        input_content: Markdown source text.
        options: Conversion options; ``None`` is treated as an empty dict.

    Returns:
        Whatever ``self.renderer.render`` produces for the parsed document
        (annotated as ``bytes``).
    """
    opts = {} if options is None else options

    # Parse first, then fold the caller's options into the parse result.
    parsed = self.parser.parse(input_content)
    enhanced = self._enhance_parsed_content(parsed, opts)

    # Rendering receives both the enriched document and the raw options.
    return self.renderer.render(enhanced, opts)
|
|
40
|
+
|
|
41
|
+
def _enhance_parsed_content(self, parsed_content: Dict[str, Any], options: Dict[str, Any]) -> Dict[str, Any]:
|
|
42
|
+
"""增强解析结果"""
|
|
43
|
+
# 添加选项中的元数据
|
|
44
|
+
if 'title' in options:
|
|
45
|
+
parsed_content.setdefault('metadata', {})['title'] = options['title']
|
|
46
|
+
if 'author' in options:
|
|
47
|
+
parsed_content.setdefault('metadata', {})['author'] = options['author']
|
|
48
|
+
if 'date' in options:
|
|
49
|
+
parsed_content.setdefault('metadata', {})['date'] = options['date']
|
|
50
|
+
if 'abstract' in options:
|
|
51
|
+
parsed_content.setdefault('metadata', {})['abstract'] = options['abstract']
|
|
52
|
+
if 'keywords' in options:
|
|
53
|
+
parsed_content.setdefault('metadata', {})['keywords'] = options['keywords']
|
|
54
|
+
|
|
55
|
+
# 添加文档类型
|
|
56
|
+
if 'document_class' in options:
|
|
57
|
+
parsed_content['document_class'] = options['document_class']
|
|
58
|
+
|
|
59
|
+
return parsed_content
|
|
60
|
+
|
|
61
|
+
def get_supported_formats(self) -> tuple:
    """Return the (source, target) format pair handled by this converter."""
    source_format, target_format = 'markdown', 'latex'
    return (source_format, target_format)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""PDF到Markdown格式转换器"""
|
|
2
|
+
|
|
3
|
+
from typing import Any, Optional, Dict
|
|
4
|
+
from hos_m2f.converters.base_converter import BaseConverter
|
|
5
|
+
|
|
6
|
+
# 延迟导入PyPDF2
|
|
7
|
+
pypdf2_available = False
|
|
8
|
+
PdfReader = None
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _check_pypdf2():
|
|
12
|
+
"""检查PyPDF2是否可用"""
|
|
13
|
+
global pypdf2_available, PdfReader
|
|
14
|
+
if not pypdf2_available:
|
|
15
|
+
try:
|
|
16
|
+
from PyPDF2 import PdfReader
|
|
17
|
+
pypdf2_available = True
|
|
18
|
+
except ImportError as e:
|
|
19
|
+
print(f"Warning: PyPDF2 not available: {e}")
|
|
20
|
+
print("PDF to Markdown conversion is disabled.")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class PDFToMDConverter(BaseConverter):
|
|
24
|
+
"""PDF到Markdown格式转换器"""
|
|
25
|
+
|
|
26
|
+
def convert(self, input_content: bytes, options: Optional[Dict[str, Any]] = None) -> bytes:
    """Convert a PDF document to Markdown.

    Args:
        input_content: Raw bytes of the PDF file.
        options: Conversion options; ``None`` is treated as an empty dict.

    Returns:
        The generated Markdown text encoded as UTF-8.

    Raises:
        ImportError: If PyPDF2 cannot be imported.
    """
    # The PyPDF2 import is deferred until a conversion is actually requested.
    _check_pypdf2()
    if not pypdf2_available:
        raise ImportError("PyPDF2 is not available. PDF to Markdown conversion is disabled.")

    opts = options if options is not None else {}

    markdown_text = self._parse_pdf(input_content, opts)
    return markdown_text.encode('utf-8')
|
|
48
|
+
|
|
49
|
+
def _parse_pdf(self, pdf_content: bytes, options: Dict[str, Any]) -> str:
    """Extract the text of every page and convert it to Markdown.

    Args:
        pdf_content: Raw bytes of the PDF file.
        options: Conversion options, forwarded to ``_text_to_markdown``.

    Returns:
        The Markdown text produced from the extracted page texts.
    """
    import io

    # PdfReader is the module-level name bound by _check_pypdf2().
    reader = PdfReader(io.BytesIO(pdf_content))

    # Keep only pages that yield text; empty or None extractions are dropped.
    extracted = (page.extract_text() for page in reader.pages)
    page_texts = [page_text for page_text in extracted if page_text]

    # Pages are separated by a blank line in the combined text.
    combined = '\n\n'.join(page_texts)

    return self._text_to_markdown(combined, options)
|
|
71
|
+
|
|
72
|
+
def _text_to_markdown(self, text: str, options: Dict[str, Any]) -> str:
|
|
73
|
+
"""将纯文本转换为Markdown"""
|
|
74
|
+
import re
|
|
75
|
+
|
|
76
|
+
# 分割行
|
|
77
|
+
lines = text.split('\n')
|
|
78
|
+
|
|
79
|
+
# 处理标题
|
|
80
|
+
markdown_lines = []
|
|
81
|
+
for line in lines:
|
|
82
|
+
line = line.strip()
|
|
83
|
+
if not line:
|
|
84
|
+
markdown_lines.append('')
|
|
85
|
+
continue
|
|
86
|
+
|
|
87
|
+
# 简单的标题识别
|
|
88
|
+
# 假设以数字开头的行可能是标题
|
|
89
|
+
if re.match(r'^\d+\.', line):
|
|
90
|
+
# 检查数字级别
|
|
91
|
+
match = re.match(r'^(\d+)\.', line)
|
|
92
|
+
if match:
|
|
93
|
+
level = len(match.group(1).split('.'))
|
|
94
|
+
if level <= 6:
|
|
95
|
+
markdown_lines.append(f'{'#' * level} {line}')
|
|
96
|
+
continue
|
|
97
|
+
|
|
98
|
+
# 检查是否是大写标题
|
|
99
|
+
if line.isupper() and len(line) < 50:
|
|
100
|
+
markdown_lines.append(f'## {line}')
|
|
101
|
+
continue
|
|
102
|
+
|
|
103
|
+
# 普通行
|
|
104
|
+
markdown_lines.append(line)
|
|
105
|
+
|
|
106
|
+
# 合并行
|
|
107
|
+
markdown_content = '\n'.join(markdown_lines)
|
|
108
|
+
|
|
109
|
+
# 处理列表
|
|
110
|
+
markdown_content = re.sub(r'^\s*\-\s(.*)$', r'* \1', markdown_content, flags=re.MULTILINE)
|
|
111
|
+
markdown_content = re.sub(r'^\s*\*\s(.*)$', r'* \1', markdown_content, flags=re.MULTILINE)
|
|
112
|
+
|
|
113
|
+
# 处理粗体
|
|
114
|
+
markdown_content = re.sub(r'\b([A-Z]{3,})\b', r'**\1**', markdown_content)
|
|
115
|
+
|
|
116
|
+
return markdown_content
|
|
117
|
+
|
|
118
|
+
def get_supported_formats(self) -> tuple:
    """Return the (source, target) format pair handled by this converter."""
    source_format, target_format = 'pdf', 'md'
    return (source_format, target_format)
|
|
@@ -7,17 +7,20 @@ hos_m2f/converters/docx_to_md.py,sha256=_HBp3TOD9ZkTFhHR_f3ObLlpDcv0tnSPjPfeGxuv
|
|
|
7
7
|
hos_m2f/converters/epub_to_md.py,sha256=cFfHmK4IrJKwzEWVE3ue7Jw8tBfWu1q7wG9o7oMf4Pw,4612
|
|
8
8
|
hos_m2f/converters/html_to_md.py,sha256=26GqdynSxKKO2NTxPKgfFs9bTuisLaEIJdBhz4CJ5Eg,4487
|
|
9
9
|
hos_m2f/converters/json_to_md.py,sha256=jeLBQ3jTkgA5a2Kr2gsOPjZB-D4PZxumciFHbyPKNmc,3670
|
|
10
|
-
hos_m2f/converters/md_to_docx.py,sha256=
|
|
11
|
-
hos_m2f/converters/md_to_epub.py,sha256=
|
|
10
|
+
hos_m2f/converters/md_to_docx.py,sha256=GFAAQppSiCff7pkDAPEmvuoj_f4DMzNWHsbv-9cbqmU,12248
|
|
11
|
+
hos_m2f/converters/md_to_epub.py,sha256=wNoniOSgIz7qiuIagJzqsF6f4pu_HLUigq-w0a_HoFg,2572
|
|
12
12
|
hos_m2f/converters/md_to_html.py,sha256=Pn5K6_QiCdasK1M3hdyr4jlTzzu3OpQLJ-wznGiomPo,2502
|
|
13
13
|
hos_m2f/converters/md_to_json.py,sha256=4VzUQFQ8nStmqm7td6MOFKji25hSiydMZhVJcsRHdYU,11246
|
|
14
|
+
hos_m2f/converters/md_to_latex.py,sha256=7Fra7f984XLLWJTSbjPJP3ljSUldvpc2sqF2QyyPUJg,2348
|
|
14
15
|
hos_m2f/converters/md_to_xml.py,sha256=ARuf4rEX4Of-VdGJI45lAejJV8OmtlHQMK8rltzg6B0,14217
|
|
16
|
+
hos_m2f/converters/pdf_to_md.py,sha256=CgKrvv3CWc6H94nNrDO5nLIegttDzokDpoP2E2oSmEs,3851
|
|
15
17
|
hos_m2f/converters/xml_to_md.py,sha256=zOkaEaSZdvyHag05kIHiWF4VyGMMjfmWmBllBpzwJ4E,4051
|
|
16
18
|
tests/__init__.py,sha256=q1Fh8atmZO-c9dA8JDMvlWaIZxlwABwe_HgNgFNDKJc,16
|
|
17
19
|
tests/test_converters.py,sha256=0sAG1fLR0UjJIWzlKWBR2QU7yl8a8LP8NwwSaU1TI5E,5150
|
|
20
|
+
tests/test_latex.py,sha256=-KCCYKRDu6RoI3gOt0HTtExsW2IJ6KoNfIWeocbdFyY,6619
|
|
18
21
|
tests/test_modes.py,sha256=FFZN1cp4sUJUR5fjbZXo2z-Z4Q5akCRBmSIiR7MCdVA,5887
|
|
19
|
-
hos_m2f-0.5.
|
|
20
|
-
hos_m2f-0.5.
|
|
21
|
-
hos_m2f-0.5.
|
|
22
|
-
hos_m2f-0.5.
|
|
23
|
-
hos_m2f-0.5.
|
|
22
|
+
hos_m2f-0.5.5.dist-info/METADATA,sha256=qzJMupv3Rq2KQqfw2sJb025gy5nSzJOq6kjuyvqCeXc,1764
|
|
23
|
+
hos_m2f-0.5.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
24
|
+
hos_m2f-0.5.5.dist-info/entry_points.txt,sha256=1opnVMOGIufdlQMvWG_e-oTUS0Yca5ysnFKhmYvBmTM,76
|
|
25
|
+
hos_m2f-0.5.5.dist-info/top_level.txt,sha256=EOB5321A6FNFviV_29qnjHtmLG-F6peX7v5s9Rw96V0,14
|
|
26
|
+
hos_m2f-0.5.5.dist-info/RECORD,,
|
tests/test_latex.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""测试LaTeX渲染器和转换器"""
|
|
2
|
+
|
|
3
|
+
import unittest
|
|
4
|
+
import os
|
|
5
|
+
import tempfile
|
|
6
|
+
from hos_m2f.renderers.latex_renderer import LaTeXRenderer
|
|
7
|
+
from hos_m2f.converters.md_to_latex import MDToLaTeXConverter
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestLaTeX(unittest.TestCase):
|
|
11
|
+
"""测试LaTeX渲染器和转换器"""
|
|
12
|
+
|
|
13
|
+
def setUp(self):
    """Build the Markdown fixture and the matching pre-parsed structure."""
    # Raw Markdown document fed to the converter tests.
    self.test_content = """
# 测试文档

## 摘要

这是一个测试文档,用于测试LaTeX渲染功能。

## 引言

这是引言章节的内容。

### 背景

这是背景部分的内容。

## 方法

这是方法章节的内容。

### 实验设计

这是实验设计部分的内容。

## 结果

这是结果章节的内容。

### 数据表格

| 列1 | 列2 | 列3 |
| --- | --- | --- |
| 行1 | 行1 | 行1 |
| 行2 | 行2 | 行2 |

### 代码示例

```python
print("Hello, world!")
```

## 讨论

这是讨论章节的内容。

## 结论

这是结论章节的内容。

## 参考文献

[1] 参考文献1
[2] 参考文献2
""".strip()

    # Heading outline as (level, title, line_number).
    outline = [
        (1, "测试文档", 1),
        (2, "摘要", 3),
        (2, "引言", 7),
        (3, "背景", 9),
        (2, "方法", 13),
        (3, "实验设计", 15),
        (2, "结果", 19),
        (3, "数据表格", 21),
        (3, "代码示例", 29),
        (2, "讨论", 35),
        (2, "结论", 39),
        (2, "参考文献", 43),
    ]

    # Chapter bodies as (title, content, level, start_line, end_line).
    chapter_rows = [
        ("测试文档", "", 1, 1, 1),
        ("摘要", "这是一个测试文档,用于测试LaTeX渲染功能。", 2, 3, 5),
        ("引言", "这是引言章节的内容。", 2, 7, 8),
        ("背景", "这是背景部分的内容。", 3, 9, 11),
        ("方法", "这是方法章节的内容。", 2, 13, 14),
        ("实验设计", "这是实验设计部分的内容。", 3, 15, 17),
        ("结果", "这是结果章节的内容。", 2, 19, 20),
        ("数据表格", "| 列1 | 列2 | 列3 |\n| --- | --- | --- |\n| 行1 | 行1 | 行1 |\n| 行2 | 行2 | 行2 |", 3, 21, 28),
        ("代码示例", "```python\nprint(\"Hello, world!\")\n```", 3, 29, 34),
        ("讨论", "这是讨论章节的内容。", 2, 35, 37),
        ("结论", "这是结论章节的内容。", 2, 39, 41),
        ("参考文献", "[1] 参考文献1\n[2] 参考文献2", 2, 43, 46),
    ]

    # Structured document handed directly to the renderer tests.
    self.structured_content = {
        "metadata": {
            "title": "测试文档",
            "author": "测试作者",
            "date": "2023-01-01",
            "abstract": "这是一个测试文档,用于测试LaTeX渲染功能。",
            "keywords": ["测试", "LaTeX", "渲染"],
        },
        "structure": [
            {"level": level, "title": title, "line_number": line_number}
            for level, title, line_number in outline
        ],
        "chapters": [
            {
                "title": title,
                "content": content,
                "level": level,
                "start_line": start_line,
                "end_line": end_line,
            }
            for title, content, level, start_line, end_line in chapter_rows
        ],
        "references": [{"text": entry} for entry in ("参考文献1", "参考文献2")],
    }
|
|
112
|
+
|
|
113
|
+
def test_latex_renderer(self):
    """Render the structured fixture and check that non-empty bytes come back."""
    latex_content = LaTeXRenderer().render(self.structured_content)
    self.assertIsInstance(latex_content, bytes)
    self.assertGreater(len(latex_content), 0)

    # Round-trip through a real .tex file; delete=False keeps it on disk
    # after the handle closes so the checks below can inspect it.
    with tempfile.NamedTemporaryFile(suffix=".tex", delete=False) as tmp:
        tmp_path = tmp.name
        tmp.write(latex_content)

    try:
        # The file must exist and must not be empty.
        self.assertTrue(os.path.exists(tmp_path))
        self.assertGreater(os.path.getsize(tmp_path), 0)
    finally:
        # Always remove the temporary file, even when an assertion fails.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
|
|
135
|
+
|
|
136
|
+
def test_md_to_latex_converter(self):
    """Convert the Markdown fixture and check that non-empty bytes come back."""
    result = MDToLaTeXConverter().convert(self.test_content)
    self.assertIsInstance(result, bytes)
    self.assertGreater(len(result), 0)

    # Round-trip through a real .tex file; delete=False keeps it on disk
    # after the handle closes so the checks below can inspect it.
    with tempfile.NamedTemporaryFile(suffix=".tex", delete=False) as tmp:
        tmp_path = tmp.name
        tmp.write(result)

    try:
        # The file must exist and must not be empty.
        self.assertTrue(os.path.exists(tmp_path))
        self.assertGreater(os.path.getsize(tmp_path), 0)
    finally:
        # Always remove the temporary file, even when an assertion fails.
        if os.path.exists(tmp_path):
            os.unlink(tmp_path)
|
|
158
|
+
|
|
159
|
+
def test_latex_with_options(self):
    """Exercise the renderer and the converter with explicit LaTeX options."""
    renderer = LaTeXRenderer()
    converter = MDToLaTeXConverter()

    # The same option set is passed to both entry points.
    options = dict(
        document_class="article",
        document_options="a4paper, 12pt",
        table_of_contents=True,
    )

    # Rendering the pre-parsed structure.
    rendered = renderer.render(self.structured_content, options)
    self.assertIsInstance(rendered, bytes)
    self.assertGreater(len(rendered), 0)

    # Converting the raw Markdown.
    converted = converter.convert(self.test_content, options)
    self.assertIsInstance(converted, bytes)
    self.assertGreater(len(converted), 0)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
if __name__ == '__main__':
|
|
182
|
+
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|