hos-m2f 0.5.4__py3-none-any.whl → 0.5.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -95,10 +95,28 @@ class MDToDOCXConverter(BaseConverter):
95
95
  # 填充表头
96
96
  header_row = table.rows[0]
97
97
  for i, cell_text in enumerate(header_cells):
98
- header_row.cells[i].text = cell_text
98
+ cell = header_row.cells[i]
99
+ cell.text = cell_text
100
+ # 设置表头样式
101
+ for paragraph in cell.paragraphs:
102
+ for run in paragraph.runs:
103
+ run.bold = True
104
+ run.font.size = Pt(11)
105
+ paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
99
106
 
100
- # 跳过分隔线行
107
+ # 跳过分隔线行,并解析对齐方式
108
+ alignments = []
101
109
  if len(rows) > 1 and '---' in rows[1]:
110
+ # 解析对齐方式
111
+ alignment_row = rows[1]
112
+ alignment_cells = [cell.strip() for cell in alignment_row.split('|') if cell.strip()]
113
+ for cell in alignment_cells:
114
+ if cell.startswith(':') and cell.endswith(':'):
115
+ alignments.append(WD_ALIGN_PARAGRAPH.CENTER)
116
+ elif cell.endswith(':'):
117
+ alignments.append(WD_ALIGN_PARAGRAPH.RIGHT)
118
+ else:
119
+ alignments.append(WD_ALIGN_PARAGRAPH.LEFT)
102
120
  data_rows = rows[2:]
103
121
  else:
104
122
  data_rows = rows[1:]
@@ -110,7 +128,12 @@ class MDToDOCXConverter(BaseConverter):
110
128
  new_row = table.add_row()
111
129
  for i, cell_text in enumerate(cells):
112
130
  if i < len(new_row.cells):
113
- new_row.cells[i].text = cell_text
131
+ cell = new_row.cells[i]
132
+ cell.text = cell_text
133
+ # 设置对齐方式
134
+ if i < len(alignments):
135
+ for paragraph in cell.paragraphs:
136
+ paragraph.alignment = alignments[i]
114
137
  except Exception as e:
115
138
  # 如果解析失败,回退到简单处理
116
139
  self.doc.add_paragraph('Table: ' + text[:100] + '...')
@@ -217,9 +240,31 @@ class MDToDOCXConverter(BaseConverter):
217
240
 
218
241
  def _render_mermaid(self, mermaid_code):
219
242
  """渲染Mermaid图表为图片"""
220
- # 简化处理,实际项目中需要使用mermaid-cli或其他工具
221
- # 这里返回None,回退到显示代码块
222
- return None
243
+ # 使用mermaid.ink API渲染Mermaid图表
244
+ try:
245
+ import requests
246
+ from io import BytesIO
247
+ import urllib.parse
248
+
249
+ # 编码Mermaid代码
250
+ encoded_code = urllib.parse.quote(mermaid_code)
251
+
252
+ # 构建API URL
253
+ url = f"https://mermaid.ink/img/{encoded_code}"
254
+
255
+ # 发送请求
256
+ response = requests.get(url, timeout=10)
257
+
258
+ if response.status_code == 200:
259
+ # 返回图片数据流
260
+ return BytesIO(response.content)
261
+ else:
262
+ # 如果API调用失败,返回None
263
+ return None
264
+ except Exception as e:
265
+ # 如果处理失败,返回None
266
+ print(f"Error rendering Mermaid chart: {e}")
267
+ return None
223
268
 
224
269
  # 渲染Markdown
225
270
  renderer = DOCXRenderer(doc)
@@ -2,14 +2,18 @@
2
2
 
3
3
  from typing import Any, Optional, Dict
4
4
  from hos_m2f.converters.base_converter import BaseConverter
5
- import ebooklib
6
- from ebooklib import epub
7
- import mistune
5
+ from hos_m2f.renderers.epub_renderer import EPUBRenderer
6
+ from hos_m2f.structure.book_parser import BookParser
8
7
 
9
8
 
10
9
  class MDToEPUBConverter(BaseConverter):
11
10
  """Markdown到EPUB格式转换器"""
12
11
 
12
+ def __init__(self):
13
+ """初始化转换器"""
14
+ self.renderer = EPUBRenderer()
15
+ self.book_parser = BookParser()
16
+
13
17
  def convert(self, input_content: str, options: Optional[Dict[str, Any]] = None) -> bytes:
14
18
  """将Markdown转换为EPUB
15
19
 
@@ -23,73 +27,41 @@ class MDToEPUBConverter(BaseConverter):
23
27
  if options is None:
24
28
  options = {}
25
29
 
26
- # 创建EPUB书籍
27
- book = epub.EpubBook()
28
-
29
- # 设置元数据
30
- book.set_identifier('id12345')
31
- book.set_title(options.get('title', 'Untitled'))
32
- book.set_language(options.get('language', 'zh'))
33
- book.add_author(options.get('author', 'Unknown'))
34
-
35
- # 添加封面
36
- if 'cover' in options:
37
- cover_image = epub.EpubItem(
38
- uid='cover-image',
39
- file_name='images/cover.jpg',
40
- media_type='image/jpeg',
41
- content=options['cover']
42
- )
43
- book.add_item(cover_image)
44
- book.set_cover('images/cover.jpg', cover_image)
45
-
46
- # 解析Markdown
47
- markdown = mistune.create_markdown()
48
-
30
+ # 使用BookParser解析Markdown内容
31
+ parsed_content = self.book_parser.parse(input_content, options)
49
32
 
50
- # 转换为HTML
51
- html_content = markdown(input_content)
33
+ # 增强解析结果
34
+ parsed_content = self._enhance_parsed_content(parsed_content, options)
52
35
 
53
- # 创建章节
54
- chapter = epub.EpubHtml(
55
- title=options.get('title', 'Chapter 1'),
56
- file_name='chapter1.xhtml',
57
- lang='zh'
58
- )
59
- chapter.content = f'''
60
- <!DOCTYPE html>
61
- <html>
62
- <head>
63
- <title>{options.get('title', 'Untitled')}</title>
64
- <meta charset="utf-8" />
65
- </head>
66
- <body>
67
- <h1>{options.get('title', 'Untitled')}</h1>
68
- {html_content}
69
- </body>
70
- </html>
71
- '''
36
+ # 使用EPUBRenderer渲染EPUB文件
37
+ epub_content = self.renderer.render(parsed_content, options)
72
38
 
73
- # 添加章节
74
- book.add_item(chapter)
75
-
76
- # 创建目录
77
- book.toc = [chapter]
78
-
79
- # 添加导航文件
80
- book.add_item(epub.EpubNcx())
81
- book.add_item(epub.EpubNav())
82
-
83
- # 定义spine
84
- book.spine = ['nav', chapter]
39
+ return epub_content
40
+
41
+ def _enhance_parsed_content(self, parsed_content: Dict[str, Any], options: Dict[str, Any]) -> Dict[str, Any]:
42
+ """增强解析结果"""
43
+ # 添加选项中的元数据
44
+ if 'title' in options:
45
+ parsed_content.setdefault('book_metadata', {})['title'] = options['title']
46
+ if 'author' in options:
47
+ parsed_content.setdefault('book_metadata', {})['author'] = options['author']
48
+ if 'language' in options:
49
+ parsed_content.setdefault('book_metadata', {})['language'] = options['language']
50
+ if 'publisher' in options:
51
+ parsed_content.setdefault('book_metadata', {})['publisher'] = options['publisher']
52
+ if 'publish_date' in options:
53
+ parsed_content.setdefault('book_metadata', {})['publish_date'] = options['publish_date']
54
+ if 'description' in options:
55
+ parsed_content.setdefault('book_metadata', {})['description'] = options['description']
85
56
 
86
- # 保存为二进制数据
87
- import io
88
- output = io.BytesIO()
89
- epub.write_epub(output, book, {})
90
- output.seek(0)
57
+ # 添加封面信息
58
+ if 'cover' in options:
59
+ parsed_content['cover'] = {
60
+ 'src': options['cover'],
61
+ 'type': 'image'
62
+ }
91
63
 
92
- return output.getvalue()
64
+ return parsed_content
93
65
 
94
66
  def get_supported_formats(self) -> tuple:
95
67
  """获取支持的格式"""
@@ -0,0 +1,63 @@
1
+ """Markdown到LaTeX格式转换器"""
2
+
3
+ from typing import Any, Optional, Dict
4
+ from hos_m2f.converters.base_converter import BaseConverter
5
+ from hos_m2f.renderers.latex_renderer import LaTeXRenderer
6
+ from hos_m2f.structure.semantic_parser import SemanticParser
7
+
8
+
9
class MDToLaTeXConverter(BaseConverter):
    """Converter that turns Markdown text into a LaTeX document."""

    # Option keys that are copied verbatim into ``parsed_content['metadata']``.
    _METADATA_OPTION_KEYS = ('title', 'author', 'date', 'abstract', 'keywords')

    def __init__(self):
        """Create the renderer and parser instances used by ``convert``."""
        self.renderer = LaTeXRenderer()
        self.parser = SemanticParser()

    def convert(self, input_content: str, options: Optional[Dict[str, Any]] = None) -> bytes:
        """Convert Markdown to LaTeX.

        Args:
            input_content: Markdown source text.
            options: Conversion options; ``None`` is treated as an empty dict.

        Returns:
            Whatever ``self.renderer.render`` produces for the parsed
            document (annotated as the LaTeX file's binary data).
        """
        effective_options = {} if options is None else options

        # Parse, overlay caller-supplied metadata, then render.
        document = self.parser.parse(input_content)
        document = self._enhance_parsed_content(document, effective_options)
        return self.renderer.render(document, effective_options)

    def _enhance_parsed_content(self, parsed_content: Dict[str, Any], options: Dict[str, Any]) -> Dict[str, Any]:
        """Overlay values from ``options`` onto the parsed document.

        The metadata keys present in ``options`` are written into
        ``parsed_content['metadata']`` (created on first use), and
        ``document_class`` is copied to the top level. The dict is modified
        in place and the same object is returned.
        """
        for key in self._METADATA_OPTION_KEYS:
            if key in options:
                parsed_content.setdefault('metadata', {})[key] = options[key]

        # The document class lives at the top level, not under metadata.
        if 'document_class' in options:
            parsed_content['document_class'] = options['document_class']

        return parsed_content

    def get_supported_formats(self) -> tuple:
        """Return the ``(source, target)`` format pair handled here."""
        return ('markdown', 'latex')
@@ -0,0 +1,120 @@
1
+ """PDF到Markdown格式转换器"""
2
+
3
+ from typing import Any, Optional, Dict
4
+ from hos_m2f.converters.base_converter import BaseConverter
5
+
6
+ # 延迟导入PyPDF2
7
+ pypdf2_available = False
8
+ PdfReader = None
9
+
10
+
11
def _check_pypdf2():
    """Try to import PyPDF2 once and record the outcome in module globals.

    On success the module-level ``PdfReader`` is bound to PyPDF2's class and
    ``pypdf2_available`` is set to ``True``. On ``ImportError`` two warning
    lines are printed and the flag is left unchanged, so a later call
    retries the import.
    """
    global pypdf2_available, PdfReader

    # Already imported successfully: nothing more to do.
    if pypdf2_available:
        return

    try:
        # Because of the ``global`` declaration this binds the module-level name.
        from PyPDF2 import PdfReader
    except ImportError as exc:
        print(f"Warning: PyPDF2 not available: {exc}")
        print("PDF to Markdown conversion is disabled.")
    else:
        pypdf2_available = True
21
+
22
+
23
class PDFToMDConverter(BaseConverter):
    """Converter that extracts text from a PDF and emits heuristic Markdown."""

    def convert(self, input_content: bytes, options: Optional[Dict[str, Any]] = None) -> bytes:
        """Convert PDF bytes to UTF-8 encoded Markdown.

        Args:
            input_content: Binary content of the PDF file.
            options: Conversion options; ``None`` is treated as an empty dict.

        Returns:
            The generated Markdown encoded as UTF-8 bytes.

        Raises:
            ImportError: If PyPDF2 cannot be imported.
        """
        # PyPDF2 is imported lazily; fail loudly if it is missing.
        _check_pypdf2()
        if not pypdf2_available:
            raise ImportError("PyPDF2 is not available. PDF to Markdown conversion is disabled.")

        if options is None:
            options = {}

        markdown_content = self._parse_pdf(input_content, options)
        return markdown_content.encode('utf-8')

    def _parse_pdf(self, pdf_content: bytes, options: Dict[str, Any]) -> str:
        """Extract the text of every page and convert it to Markdown.

        Pages whose extracted text is empty are skipped; the remaining page
        texts are joined with a blank line between them.
        """
        import io

        pdf_reader = PdfReader(io.BytesIO(pdf_content))

        page_texts = []
        for page in pdf_reader.pages:
            text = page.extract_text()
            if text:
                page_texts.append(text)

        full_text = '\n\n'.join(page_texts)
        return self._text_to_markdown(full_text, options)

    def _text_to_markdown(self, text: str, options: Dict[str, Any]) -> str:
        """Apply simple heuristics that turn plain text into Markdown.

        Rules, applied per stripped line:
          * A line starting with a dotted number (``1.``, ``1.2``, ``1.2.3.``)
            becomes a heading whose level is the number of numeric
            components, as long as that level is at most 6.
          * An all-uppercase line shorter than 50 characters becomes a
            level-2 heading.
          * Anything else is kept unchanged.

        Afterwards ``- item`` / ``* item`` lines are normalized to
        ``* item`` and every run of three or more capital letters is wrapped
        in ``**``. ``options`` is accepted for interface symmetry and is not
        read here.
        """
        import re

        # Captures the whole dotted number so its depth can be counted; the
        # optional trailing dot covers the "1." / "1.2." spellings.
        numbered_heading = re.compile(r'^(\d+(?:\.\d+)*)\.?')

        markdown_lines = []
        for raw_line in text.split('\n'):
            line = raw_line.strip()
            if not line:
                markdown_lines.append('')
                continue

            match = numbered_heading.match(line)
            # Require at least one dot so bare numbers ("2023 was ...") are
            # not mistaken for headings.
            if match and '.' in match.group(0):
                level = match.group(1).count('.') + 1
                if level <= 6:
                    # Plain concatenation: reusing the enclosing quote type
                    # inside an f-string is a syntax error before Python 3.12.
                    markdown_lines.append('#' * level + ' ' + line)
                    continue

            if line.isupper() and len(line) < 50:
                markdown_lines.append(f'## {line}')
                continue

            markdown_lines.append(line)

        markdown_content = '\n'.join(markdown_lines)

        # Normalize list bullets. ``[ \t]`` is used instead of ``\s`` because
        # ``\s`` also matches newlines and would swallow the blank line that
        # precedes a list item.
        markdown_content = re.sub(r'^[ \t]*-[ \t](.*)$', r'* \1', markdown_content, flags=re.MULTILINE)
        markdown_content = re.sub(r'^[ \t]*\*[ \t](.*)$', r'* \1', markdown_content, flags=re.MULTILINE)

        # Emphasize words written entirely in capitals.
        markdown_content = re.sub(r'\b([A-Z]{3,})\b', r'**\1**', markdown_content)

        return markdown_content

    def get_supported_formats(self) -> tuple:
        """Return the ``(source, target)`` format pair handled here."""
        return ('pdf', 'md')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: hos-m2f
3
- Version: 0.5.4
3
+ Version: 0.5.5
4
4
  Summary: HOS-M2F: Markdown to Industry Standard Format Compiler Engine
5
5
  Author: HOS Team
6
6
  Author-email: team@hos-m2f.com
@@ -7,17 +7,20 @@ hos_m2f/converters/docx_to_md.py,sha256=_HBp3TOD9ZkTFhHR_f3ObLlpDcv0tnSPjPfeGxuv
7
7
  hos_m2f/converters/epub_to_md.py,sha256=cFfHmK4IrJKwzEWVE3ue7Jw8tBfWu1q7wG9o7oMf4Pw,4612
8
8
  hos_m2f/converters/html_to_md.py,sha256=26GqdynSxKKO2NTxPKgfFs9bTuisLaEIJdBhz4CJ5Eg,4487
9
9
  hos_m2f/converters/json_to_md.py,sha256=jeLBQ3jTkgA5a2Kr2gsOPjZB-D4PZxumciFHbyPKNmc,3670
10
- hos_m2f/converters/md_to_docx.py,sha256=CPlw5GZDVBCyveBGX3ZqlNN33w-y_KYXocYiAzR0kCk,9963
11
- hos_m2f/converters/md_to_epub.py,sha256=1cQlzkx_8eqZDHzEMKFrmFG4gpVMos2weF11v987W5A,2668
10
+ hos_m2f/converters/md_to_docx.py,sha256=GFAAQppSiCff7pkDAPEmvuoj_f4DMzNWHsbv-9cbqmU,12248
11
+ hos_m2f/converters/md_to_epub.py,sha256=wNoniOSgIz7qiuIagJzqsF6f4pu_HLUigq-w0a_HoFg,2572
12
12
  hos_m2f/converters/md_to_html.py,sha256=Pn5K6_QiCdasK1M3hdyr4jlTzzu3OpQLJ-wznGiomPo,2502
13
13
  hos_m2f/converters/md_to_json.py,sha256=4VzUQFQ8nStmqm7td6MOFKji25hSiydMZhVJcsRHdYU,11246
14
+ hos_m2f/converters/md_to_latex.py,sha256=7Fra7f984XLLWJTSbjPJP3ljSUldvpc2sqF2QyyPUJg,2348
14
15
  hos_m2f/converters/md_to_xml.py,sha256=ARuf4rEX4Of-VdGJI45lAejJV8OmtlHQMK8rltzg6B0,14217
16
+ hos_m2f/converters/pdf_to_md.py,sha256=CgKrvv3CWc6H94nNrDO5nLIegttDzokDpoP2E2oSmEs,3851
15
17
  hos_m2f/converters/xml_to_md.py,sha256=zOkaEaSZdvyHag05kIHiWF4VyGMMjfmWmBllBpzwJ4E,4051
16
18
  tests/__init__.py,sha256=q1Fh8atmZO-c9dA8JDMvlWaIZxlwABwe_HgNgFNDKJc,16
17
19
  tests/test_converters.py,sha256=0sAG1fLR0UjJIWzlKWBR2QU7yl8a8LP8NwwSaU1TI5E,5150
20
+ tests/test_latex.py,sha256=-KCCYKRDu6RoI3gOt0HTtExsW2IJ6KoNfIWeocbdFyY,6619
18
21
  tests/test_modes.py,sha256=FFZN1cp4sUJUR5fjbZXo2z-Z4Q5akCRBmSIiR7MCdVA,5887
19
- hos_m2f-0.5.4.dist-info/METADATA,sha256=6VH8kNleEj0SZUtsUgOjsrNYO5VbkQgUgsvWTqUslV4,1764
20
- hos_m2f-0.5.4.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
21
- hos_m2f-0.5.4.dist-info/entry_points.txt,sha256=1opnVMOGIufdlQMvWG_e-oTUS0Yca5ysnFKhmYvBmTM,76
22
- hos_m2f-0.5.4.dist-info/top_level.txt,sha256=EOB5321A6FNFviV_29qnjHtmLG-F6peX7v5s9Rw96V0,14
23
- hos_m2f-0.5.4.dist-info/RECORD,,
22
+ hos_m2f-0.5.5.dist-info/METADATA,sha256=qzJMupv3Rq2KQqfw2sJb025gy5nSzJOq6kjuyvqCeXc,1764
23
+ hos_m2f-0.5.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
24
+ hos_m2f-0.5.5.dist-info/entry_points.txt,sha256=1opnVMOGIufdlQMvWG_e-oTUS0Yca5ysnFKhmYvBmTM,76
25
+ hos_m2f-0.5.5.dist-info/top_level.txt,sha256=EOB5321A6FNFviV_29qnjHtmLG-F6peX7v5s9Rw96V0,14
26
+ hos_m2f-0.5.5.dist-info/RECORD,,
tests/test_latex.py ADDED
@@ -0,0 +1,182 @@
1
+ """测试LaTeX渲染器和转换器"""
2
+
3
+ import unittest
4
+ import os
5
+ import tempfile
6
+ from hos_m2f.renderers.latex_renderer import LaTeXRenderer
7
+ from hos_m2f.converters.md_to_latex import MDToLaTeXConverter
8
+
9
+
10
class TestLaTeX(unittest.TestCase):
    """Smoke tests for the LaTeX renderer and the Markdown-to-LaTeX converter."""

    def setUp(self):
        """Build the Markdown sample and its pre-parsed structured form."""
        # Markdown document fed to the converter.
        self.test_content = """
# 测试文档

## 摘要

这是一个测试文档,用于测试LaTeX渲染功能。

## 引言

这是引言章节的内容。

### 背景

这是背景部分的内容。

## 方法

这是方法章节的内容。

### 实验设计

这是实验设计部分的内容。

## 结果

这是结果章节的内容。

### 数据表格

| 列1 | 列2 | 列3 |
| --- | --- | --- |
| 行1 | 行1 | 行1 |
| 行2 | 行2 | 行2 |

### 代码示例

```python
print("Hello, world!")
```

## 讨论

这是讨论章节的内容。

## 结论

这是结论章节的内容。

## 参考文献

[1] 参考文献1
[2] 参考文献2
""".strip()

        # Structured document handed directly to the renderer.
        metadata = {
            "title": "测试文档",
            "author": "测试作者",
            "date": "2023-01-01",
            "abstract": "这是一个测试文档,用于测试LaTeX渲染功能。",
            "keywords": ["测试", "LaTeX", "渲染"]
        }
        structure = [
            {"level": 1, "title": "测试文档", "line_number": 1},
            {"level": 2, "title": "摘要", "line_number": 3},
            {"level": 2, "title": "引言", "line_number": 7},
            {"level": 3, "title": "背景", "line_number": 9},
            {"level": 2, "title": "方法", "line_number": 13},
            {"level": 3, "title": "实验设计", "line_number": 15},
            {"level": 2, "title": "结果", "line_number": 19},
            {"level": 3, "title": "数据表格", "line_number": 21},
            {"level": 3, "title": "代码示例", "line_number": 29},
            {"level": 2, "title": "讨论", "line_number": 35},
            {"level": 2, "title": "结论", "line_number": 39},
            {"level": 2, "title": "参考文献", "line_number": 43}
        ]
        chapters = [
            {"title": "测试文档", "content": "", "level": 1, "start_line": 1, "end_line": 1},
            {"title": "摘要", "content": "这是一个测试文档,用于测试LaTeX渲染功能。", "level": 2, "start_line": 3, "end_line": 5},
            {"title": "引言", "content": "这是引言章节的内容。", "level": 2, "start_line": 7, "end_line": 8},
            {"title": "背景", "content": "这是背景部分的内容。", "level": 3, "start_line": 9, "end_line": 11},
            {"title": "方法", "content": "这是方法章节的内容。", "level": 2, "start_line": 13, "end_line": 14},
            {"title": "实验设计", "content": "这是实验设计部分的内容。", "level": 3, "start_line": 15, "end_line": 17},
            {"title": "结果", "content": "这是结果章节的内容。", "level": 2, "start_line": 19, "end_line": 20},
            {"title": "数据表格", "content": "| 列1 | 列2 | 列3 |\n| --- | --- | --- |\n| 行1 | 行1 | 行1 |\n| 行2 | 行2 | 行2 |", "level": 3, "start_line": 21, "end_line": 28},
            {"title": "代码示例", "content": "```python\nprint(\"Hello, world!\")\n```", "level": 3, "start_line": 29, "end_line": 34},
            {"title": "讨论", "content": "这是讨论章节的内容。", "level": 2, "start_line": 35, "end_line": 37},
            {"title": "结论", "content": "这是结论章节的内容。", "level": 2, "start_line": 39, "end_line": 41},
            {"title": "参考文献", "content": "[1] 参考文献1\n[2] 参考文献2", "level": 2, "start_line": 43, "end_line": 46}
        ]
        references = [
            {"text": "参考文献1"},
            {"text": "参考文献2"}
        ]
        self.structured_content = {
            "metadata": metadata,
            "structure": structure,
            "chapters": chapters,
            "references": references
        }

    def _assert_non_empty_bytes(self, payload):
        """Assert that ``payload`` is a ``bytes`` object with content."""
        self.assertIsInstance(payload, bytes)
        self.assertGreater(len(payload), 0)

    def _assert_written_to_disk(self, payload):
        """Write ``payload`` to a temporary ``.tex`` file and check it landed.

        The file is created with ``delete=False`` so it outlives the ``with``
        block; it is removed again in the ``finally`` clause.
        """
        with tempfile.NamedTemporaryFile(suffix=".tex", delete=False) as handle:
            handle.write(payload)
            tex_path = handle.name

        try:
            self.assertTrue(os.path.exists(tex_path))
            self.assertGreater(os.path.getsize(tex_path), 0)
        finally:
            if os.path.exists(tex_path):
                os.unlink(tex_path)

    def test_latex_renderer(self):
        """The renderer turns structured content into non-empty bytes."""
        renderer = LaTeXRenderer()

        latex_content = renderer.render(self.structured_content)
        self._assert_non_empty_bytes(latex_content)
        self._assert_written_to_disk(latex_content)

    def test_md_to_latex_converter(self):
        """The converter turns Markdown into non-empty bytes."""
        converter = MDToLaTeXConverter()

        result = converter.convert(self.test_content)
        self._assert_non_empty_bytes(result)
        self._assert_written_to_disk(result)

    def test_latex_with_options(self):
        """Renderer and converter both accept an options dict."""
        renderer = LaTeXRenderer()
        converter = MDToLaTeXConverter()

        options = {
            "document_class": "article",
            "document_options": "a4paper, 12pt",
            "table_of_contents": True
        }

        # Rendering with options.
        self._assert_non_empty_bytes(renderer.render(self.structured_content, options))

        # Conversion with options.
        self._assert_non_empty_bytes(converter.convert(self.test_content, options))
179
+
180
+
181
+ if __name__ == '__main__':
182
+ unittest.main()