PyPI - hos-m2f - Versions diffs - 0.5.3__tar.gz → 0.5.4__tar.gz - Mend

hos-m2f 0.5.3 (tar.gz) → 0.5.4 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{hos_m2f-0.5.3 → hos_m2f-0.5.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hos-m2f
-Version: 0.5.3
+Version: 0.5.4
 Summary: HOS-M2F: Markdown to Industry Standard Format Compiler Engine
 Author: HOS Team
 Author-email: team@hos-m2f.com

{hos_m2f-0.5.3 → hos_m2f-0.5.4}/hos_m2f/cli/__init__.py RENAMED Viewed

@@ -1,5 +1,5 @@
 """CLI模块"""
-from hos_m2f.cli.cli import CLI
+from .cli import CLI
 __all__ = ['CLI']

hos_m2f-0.5.4/hos_m2f/converters/md_to_docx.py ADDED Viewed

@@ -0,0 +1,257 @@
+"""Markdown到DOCX格式转换器"""
+from typing import Any, Optional, Dict
+from docx import Document
+from docx.shared import Inches, Pt, RGBColor
+from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_UNDERLINE
+from docx.enum.style import WD_STYLE_TYPE
+from hos_m2f.converters.base_converter import BaseConverter
+import mistune
+class MDToDOCXConverter(BaseConverter):
+    """Markdown到DOCX格式转换器"""
+    def convert(self, input_content: str, options: Optional[Dict[str, Any]] = None) -> bytes:
+        """将Markdown转换为DOCX
+        Args:
+            input_content: Markdown内容
+            options: 转换选项
+        Returns:
+            bytes: DOCX文件的二进制数据
+        """
+        if options is None:
+            options = {}
+        # 创建文档
+        doc = Document()
+        # 设置默认样式
+        self._setup_styles(doc)
+        # 自定义渲染器
+        class DOCXRenderer(mistune.HTMLRenderer):
+            def __init__(self, doc):
+                super().__init__()
+                self.doc = doc
+                self.current_paragraph = None
+                self.list_level = 0
+                self.lists = []
+            def paragraph(self, text):
+                if text.strip():
+                    p = self.doc.add_paragraph()
+                    p.add_run(text)
+                return ''
+            def heading(self, text, level):
+                if level == 1:
+                    self.doc.add_heading(text, level=0)
+                else:
+                    self.doc.add_heading(text, level=level-1)
+                return ''
+            def list(self, text, ordered, level, start=None):
+                self.list_level += 1
+                self.lists.append(ordered)
+                return ''
+            def list_item(self, text, level):
+                if text.strip():
+                    p = self.doc.add_paragraph(
+                        text,
+                        style='List Number' if self.lists[level-1] else 'List Bullet'
+                    )
+                    # 缩进
+                    for i in range(level-1):
+                        p.paragraph_format.left_indent += Inches(0.5)
+                return ''
+            def list_end(self, level):
+                self.list_level -= 1
+                if self.lists:
+                    self.lists.pop()
+                return ''
+            def table(self, text):
+                # 解析Markdown表格并转换为DOCX表格
+                try:
+                    # 分割表格行
+                    rows = text.strip().split('\n')
+                    if not rows:
+                        return ''
+                    # 解析表头
+                    header_cells = [cell.strip() for cell in rows[0].split('|') if cell.strip()]
+                    if not header_cells:
+                        return ''
+                    # 创建表格
+                    table = self.doc.add_table(rows=1, cols=len(header_cells))
+                    table.style = 'Table Grid'
+                    # 填充表头
+                    header_row = table.rows[0]
+                    for i, cell_text in enumerate(header_cells):
+                        header_row.cells[i].text = cell_text
+                    # 跳过分隔线行
+                    if len(rows) > 1 and '---' in rows[1]:
+                        data_rows = rows[2:]
+                    else:
+                        data_rows = rows[1:]
+                    # 填充数据行
+                    for row in data_rows:
+                        cells = [cell.strip() for cell in row.split('|') if cell.strip()]
+                        if cells:
+                            new_row = table.add_row()
+                            for i, cell_text in enumerate(cells):
+                                if i < len(new_row.cells):
+                                    new_row.cells[i].text = cell_text
+                except Exception as e:
+                    # 如果解析失败，回退到简单处理
+                    self.doc.add_paragraph('Table: ' + text[:100] + '...')
+                return ''
+            def image(self, text, url=None, title=None, alt=None):
+                try:
+                    # 尝试处理本地和远程图片
+                    import os
+                    import requests
+                    from io import BytesIO
+                    # 使用alt作为替代文本
+                    if alt is None:
+                        alt = text
+                    # 检查是否有图片URL
+                    if not url:
+                        self.doc.add_paragraph(f'Image: {alt}')
+                        return ''
+                    # 检查是否是本地图片
+                    if os.path.exists(url):
+                        # 添加本地图片
+                        self.doc.add_picture(url)
+                    else:
+                        # 尝试从网络获取图片
+                        response = requests.get(url, timeout=5)
+                        if response.status_code == 200:
+                            # 添加远程图片
+                            image_stream = BytesIO(response.content)
+                            self.doc.add_picture(image_stream)
+                        else:
+                            # 如果获取失败，添加图片描述
+                            self.doc.add_paragraph(f'Image: {alt} ({url})')
+                except Exception as e:
+                    # 如果处理失败，添加图片描述
+                    self.doc.add_paragraph(f'Image: {alt or text} ({url or ""})')
+                return ''
+            def link(self, text, url=None, title=None):
+                if text and url:
+                    # 简化处理，直接添加文本和链接
+                    p = self.doc.add_paragraph()
+                    run = p.add_run(text)
+                    run.font.color.rgb = RGBColor(0, 0, 255)  # 蓝色
+                    run.underline = WD_UNDERLINE.SINGLE
+                    p.add_run(f' ({url})')
+                elif text:
+                    p = self.doc.add_paragraph(text)
+                elif url:
+                    p = self.doc.add_paragraph(url)
+                return ''
+            def emphasis(self, text):
+                # 直接添加斜体文本
+                p = self.doc.add_paragraph()
+                run = p.add_run(text)
+                run.italic = True
+                return ''
+            def strong(self, text):
+                # 直接添加粗体文本
+                p = self.doc.add_paragraph()
+                run = p.add_run(text)
+                run.bold = True
+                return ''
+            def codespan(self, text):
+                p = self.doc.add_paragraph()
+                run = p.add_run(text)
+                run.font.name = 'Courier New'
+                return ''
+            def block_code(self, code, info=None):
+                # 处理Mermaid图表
+                if info == 'mermaid':
+                    try:
+                        # 尝试渲染Mermaid图表为图片
+                        mermaid_image = self._render_mermaid(code)
+                        if mermaid_image:
+                            # 添加图片
+                            self.doc.add_picture(mermaid_image)
+                            return ''
+                        else:
+                            # 如果渲染失败，添加代码块
+                            p = self.doc.add_paragraph('Mermaid Chart:')
+                            p = self.doc.add_paragraph(code)
+                            p.paragraph_format.left_indent = Inches(0.5)
+                            return ''
+                    except Exception as e:
+                        # 如果处理失败，添加代码块
+                        p = self.doc.add_paragraph('Mermaid Chart:')
+                        p = self.doc.add_paragraph(code)
+                        p.paragraph_format.left_indent = Inches(0.5)
+                        return ''
+                else:
+                    # 处理普通代码块
+                    p = self.doc.add_paragraph()
+                    run = p.add_run(code)
+                    run.font.name = 'Courier New'
+                    p.paragraph_format.left_indent = Inches(0.5)
+                    return ''
+            def _render_mermaid(self, mermaid_code):
+                """渲染Mermaid图表为图片"""
+                # 简化处理，实际项目中需要使用mermaid-cli或其他工具
+                # 这里返回None，回退到显示代码块
+                return None
+        # 渲染Markdown
+        renderer = DOCXRenderer(doc)
+        markdown = mistune.create_markdown(renderer=renderer)
+        markdown(input_content)
+        # 保存为二进制数据
+        import io
+        output = io.BytesIO()
+        doc.save(output)
+        output.seek(0)
+        return output.getvalue()
+    def _setup_styles(self, doc):
+        """设置文档样式"""
+        styles = doc.styles
+        # 设置正文样式
+        normal_style = styles['Normal']
+        font = normal_style.font
+        font.name = 'Microsoft YaHei'
+        font.size = Pt(12)
+        # 设置标题样式
+        for i in range(1, 6):
+            heading_style = styles[f'Heading {i}']
+            font = heading_style.font
+            font.name = 'Microsoft YaHei'
+            font.size = Pt(14 + (6 - i) * 2)
+            font.bold = True
+    def get_supported_formats(self) -> tuple:
+        """获取支持的格式"""
+        return ('markdown', 'docx')

{hos_m2f-0.5.3 → hos_m2f-0.5.4}/hos_m2f/converters/md_to_epub.py RENAMED Viewed

@@ -44,20 +44,8 @@ class MDToEPUBConverter(BaseConverter):
             book.set_cover('images/cover.jpg', cover_image)
         # 解析Markdown
-        markdown = mistune.create_markdown(
-            plugins=[
-                'url',
-                'abbr',
-                'def_list',
-                'footnotes',
-                'tables',
-                'task_lists',
-                'strikethrough',
-                'highlight',
-                'superscript',
-                'subscript'
-            ]
-        )
+        markdown = mistune.create_markdown()
         # 转换为HTML
         html_content = markdown(input_content)

{hos_m2f-0.5.3 → hos_m2f-0.5.4}/hos_m2f/converters/md_to_html.py RENAMED Viewed

@@ -22,20 +22,8 @@ class MDToHTMLConverter(BaseConverter):
             options = {}
         # 解析Markdown
-        markdown = mistune.create_markdown(
-            plugins=[
-                'url',
-                'abbr',
-                'def_list',
-                'footnotes',
-                'tables',
-                'task_lists',
-                'strikethrough',
-                'highlight',
-                'superscript',
-                'subscript'
-            ]
-        )
+        markdown = mistune.create_markdown()
         # 转换为HTML
         html_content = markdown(input_content)

{hos_m2f-0.5.3 → hos_m2f-0.5.4}/hos_m2f/converters/md_to_json.py RENAMED Viewed

@@ -193,19 +193,30 @@ class MDToJSONConverter(BaseConverter):
                 language = line[3:].strip()
                 # 读取代码内容
-                for i, code_line in enumerate(lines[lines.index(line)+1:]):
-                    if code_line.startswith('```'):
-                        break
-                    code_lines.append(code_line)
-                structure['children'].append({
-                    'type': 'code_block',
-                    'language': language,
-                    'content': '\n'.join(code_lines)
-                })
-                # 跳过已处理的代码行
-                lines = lines[:lines.index(line)] + lines[lines.index(line)+i+2:]
+                try:
+                    line_idx = lines.index(line)
+                    code_end_idx = line_idx + 1
+                    for i, code_line in enumerate(lines[line_idx+1:]):
+                        if code_line.startswith('```'):
+                            code_end_idx = line_idx + i + 1
+                            break
+                        code_lines.append(code_line)
+                        code_end_idx = line_idx + i + 1
+                    structure['children'].append({
+                        'type': 'code_block',
+                        'language': language,
+                        'content': '\n'.join(code_lines)
+                    })
+                    # 跳过已处理的代码行
+                    if code_end_idx < len(lines):
+                        lines = lines[:line_idx] + lines[code_end_idx+1:]
+                    else:
+                        lines = lines[:line_idx]
+                except ValueError:
+                    # 如果找不到行，跳过代码块解析
+                    continue
             # 处理表格
             elif line.startswith('|') and '|' in line[1:]:
@@ -231,19 +242,28 @@ class MDToJSONConverter(BaseConverter):
                 table_lines = [line]
                 # 读取表格内容
-                for i, table_line in enumerate(lines[lines.index(line)+1:]):
-                    if table_line.startswith('|'):
-                        table_lines.append(table_line)
-                    else:
-                        break
+                try:
+                    line_idx = lines.index(line)
+                    for i, table_line in enumerate(lines[line_idx+1:]):
+                        if table_line.startswith('|'):
+                            table_lines.append(table_line)
+                        else:
+                            break
+                except ValueError:
+                    # 如果找不到行，跳过表格解析
+                    continue
                 # 解析表格结构
                 if len(table_lines) >= 2:
                     headers = [h.strip() for h in table_lines[0].split('|') if h.strip()]
                     rows = []
-                    # 跳过分隔线
-                    for table_line in table_lines[2:]:
+                    # 跳过分隔线（如果存在）
+                    start_idx = 1
+                    if len(table_lines) > 1 and any('---' in cell for cell in table_lines[1].split('|')):
+                        start_idx = 2
+                    for table_line in table_lines[start_idx:]:
                         cells = [c.strip() for c in table_line.split('|') if c.strip()]
                         if cells:
                             rows.append(dict(zip(headers, cells)))

{hos_m2f-0.5.3 → hos_m2f-0.5.4}/hos_m2f/converters/md_to_xml.py RENAMED Viewed

@@ -200,19 +200,30 @@ class MDToXMLConverter(BaseConverter):
                 language = line[3:].strip()
                 # 读取代码内容
-                for i, code_line in enumerate(lines[lines.index(line)+1:]):
-                    if code_line.startswith('```'):
-                        break
-                    code_lines.append(code_line)
-                structure['children'].append({
-                    'type': 'code_block',
-                    'language': language,
-                    'content': '\n'.join(code_lines)
-                })
-                # 跳过已处理的代码行
-                lines = lines[:lines.index(line)] + lines[lines.index(line)+i+2:]
+                try:
+                    line_idx = lines.index(line)
+                    code_end_idx = line_idx + 1
+                    for i, code_line in enumerate(lines[line_idx+1:]):
+                        if code_line.startswith('```'):
+                            code_end_idx = line_idx + i + 1
+                            break
+                        code_lines.append(code_line)
+                        code_end_idx = line_idx + i + 1
+                    structure['children'].append({
+                        'type': 'code_block',
+                        'language': language,
+                        'content': '\n'.join(code_lines)
+                    })
+                    # 跳过已处理的代码行
+                    if code_end_idx < len(lines):
+                        lines = lines[:line_idx] + lines[code_end_idx+1:]
+                    else:
+                        lines = lines[:line_idx]
+                except ValueError:
+                    # 如果找不到行，跳过代码块解析
+                    continue
             # 处理表格
             elif line.startswith('|') and '|' in line[1:]:
@@ -238,19 +249,28 @@ class MDToXMLConverter(BaseConverter):
                 table_lines = [line]
                 # 读取表格内容
-                for i, table_line in enumerate(lines[lines.index(line)+1:]):
-                    if table_line.startswith('|'):
-                        table_lines.append(table_line)
-                    else:
-                        break
+                try:
+                    line_idx = lines.index(line)
+                    for i, table_line in enumerate(lines[line_idx+1:]):
+                        if table_line.startswith('|'):
+                            table_lines.append(table_line)
+                        else:
+                            break
+                except ValueError:
+                    # 如果找不到行，跳过表格解析
+                    continue
                 # 解析表格结构
                 if len(table_lines) >= 2:
                     headers = [h.strip() for h in table_lines[0].split('|') if h.strip()]
                     rows = []
-                    # 跳过分隔线
-                    for table_line in table_lines[2:]:
+                    # 跳过分隔线（如果存在）
+                    start_idx = 1
+                    if len(table_lines) > 1 and any('---' in cell for cell in table_lines[1].split('|')):
+                        start_idx = 2
+                    for table_line in table_lines[start_idx:]:
                         cells = [c.strip() for c in table_line.split('|') if c.strip()]
                         if cells:
                             rows.append(dict(zip(headers, cells)))

{hos_m2f-0.5.3 → hos_m2f-0.5.4}/hos_m2f.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: hos-m2f
-Version: 0.5.3
+Version: 0.5.4
 Summary: HOS-M2F: Markdown to Industry Standard Format Compiler Engine
 Author: HOS Team
 Author-email: team@hos-m2f.com

{hos_m2f-0.5.3 → hos_m2f-0.5.4}/hos_m2f.egg-info/SOURCES.txt RENAMED Viewed

@@ -20,4 +20,7 @@ hos_m2f/converters/md_to_epub.py
 hos_m2f/converters/md_to_html.py
 hos_m2f/converters/md_to_json.py
 hos_m2f/converters/md_to_xml.py
-hos_m2f/converters/xml_to_md.py
+hos_m2f/converters/xml_to_md.py
+tests/__init__.py
+tests/test_converters.py
+tests/test_modes.py

{hos_m2f-0.5.3 → hos_m2f-0.5.4}/hos_m2f.egg-info/entry_points.txt RENAMED Viewed

@@ -1,2 +1,3 @@
 [console_scripts]
+hos = hos_m2f.cli.cli:main
 hos-m2f = hos_m2f.cli.cli:main

{hos_m2f-0.5.3 → hos_m2f-0.5.4}/hos_m2f.egg-info/top_level.txt RENAMED Viewed

@@ -1 +1,2 @@
 hos_m2f
+tests

{hos_m2f-0.5.3 → hos_m2f-0.5.4}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 setup(
     name="hos-m2f",
-    version="0.5.3",
+    version="0.5.4",
     description="HOS-M2F: Markdown to Industry Standard Format Compiler Engine",
     long_description="""HOS-M2F is a powerful compiler engine that converts Markdown files to various industry standard formats.
@@ -40,6 +40,7 @@ HOS-M2F simplifies the process of creating professionally formatted documents fr
     ],
     entry_points={
         "console_scripts": [
+            "hos=hos_m2f.cli.cli:main",
             "hos-m2f=hos_m2f.cli.cli:main"
         ]
     },

hos_m2f-0.5.4/tests/__init__.py ADDED Viewed

@@ -0,0 +1 @@
+"""测试包"""

hos_m2f-0.5.4/tests/test_converters.py ADDED Viewed

@@ -0,0 +1,179 @@
+"""测试转换器模块"""
+import unittest
+import os
+import tempfile
+from hos_m2f.converters.md_to_docx import MDToDOCXConverter
+from hos_m2f.converters.md_to_html import MDToHTMLConverter
+from hos_m2f.converters.md_to_json import MDToJSONConverter
+from hos_m2f.converters.md_to_xml import MDToXMLConverter
+from hos_m2f.converters.md_to_epub import MDToEPUBConverter
+class TestConverters(unittest.TestCase):
+    """测试转换器"""
+    def setUp(self):
+        """设置测试环境"""
+        # 创建测试用的Markdown内容
+        self.test_content = """
+# 测试文档
+这是一个测试文档，用于测试各种格式转换器。
+## 章节1
+这是章节1的内容。
+### 子章节1.1
+这是子章节1.1的内容。
+## 章节2
+这是章节2的内容。
+### 表格测试
+| 列1 | 列2 | 列3 |
+| --- | --- | --- |
+| 行1 | 行1 | 行1 |
+| 行2 | 行2 | 行2 |
+### Mermaid图表测试
+```mermaid
+graph TD
+    A[开始] --> B[处理]
+    B --> C[结束]
+```
+### 链接测试
+[百度](https://www.baidu.com)
+### 图片测试
+![测试图片](https://example.com/test.jpg)
+### 格式化测试
+*斜体文本*
+**粗体文本**
+`代码`
+```python
+print("Hello, world!")
+```
+        """.strip()
+    def test_md_to_docx(self):
+        """测试Markdown到DOCX转换"""
+        converter = MDToDOCXConverter()
+        result = converter.convert(self.test_content)
+        self.assertIsInstance(result, bytes)
+        self.assertGreater(len(result), 0)
+        # 保存为临时文件，以便手动检查
+        with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as tmp:
+            tmp.write(result)
+            tmp_path = tmp.name
+        try:
+            # 验证文件存在且大小大于0
+            self.assertTrue(os.path.exists(tmp_path))
+            self.assertGreater(os.path.getsize(tmp_path), 0)
+        finally:
+            # 清理临时文件
+            if os.path.exists(tmp_path):
+                os.unlink(tmp_path)
+    def test_md_to_html(self):
+        """测试Markdown到HTML转换"""
+        converter = MDToHTMLConverter()
+        result = converter.convert(self.test_content)
+        self.assertIsInstance(result, bytes)
+        self.assertGreater(len(result), 0)
+        # 保存为临时文件，以便手动检查
+        with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp:
+            tmp.write(result)
+            tmp_path = tmp.name
+        try:
+            # 验证文件存在且大小大于0
+            self.assertTrue(os.path.exists(tmp_path))
+            self.assertGreater(os.path.getsize(tmp_path), 0)
+        finally:
+            # 清理临时文件
+            if os.path.exists(tmp_path):
+                os.unlink(tmp_path)
+    def test_md_to_json(self):
+        """测试Markdown到JSON转换"""
+        converter = MDToJSONConverter()
+        result = converter.convert(self.test_content)
+        self.assertIsInstance(result, bytes)
+        self.assertGreater(len(result), 0)
+        # 保存为临时文件，以便手动检查
+        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp:
+            tmp.write(result)
+            tmp_path = tmp.name
+        try:
+            # 验证文件存在且大小大于0
+            self.assertTrue(os.path.exists(tmp_path))
+            self.assertGreater(os.path.getsize(tmp_path), 0)
+        finally:
+            # 清理临时文件
+            if os.path.exists(tmp_path):
+                os.unlink(tmp_path)
+    def test_md_to_xml(self):
+        """测试Markdown到XML转换"""
+        converter = MDToXMLConverter()
+        result = converter.convert(self.test_content)
+        self.assertIsInstance(result, bytes)
+        self.assertGreater(len(result), 0)
+        # 保存为临时文件，以便手动检查
+        with tempfile.NamedTemporaryFile(suffix=".xml", delete=False) as tmp:
+            tmp.write(result)
+            tmp_path = tmp.name
+        try:
+            # 验证文件存在且大小大于0
+            self.assertTrue(os.path.exists(tmp_path))
+            self.assertGreater(os.path.getsize(tmp_path), 0)
+        finally:
+            # 清理临时文件
+            if os.path.exists(tmp_path):
+                os.unlink(tmp_path)
+    def test_md_to_epub(self):
+        """测试Markdown到EPUB转换"""
+        converter = MDToEPUBConverter()
+        result = converter.convert(self.test_content)
+        self.assertIsInstance(result, bytes)
+        self.assertGreater(len(result), 0)
+        # 保存为临时文件，以便手动检查
+        with tempfile.NamedTemporaryFile(suffix=".epub", delete=False) as tmp:
+            tmp.write(result)
+            tmp_path = tmp.name
+        try:
+            # 验证文件存在且大小大于0
+            self.assertTrue(os.path.exists(tmp_path))
+            self.assertGreater(os.path.getsize(tmp_path), 0)
+        finally:
+            # 清理临时文件
+            if os.path.exists(tmp_path):
+                os.unlink(tmp_path)
+if __name__ == '__main__':
+    unittest.main()

hos_m2f-0.5.4/tests/test_modes.py ADDED Viewed

@@ -0,0 +1,202 @@
+"""测试模式模块"""
+import unittest
+import os
+import tempfile
+from hos_m2f.modes.book_mode import BookMode
+from hos_m2f.modes.patent_mode import PatentMode
+from hos_m2f.modes.sop_mode import SOPMode
+from hos_m2f.modes.paper_mode import PaperMode
+class TestModes(unittest.TestCase):
+    """测试模式"""
+    def setUp(self):
+        """设置测试环境"""
+        # 创建测试用的Markdown内容
+        self.book_content = """
+# 第1章 引言
+这是引言章节的内容。
+## 1.1 背景
+这是背景部分的内容。
+# 第2章 方法
+这是方法章节的内容。
+## 2.1 实验设计
+这是实验设计部分的内容。
+# 第3章 结果
+这是结果章节的内容。
+# 第4章 结论
+这是结论章节的内容。
+        """.strip()
+        self.patent_content = """
+# 一种新型的太阳能电池
+## 摘要
+本发明涉及一种新型的太阳能电池，具有高效率、低成本的特点。
+## 权利要求
+1. 一种太阳能电池，其特征在于，包括：
+   - 基板
+   - 光吸收层
+   - 电极
+2. 根据权利要求1所述的太阳能电池，其特征在于，所述光吸收层采用钙钛矿材料。
+3. 根据权利要求1所述的太阳能电池，其特征在于，所述电极采用透明导电氧化物。
+## 说明书
+本发明公开了一种新型的太阳能电池，包括基板、光吸收层和电极。所述光吸收层采用钙钛矿材料，具有高效率、低成本的特点。所述电极采用透明导电氧化物，提高了光利用率。
+        """.strip()
+        self.sop_content = """
+# 服务器维护SOP
+## 概述
+本文档描述了服务器日常维护的标准操作流程。
+## 步骤
+1. 检查服务器状态
+2. 更新系统补丁
+3. 备份关键数据
+4. 检查磁盘空间
+5. 检查内存使用情况
+6. 检查CPU负载
+7. 检查网络连接
+8. 生成维护报告
+## 检查项
+- [x] 服务器状态正常
+- [ ] 系统补丁已更新
+- [x] 关键数据已备份
+- [x] 磁盘空间充足
+- [x] 内存使用正常
+- [x] CPU负载正常
+- [x] 网络连接正常
+- [ ] 维护报告已生成
+## 风险评估
+| 风险 | 等级 | 缓解措施 |
+| --- | --- | --- |
+| 系统宕机 | 高 | 提前通知用户，安排在非业务高峰期进行维护 |
+| 数据丢失 | 高 | 多重备份，确保数据安全 |
+| 网络中断 | 中 | 提前检查网络设备，确保网络稳定 |
+        """.strip()
+        self.paper_content = """
+# 深度学习在图像处理中的应用
+## 摘要
+深度学习技术在图像处理领域取得了显著的成果，本文综述了深度学习在图像处理中的主要应用和最新进展。
+## 引言
+图像处理是计算机视觉的重要组成部分，传统的图像处理方法依赖于手工设计的特征提取器，而深度学习技术通过自动学习特征，显著提高了图像处理的性能。
+## 相关工作
+近年来，深度学习在图像处理领域的应用主要包括图像分类、目标检测、图像分割、图像生成等。
+## 方法
+本文采用文献综述的方法，系统分析了深度学习在图像处理中的应用。
+## 结果与讨论
+深度学习技术在图像处理领域取得了显著的成果，特别是在图像分类、目标检测等任务上，性能已经超过了人类专家。
+## 结论
+深度学习技术在图像处理领域具有广阔的应用前景，未来的研究方向包括模型轻量化、多模态融合等。
+## 参考文献
+[1] Krizhevsky A, Sutskever I, Hinton G E. ImageNet classification with deep convolutional neural networks[J]. Communications of the ACM, 2017, 60(6): 84-90.
+[2] Redmon J, Divvala S, Girshick R, et al. You only look once: Unified, real-time object detection[C]//Proceedings of the IEEE conference on computer vision and pattern recognition. 2016: 779-788.
+        """.strip()
+    def test_book_mode(self):
+        """测试Book模式"""
+        mode = BookMode()
+        # 测试处理功能
+        processed_content = mode.process(self.book_content)
+        self.assertIsInstance(processed_content, dict)
+        self.assertIn('book_structure', processed_content)
+        self.assertIn('toc', processed_content)
+        self.assertIn('book_metadata', processed_content)
+        # 测试验证功能
+        validation_result = mode.validate(self.book_content)
+        self.assertIsInstance(validation_result, dict)
+        self.assertIn('valid', validation_result)
+    def test_patent_mode(self):
+        """测试Patent模式"""
+        mode = PatentMode()
+        # 测试处理功能
+        processed_content = mode.process(self.patent_content)
+        self.assertIsInstance(processed_content, dict)
+        # 测试验证功能
+        validation_result = mode.validate(self.patent_content)
+        self.assertIsInstance(validation_result, dict)
+        self.assertIn('valid', validation_result)
+    def test_sop_mode(self):
+        """测试SOP模式"""
+        mode = SOPMode()
+        # 测试处理功能
+        processed_content = mode.process(self.sop_content)
+        self.assertIsInstance(processed_content, dict)
+        # 测试验证功能
+        validation_result = mode.validate(self.sop_content)
+        self.assertIsInstance(validation_result, dict)
+        self.assertIn('valid', validation_result)
+    def test_paper_mode(self):
+        """测试Paper模式"""
+        mode = PaperMode()
+        # 测试处理功能
+        processed_content = mode.process(self.paper_content)
+        self.assertIsInstance(processed_content, dict)
+        # 测试验证功能
+        validation_result = mode.validate(self.paper_content)
+        self.assertIsInstance(validation_result, dict)
+        self.assertIn('valid', validation_result)
+if __name__ == '__main__':
+    unittest.main()

hos_m2f-0.5.3/hos_m2f/converters/md_to_docx.py DELETED Viewed

@@ -1,171 +0,0 @@
-"""Markdown到DOCX格式转换器"""
-from typing import Any, Optional, Dict
-from docx import Document
-from docx.shared import Inches, Pt
-from docx.enum.text import WD_ALIGN_PARAGRAPH
-from docx.enum.style import WD_STYLE_TYPE
-from hos_m2f.converters.base_converter import BaseConverter
-import mistune
-class MDToDOCXConverter(BaseConverter):
-    """Markdown到DOCX格式转换器"""
-    def convert(self, input_content: str, options: Optional[Dict[str, Any]] = None) -> bytes:
-        """将Markdown转换为DOCX
-        Args:
-            input_content: Markdown内容
-            options: 转换选项
-        Returns:
-            bytes: DOCX文件的二进制数据
-        """
-        if options is None:
-            options = {}
-        # 创建文档
-        doc = Document()
-        # 设置默认样式
-        self._setup_styles(doc)
-        # 解析Markdown
-        markdown = mistune.create_markdown(
-            plugins=[
-                'url',
-                'abbr',
-                'def_list',
-                'footnotes',
-                'tables',
-                'task_lists',
-                'strikethrough',
-                'highlight',
-                'superscript',
-                'subscript'
-            ]
-        )
-        # 自定义渲染器
-        class DOCXRenderer(mistune.HTMLRenderer):
-            def __init__(self, doc):
-                super().__init__()
-                self.doc = doc
-                self.current_paragraph = None
-                self.list_level = 0
-                self.lists = []
-            def paragraph(self, text):
-                if text.strip():
-                    p = self.doc.add_paragraph()
-                    p.add_run(text)
-                return ''
-            def heading(self, text, level):
-                if level == 1:
-                    self.doc.add_heading(text, level=0)
-                else:
-                    self.doc.add_heading(text, level=level-1)
-                return ''
-            def list(self, text, ordered, level, start=None):
-                self.list_level += 1
-                self.lists.append(ordered)
-                return ''
-            def list_item(self, text, level):
-                if text.strip():
-                    p = self.doc.add_paragraph(
-                        text,
-                        style='List Number' if self.lists[level-1] else 'List Bullet'
-                    )
-                    # 缩进
-                    for i in range(level-1):
-                        p.paragraph_format.left_indent += Inches(0.5)
-                return ''
-            def list_end(self, level):
-                self.list_level -= 1
-                if self.lists:
-                    self.lists.pop()
-                return ''
-            def table(self, text):
-                # 简化处理，实际项目中需要更复杂的表格解析
-                self.doc.add_paragraph('Table: ' + text[:100] + '...')
-                return ''
-            def image(self, src, alt='', title=None):
-                try:
-                    # 简化处理，实际项目中需要处理本地和远程图片
-                    self.doc.add_paragraph(f'Image: {alt} ({src})')
-                except Exception:
-                    pass
-                return ''
-            def link(self, link, text=None, title=None):
-                if text:
-                    p = self.doc.add_paragraph()
-                    run = p.add_run(text)
-                    # 实际项目中需要添加超链接
-                return ''
-            def emphasis(self, text):
-                if self.current_paragraph:
-                    run = self.current_paragraph.add_run(text)
-                    run.italic = True
-                return ''
-            def strong(self, text):
-                if self.current_paragraph:
-                    run = self.current_paragraph.add_run(text)
-                    run.bold = True
-                return ''
-            def codespan(self, text):
-                p = self.doc.add_paragraph()
-                run = p.add_run(text)
-                run.font.name = 'Courier New'
-                return ''
-            def block_code(self, code, lang=None):
-                p = self.doc.add_paragraph()
-                run = p.add_run(code)
-                run.font.name = 'Courier New'
-                p.paragraph_format.left_indent = Inches(0.5)
-                return ''
-        # 渲染Markdown
-        renderer = DOCXRenderer(doc)
-        markdown(input_content, renderer)
-        # 保存为二进制数据
-        import io
-        output = io.BytesIO()
-        doc.save(output)
-        output.seek(0)
-        return output.getvalue()
-    def _setup_styles(self, doc):
-        """设置文档样式"""
-        styles = doc.styles
-        # 设置正文样式
-        normal_style = styles['Normal']
-        font = normal_style.font
-        font.name = 'Microsoft YaHei'
-        font.size = Pt(12)
-        # 设置标题样式
-        for i in range(1, 6):
-            heading_style = styles[f'Heading {i}']
-            font = heading_style.font
-            font.name = 'Microsoft YaHei'
-            font.size = Pt(14 + (6 - i) * 2)
-            font.bold = True
-    def get_supported_formats(self) -> tuple:
-        """获取支持的格式"""
-        return ('markdown', 'docx')