md2word 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
md2word/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ """
2
+ md2word - Convert Markdown to Word documents.
3
+
4
+ A Python library and CLI tool for converting Markdown files to Word documents (.docx)
5
+ with extensive customization options.
6
+ """
7
+
8
+ from .config import CHINESE_FONT_SIZE_MAP, DEFAULT_CONFIG, Config, StyleConfig
9
+ from .converter import convert, convert_file
10
+ from .latex import extract_latex_formulas, latex_to_omml
11
+
12
# Package version string; the CLI prints it for ``--version``.
__version__ = "0.1.0"
# Names re-exported as the package's public API: the configuration objects,
# the two conversion entry points, and the LaTeX helpers imported above.
__all__ = [
    "Config",
    "StyleConfig",
    "DEFAULT_CONFIG",
    "CHINESE_FONT_SIZE_MAP",
    "convert",
    "convert_file",
    "extract_latex_formulas",
    "latex_to_omml",
]
md2word/__main__.py ADDED
@@ -0,0 +1,91 @@
1
+ """
2
+ CLI entry point for md2word.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import argparse
8
+ import sys
9
+ from pathlib import Path
10
+
11
+ from .config import DEFAULT_CONFIG, Config
12
+ from .converter import convert_file
13
+
14
+
15
def main(argv: list[str] | None = None) -> int:
    """Run the md2word command-line interface.

    Args:
        argv: Arguments to parse, without the program name. ``None`` (the
            default) lets argparse read ``sys.argv[1:]``, which matches the
            behaviour of calling ``main()`` with no arguments.

    Returns:
        The process exit status: ``0`` on success; ``1`` when no input file
        is given, the input file is missing, the config file cannot be
        written or loaded, or the conversion raises.
    """
    parser = argparse.ArgumentParser(
        prog="md2word",
        description="Convert Markdown files to Word documents (.docx)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  md2word input.md                     Convert to input.docx
  md2word input.md -o output.docx      Specify output file
  md2word input.md --toc               Add table of contents
  md2word input.md -c config.json      Use custom config file
  md2word --init-config                Generate default config file
        """,
    )
    parser.add_argument("input", nargs="?", help="Input Markdown file path")
    parser.add_argument("-o", "--output", help="Output Word file path (default: input with .docx extension)")
    parser.add_argument("-c", "--config", default="config.json", help="Config file path (default: config.json)")
    parser.add_argument("--toc", action="store_true", help="Add table of contents at the beginning")
    parser.add_argument("--toc-title", default="目录", help="TOC title (default: 目录)")
    parser.add_argument("--toc-level", type=int, default=3, help="Maximum heading level for TOC (default: 3)")
    parser.add_argument("--init-config", action="store_true", help="Generate default config file")
    parser.add_argument("-v", "--version", action="store_true", help="Show version")

    args = parser.parse_args(argv)

    if args.version:
        from . import __version__

        print(f"md2word {__version__}")
        return 0

    if args.init_config:
        import json

        config_path = Path(args.config)
        try:
            # Mode "x" creates the file exclusively, so an existing file is
            # detected atomically instead of via a separate exists() check.
            with open(config_path, "x", encoding="utf-8") as f:
                json.dump(DEFAULT_CONFIG, f, ensure_ascii=False, indent=4)
        except FileExistsError:
            print(f"[ERROR] Config file already exists: {config_path}")
            return 1
        except OSError as e:
            print(f"[ERROR] Failed to write config file {config_path}: {e}")
            return 1
        print(f"[INFO] Config file created: {config_path}")
        return 0

    if not args.input:
        parser.print_help()
        return 1

    input_path = Path(args.input)
    if not input_path.exists():
        print(f"[ERROR] Input file not found: {input_path}")
        return 1

    # Load config; a missing file silently means "use built-in defaults".
    config_path = Path(args.config)
    try:
        config = Config.from_file(config_path) if config_path.exists() else Config()
    except Exception as e:
        # Top-level CLI boundary: an unreadable or malformed config is
        # reported like a conversion failure rather than as a traceback.
        print(f"[ERROR] Failed to load config file {config_path}: {e}")
        return 1

    try:
        convert_file(
            input_path,
            args.output,
            config,
            toc=args.toc,
            toc_title=args.toc_title,
            toc_max_level=args.toc_level,
        )
    except Exception as e:
        print(f"[ERROR] Conversion failed: {e}")
        return 1
    return 0
88
+
89
+
90
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
md2word/config.py ADDED
@@ -0,0 +1,351 @@
1
+ """Configuration classes for md2word."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ from dataclasses import dataclass, field
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
# Chinese typographic size names (字号) mapped to their size in points.
# Each row pairs a nominal size with its "small" (小) variant where one exists.
CHINESE_FONT_SIZE_MAP: dict[str, float] = {
    "初号": 42, "小初": 36,
    "一号": 26, "小一": 24,
    "二号": 22, "小二": 18,
    "三号": 16, "小三": 15,
    "四号": 14, "小四": 12,
    "五号": 10.5, "小五": 9,
    "六号": 7.5, "小六": 6.5,
    "七号": 5.5,
    "八号": 5,
}
29
+
30
+
31
def parse_font_size(value: int | float | str) -> float:
    """Parse a font size given in points or as a Chinese size name.

    Args:
        value: A number of points, a numeric string such as ``"10.5"``, or a
            key of ``CHINESE_FONT_SIZE_MAP`` such as ``"小四"``. Strings are
            stripped of surrounding whitespace before lookup.

    Returns:
        The size in points, always as a ``float``. Anything that is not a
        finite, positive size (unknown names, non-numeric strings, ``nan``,
        ``inf``, zero, negatives, booleans, other types) yields the 10.5 pt
        default after a warning is printed.
    """
    candidate = value
    if isinstance(value, str):
        value = value.strip()
        # A known size name resolves to its point value; any other string is
        # tried as a plain number below.
        candidate = CHINESE_FONT_SIZE_MAP.get(value, value)

    size = None
    # bool is a subclass of int, but a JSON ``true`` is not a meaningful size.
    if isinstance(candidate, (int, float, str)) and not isinstance(candidate, bool):
        try:
            # float() also normalises the int entries of the size table so the
            # declared return type holds.
            size = float(candidate)
        except (ValueError, OverflowError):
            size = None

    # The chained comparison is False for NaN, so NaN is rejected here too.
    if size is not None and 0 < size < float("inf"):
        return size

    print(f"[WARN] Unrecognized font size: {value}, using default 10.5 (五号)")
    return 10.5
45
+
46
+
47
@dataclass
class StyleConfig:
    """Formatting options for one kind of document element.

    Instances are usually built from a JSON style entry via ``from_dict`` and
    can be turned back into a plain dictionary with ``to_dict``.
    """

    # Character formatting
    font_name: str = "微软雅黑"
    font_size: float = 11
    bold: bool = False
    italic: bool = False
    color: str = "000000"
    # Paragraph spacing and indentation
    space_before: int = 0
    space_after: int = 6
    line_spacing: float = 1.0
    left_indent: float = 0
    background_color: str | None = None
    # Extended options
    alignment: str = "left"  # one of: left, center, right, justify
    line_spacing_rule: str = "multiple"  # one of: single, 1.5, double, multiple, exact, at_least
    line_spacing_value: float | None = None  # points or a multiple, depending on the rule
    first_line_indent: float = 0  # measured in characters
    is_heading: bool = True  # whether the element counts as a heading (for the TOC)
    numbering_format: str | None = None  # numbering scheme, if any

    @classmethod
    def from_dict(cls, data: dict[str, Any], default_font: str = "微软雅黑") -> StyleConfig:
        """Build a StyleConfig from a dictionary, filling gaps with defaults.

        Args:
            data: Style options keyed by field name; unknown keys are ignored.
            default_font: Font used when ``data`` has no ``font_name``.

        Returns:
            A new instance. ``font_size`` is passed through
            ``parse_font_size``; every other value is taken as-is.
        """
        # (key, value used when the key is absent) for the pass-through fields.
        fallbacks = (
            ("bold", False),
            ("italic", False),
            ("color", "000000"),
            ("space_before", 0),
            ("space_after", 6),
            ("line_spacing", 1.0),
            ("left_indent", 0),
            ("background_color", None),
            ("alignment", "left"),
            ("line_spacing_rule", "multiple"),
            ("line_spacing_value", None),
            ("first_line_indent", 0),
            ("is_heading", True),
            ("numbering_format", None),
        )
        options = {key: data.get(key, fallback) for key, fallback in fallbacks}
        options["font_name"] = data.get("font_name", default_font)
        options["font_size"] = parse_font_size(data.get("font_size", 11))
        return cls(**options)

    def to_dict(self) -> dict[str, Any]:
        """Return the style's fields as a plain dictionary, in declaration order."""
        names = (
            "font_name",
            "font_size",
            "bold",
            "italic",
            "color",
            "space_before",
            "space_after",
            "line_spacing",
            "left_indent",
            "background_color",
            "alignment",
            "line_spacing_rule",
            "line_spacing_value",
            "first_line_indent",
            "is_heading",
            "numbering_format",
        )
        return {name: getattr(self, name) for name in names}
111
+
112
+
113
@dataclass
class TableConfig:
    """Appearance options for tables: borders, shading, padding and width."""

    # Borders
    border_style: str = "single"  # one of: single, double, dotted, dashed, none
    border_color: str = "000000"
    border_width: int = 4  # eighths of a point (4 = 0.5pt, 8 = 1pt)

    # Shading
    header_background_color: str | None = None  # e.g. "D9E2F3" for light blue
    cell_background_color: str | None = None
    alternating_row_color: str | None = None  # enables zebra striping

    # Cell padding, in points
    cell_padding_top: float = 2
    cell_padding_bottom: float = 2
    cell_padding_left: float = 5
    cell_padding_right: float = 5

    # Width
    width_mode: str = "auto"  # one of: auto, full (100% page width), fixed
    width_inches: float | None = None  # only used when width_mode is "fixed"

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> TableConfig:
        """Build a TableConfig from a dictionary, filling gaps with defaults.

        Args:
            data: Table options keyed by field name; unknown keys are ignored.

        Returns:
            A new instance holding the given values, or the default for any
            key that is absent.
        """
        fallbacks = {
            "border_style": "single",
            "border_color": "000000",
            "border_width": 4,
            "header_background_color": None,
            "cell_background_color": None,
            "alternating_row_color": None,
            "cell_padding_top": 2,
            "cell_padding_bottom": 2,
            "cell_padding_left": 5,
            "cell_padding_right": 5,
            "width_mode": "auto",
            "width_inches": None,
        }
        return cls(**{key: data.get(key, fallback) for key, fallback in fallbacks.items()})

    def to_dict(self) -> dict[str, Any]:
        """Return the table options as a plain dictionary, in declaration order."""
        names = (
            "border_style",
            "border_color",
            "border_width",
            "header_background_color",
            "cell_background_color",
            "alternating_row_color",
            "cell_padding_top",
            "cell_padding_bottom",
            "cell_padding_left",
            "cell_padding_right",
            "width_mode",
            "width_inches",
        )
        return {name: getattr(self, name) for name in names}
171
+
172
+
173
@dataclass
class Config:
    """Global configuration for the md2word converter.

    Holds document-level settings, image handling settings, the named element
    styles and the table appearance. The JSON form groups these under the
    ``document``, ``image``, ``styles`` and ``table`` sections.
    """

    default_font: str = "微软雅黑"
    page_width_inches: float = 8.5
    page_height_inches: float = 11
    max_image_width_inches: float = 6.0
    image_local_dir: str = "./images"
    image_download_timeout: int = 30
    image_user_agent: str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    styles: dict[str, StyleConfig] = field(default_factory=dict)
    table: TableConfig = field(default_factory=TableConfig)

    @classmethod
    def from_file(cls, config_path: str | Path) -> Config:
        """Load configuration from a JSON file.

        Args:
            config_path: Location of the JSON file.

        Returns:
            The parsed configuration, or an all-defaults instance (after
            printing a notice) when the file does not exist.

        Raises:
            json.JSONDecodeError: If the file is not valid JSON.
        """
        config_path = Path(config_path)
        if not config_path.exists():
            print(f"Config file not found: {config_path}, using defaults")
            return cls()

        with open(config_path, encoding="utf-8") as f:
            data = json.load(f)

        return cls.from_dict(data)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> Config:
        """Create a Config from a dictionary shaped like the JSON file.

        Missing sections and missing keys keep their defaults. A section that
        is present but empty or ``None`` (JSON ``null``) is treated the same
        as a missing one, matching how the ``table`` section was already
        handled.

        Args:
            data: Mapping with optional ``document``, ``image``, ``styles``
                and ``table`` sections.

        Returns:
            A new Config populated from ``data``.
        """
        config = cls()

        # Document configuration
        doc_config = data.get("document") or {}
        config.default_font = doc_config.get("default_font", config.default_font)
        config.page_width_inches = doc_config.get("page_width_inches", config.page_width_inches)
        config.page_height_inches = doc_config.get("page_height_inches", config.page_height_inches)
        config.max_image_width_inches = doc_config.get("max_image_width_inches", config.max_image_width_inches)

        # Image configuration
        img_config = data.get("image") or {}
        config.image_local_dir = img_config.get("local_dir", config.image_local_dir)
        config.image_download_timeout = img_config.get("download_timeout", config.image_download_timeout)
        config.image_user_agent = img_config.get("user_agent", config.image_user_agent)

        # Style configuration; parsed after the document section so that
        # styles without a font_name inherit the configured default font.
        styles_data = data.get("styles") or {}
        for style_name, style_config in styles_data.items():
            config.styles[style_name] = StyleConfig.from_dict(style_config or {}, config.default_font)

        # Table configuration
        table_data = data.get("table") or {}
        if table_data:
            config.table = TableConfig.from_dict(table_data)

        return config

    def get_style(self, style_name: str) -> StyleConfig:
        """Return the named style, or a default style in ``default_font`` if unknown."""
        style = self.styles.get(style_name)
        if style is None:
            # Build the fallback only on a miss instead of on every lookup.
            style = StyleConfig(font_name=self.default_font)
        return style

    def to_dict(self) -> dict[str, Any]:
        """Convert to a dictionary with the same sections ``from_dict`` reads."""
        return {
            "document": {
                "default_font": self.default_font,
                "page_width_inches": self.page_width_inches,
                "page_height_inches": self.page_height_inches,
                "max_image_width_inches": self.max_image_width_inches,
            },
            "image": {
                "local_dir": self.image_local_dir,
                "download_timeout": self.image_download_timeout,
                "user_agent": self.image_user_agent,
            },
            "styles": {name: style.to_dict() for name, style in self.styles.items()},
            "table": self.table.to_dict(),
        }

    def save(self, path: str | Path) -> None:
        """Write the configuration to ``path`` as UTF-8 JSON, overwriting any existing file."""
        with open(path, "w", encoding="utf-8") as f:
            json.dump(self.to_dict(), f, ensure_ascii=False, indent=4)
256
+
257
+
258
def _heading_style(
    alignment: str,
    first_line_indent: int,
    space_before: int,
    space_after: int,
    numbering_format: str,
) -> dict:
    """Build one heading entry; the headings share font, size, weight and line spacing."""
    return {
        "font_name": "黑体",
        "font_size": "三号",
        "bold": True,
        "alignment": alignment,
        "line_spacing_rule": "exact",
        "line_spacing_value": 28,
        "first_line_indent": first_line_indent,
        "space_before": space_before,
        "space_after": space_after,
        "numbering_format": numbering_format,
    }


# Template configuration; this is what ``md2word --init-config`` writes to disk.
# Key order is significant because it determines the layout of that JSON file.
DEFAULT_CONFIG = {
    "document": {
        "default_font": "仿宋",
        "page_width_inches": 8.5,
        "page_height_inches": 11,
        "max_image_width_inches": 6.0,
    },
    "styles": {
        "heading_1": _heading_style("center", 0, 24, 12, "chapter"),
        "heading_2": _heading_style("left", 2, 12, 6, "section"),
        "heading_3": _heading_style("center", 0, 6, 6, "chinese"),
        "body": {
            "font_name": "仿宋",
            "font_size": 11,
            "alignment": "justify",
            "line_spacing_rule": "multiple",
            "line_spacing_value": 1.5,
            "first_line_indent": 2,
            "space_before": 0,
            "space_after": 10,
        },
        "code": {
            "font_name": "Consolas",
            "font_size": 10,
            "alignment": "left",
            "line_spacing_rule": "single",
            "first_line_indent": 0,
            "background_color": "f5f5f5",
        },
        "blockquote": {
            "font_name": "仿宋",
            "font_size": 11,
            "italic": True,
            "color": "666666",
            "alignment": "left",
            "line_spacing_rule": "multiple",
            "line_spacing_value": 1.5,
            "left_indent": 0.5,
            "first_line_indent": 0,
        },
        "table_header": {
            "font_name": "仿宋",
            "font_size": 11,
            "bold": True,
            "alignment": "center",
            "line_spacing_rule": "single",
        },
        "table_cell": {
            "font_name": "仿宋",
            "font_size": 11,
            "alignment": "left",
            "line_spacing_rule": "single",
        },
    },
    "image": {
        "local_dir": "./images",
        "download_timeout": 30,
        "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    },
}