mdproc 0.2.1__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ .env
2
+ .venv
3
+ __pycache__/
4
+ dist/
5
+ src/mdproc/assets/mermaid.bundle.js
@@ -0,0 +1,41 @@
1
+ {
2
+ "configurations": [
3
+ {
4
+ "name": "Python Debugger: Current File",
5
+ "type": "debugpy",
6
+ "request": "launch",
7
+ "program": "${file}",
8
+ "console": "integratedTerminal"
9
+ },
10
+ {
11
+ "name": "Python Debugger: mdimgupload",
12
+ "type": "debugpy",
13
+ "request": "launch",
14
+ "module": "mdproc.mdimgupload",
15
+ "args": [
16
+ "mdproc/demo.md"
17
+ ],
18
+ "cwd": "${workspaceFolder}/src",
19
+ },
20
+ {
21
+ "name": "Python Debugger: mdtable2img",
22
+ "type": "debugpy",
23
+ "request": "launch",
24
+ "module": "mdproc.mdtable2img",
25
+ "args": [
26
+ "mdproc/demo_table.md"
27
+ ],
28
+ "cwd": "${workspaceFolder}/src",
29
+ },
30
+ {
31
+ "name": "Python Debugger: mdmermaid2img",
32
+ "type": "debugpy",
33
+ "request": "launch",
34
+ "module": "mdproc.mdmermaid2img",
35
+ "args": [
36
+ "mdproc/demo_mermaid.md"
37
+ ],
38
+ "cwd": "${workspaceFolder}/src",
39
+ }
40
+ ]
41
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ "editor.formatOnSave": true,
3
+ "editor.codeActionsOnSave": {
4
+ "source.fixAll.ruff": "always"
5
+ }
6
+ }
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: mdproc
3
- Version: 0.2.1
3
+ Version: 0.3.0
4
4
  Summary: A tool to process markdown files.
5
5
  Project-URL: Homepage, https://github.com/honghe/mdproc
6
6
  Project-URL: Repository, https://github.com/honghe/mdproc
@@ -19,6 +19,8 @@ Classifier: Topic :: Multimedia :: Video
19
19
  Requires-Python: >=3.10
20
20
  Requires-Dist: cos-python-sdk-v5
21
21
  Requires-Dist: httpx
22
+ Requires-Dist: markdown-it-py
23
+ Requires-Dist: playwright
22
24
  Requires-Dist: python-dotenv
23
25
  Provides-Extra: dev
24
26
  Requires-Dist: build; extra == 'dev'
@@ -32,10 +34,13 @@ A simple Python tool to process markdown files.
32
34
  ## Features
33
35
 
34
36
  - Markdown Image Uploader to COS.
37
+ - Convert Markdown tables to images and upload to COS.
38
+ - Convert mermaid chart to image. (dependency `npm install -g @mermaid-js/mermaid-cli`)
35
39
 
36
40
  ## Config
37
41
 
38
42
  `.env` or configure environment variables:
43
+
39
44
  ```
40
45
  COS_SECRET_ID=<xyz>
41
46
  COS_SECRET_KEY=<xyz>
@@ -45,27 +50,49 @@ COS_BUCKET=<xyz>
45
50
 
46
51
  ## Usage
47
52
 
48
- 1. Install dependencies:
49
- ```bash
50
- pip install mdproc
51
- ```
52
- 2. Run the script:
53
- ```bash
54
- mdproc-imgupload your_markdown.md
55
- ```
53
+ - Install dependencies:
54
+ ```bash
55
+ pip install mdproc
56
+ # for mdproc-table2img and mdproc-mermaid2img (both render with Playwright)
57
+ playwright install chromium
58
+ ```
59
+ - Markdown images upload:
60
+ ```bash
61
+ mdproc-imgupload your_markdown.md
62
+ ```
63
+ - Markdown table to image:
64
+ ```bash
65
+ mdproc-table2img your_markdown.md
66
+ ```
67
+ - Markdown mermaid to image:
68
+ ```bash
69
+ mdproc-mermaid2img your_markdown.md
70
+ ```
56
71
 
57
72
  ## Demo
58
73
 
59
74
  demo.md:
75
+
60
76
  ```
61
77
  ![first-version](https://www.python.org/static/img/python-logo.png)
62
78
  ```
63
79
 
64
80
  demo_output.md
81
+
65
82
  ```
66
83
  ![first-version](https://pic-1251484506.cos.ap-guangzhou.myqcloud.com/imgs/python-logo_ae79195a.png)
67
84
  ```
68
85
 
86
+ ## mermaid2img Benchmark
87
+
88
+ Note: The browser is Chromium. mermaid-cli uses Puppeteer.
89
+
90
+ | mermaid2img | Cold Start /s | Warm Start /s |
91
+ | --------------------------------- | ------------- | ------------- |
92
+ | playwright (mermaidjs cdn) | 2.5 | 1.5 |
93
+ | playwright (local mermaid bundle) | 2.5 | 1.5 |
94
+ | mermaid-cli | 5.7 | 3.7 |
95
+
69
96
  ## License
70
97
 
71
- Apache License
98
+ Apache License
mdproc-0.3.0/README.md ADDED
@@ -0,0 +1,69 @@
1
+ # mdproc
2
+
3
+ A simple Python tool to process markdown files.
4
+
5
+ ## Features
6
+
7
+ - Markdown Image Uploader to COS.
8
+ - Convert Markdown tables to images and upload to COS.
9
+ - Convert mermaid chart to image. (dependency `npm install -g @mermaid-js/mermaid-cli`)
10
+
11
+ ## Config
12
+
13
+ `.env` or configure environment variables:
14
+
15
+ ```
16
+ COS_SECRET_ID=<xyz>
17
+ COS_SECRET_KEY=<xyz>
18
+ COS_REGION=<xyz>
19
+ COS_BUCKET=<xyz>
20
+ ```
21
+
22
+ ## Usage
23
+
24
+ - Install dependencies:
25
+ ```bash
26
+ pip install mdproc
27
+ # for mdproc-table2img and mdproc-mermaid2img (both render with Playwright)
28
+ playwright install chromium
29
+ ```
30
+ - Markdown images upload:
31
+ ```bash
32
+ mdproc-imgupload your_markdown.md
33
+ ```
34
+ - Markdown table to image:
35
+ ```bash
36
+ mdproc-table2img your_markdown.md
37
+ ```
38
+ - Markdown mermaid to image:
39
+ ```bash
40
+ mdproc-mermaid2img your_markdown.md
41
+ ```
42
+
43
+ ## Demo
44
+
45
+ demo.md:
46
+
47
+ ```
48
+ ![first-version](https://www.python.org/static/img/python-logo.png)
49
+ ```
50
+
51
+ demo_output.md
52
+
53
+ ```
54
+ ![first-version](https://pic-1251484506.cos.ap-guangzhou.myqcloud.com/imgs/python-logo_ae79195a.png)
55
+ ```
56
+
57
+ ## mermaid2img Benchmark
58
+
59
+ Note: The browser is Chromium. mermaid-cli uses Puppeteer.
60
+
61
+ | mermaid2img | Cold Start /s | Warm Start /s |
62
+ | --------------------------------- | ------------- | ------------- |
63
+ | playwright (mermaidjs cdn) | 2.5 | 1.5 |
64
+ | playwright (local mermaid bundle) | 2.5 | 1.5 |
65
+ | mermaid-cli | 5.7 | 3.7 |
66
+
67
+ ## License
68
+
69
+ Apache License
@@ -4,12 +4,18 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "mdproc"
7
- version = "0.2.1"
7
+ version = "0.3.0"
8
8
  description = "A tool to process markdown files."
9
9
  authors = [{ name = "Honghe" }]
10
10
  readme = "README.md"
11
11
  requires-python = ">=3.10"
12
- dependencies = ["httpx", "python-dotenv", "cos-python-sdk-v5"]
12
+ dependencies = [
13
+ "httpx",
14
+ "python-dotenv",
15
+ "cos-python-sdk-v5",
16
+ "markdown-it-py",
17
+ "playwright",
18
+ ]
13
19
  keywords = ["markdown", "jpg", "png", "process"]
14
20
 
15
21
  classifiers = [
@@ -27,7 +33,9 @@ classifiers = [
27
33
  [project.scripts]
28
34
  mdproc = "mdproc.mdproc:main"
29
35
  mdproc-imgupload = "mdproc.mdimgupload:main"
30
-
36
+ mdproc-forzhihu = "mdproc.mdforzhihu:main"
37
+ mdproc-table2img = "mdproc.mdtable2img:main"
38
+ mdproc-mermaid2img = "mdproc.mdmermaid2img:main"
31
39
 
32
40
  [project.urls]
33
41
  Homepage = "https://github.com/honghe/mdproc"
@@ -0,0 +1,12 @@
1
+ This is doc.
2
+
3
+ ```mermaid
4
+ graph TD
5
+ A[Start] --> B{Is it?}
6
+ B -->|Yes| C[OK]
7
+ C --> D[Rethink]
8
+ D --> B
9
+ B ---->|No| E[End]
10
+ ```
11
+
12
+ Done.
@@ -0,0 +1,5 @@
1
+ This is doc.
2
+
3
+ ![mermaid 1](https://pic-1251484506.cos.ap-guangzhou.myqcloud.com/imgs/mermaid_1031221472.png)
4
+
5
+ Done.
@@ -0,0 +1,10 @@
1
+ table
2
+
3
+ | ID | txt_0 | txt_1 | txt_2 | txt_3 | txt_4 | txt_5 | txt_6 | txt_7 | txt_8 | txt_9 | cat_Appetizers & Sides | cat_Aussie Pub Classics | cat_Burgers & Sandwiches | cat_Drinks & Desserts | cat_Mexican Specialties | cat_Pasta & Risotto | cat_Pizzas | cat_Salads & Healthy Options | is_coffee | price |
4
+ | ------- | ---------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ----------- | ---------------------- | ----------------------- | ------------------------ | --------------------- | ----------------------- | ------------------- | ---------- | ---------------------------- | --------- | ------ |
5
+ | 8 | 0.3354452 | 0.36037982 | -0.04443971 | 0.14370468 | -0.19956689 | -0.17493485 | -0.18741444 | -0.02776922 | -0.07173516 | -0.11751403 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0.3887 |
6
+ | 42 | 0.3015529 | 0.28032377 | 0.03035132 | 0.21287075 | 0.04236558 | -0.054545 | -0.10349114 | -0.13550489 | -0.04504355 | -0.22817583 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0.5832 |
7
+ | 61 | 0.53950787 | -0.020039 | -0.36858445 | -0.10636957 | 0.00259933 | 0.15990224 | 0.04153050 | 0.11348728 | -0.02482079 | -0.23463035 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.6110 |
8
+ | 101 | 0.20630628 | -0.04121789 | 0.11134595 | -0.2160106 | 0.00511632 | -0.20131038 | 0.05482014 | -0.19734132 | 0.35356910 | 0.23985470 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0.2765 |
9
+
10
+ end
@@ -0,0 +1,3 @@
1
+ table
2
+ ![Table 1](https://pic-1251484506.cos.ap-guangzhou.myqcloud.com/imgs/table_1.png)
3
+ end
@@ -0,0 +1,64 @@
1
+ from markdown_it import MarkdownIt
2
+
3
+
4
def extract_raw_tables(md_text):
    """
    Extract the raw markdown source of every table found in ``md_text``.

    Args:
        md_text: Markdown document text.

    Returns:
        List of strings, one per table, each holding the original source
        lines of that table joined with newlines (no trailing newline).
    """
    # Configure the parser to enable tables (GFM-like is a good preset)
    md = MarkdownIt("gfm-like", {"linkify": False}).enable("table")

    # Split once up front instead of once per table.
    source_lines = md_text.splitlines()

    raw_tables = []
    for token in md.parse(md_text, {}):
        # markdown-it attaches a source map to opening block tokens only; for
        # a table it is [first_line, line_after_last]. The closing token has
        # map=None, so reading the end line from table_close never succeeds.
        if token.type == "table_open" and token.map:
            first_line, end_line = token.map
            raw_tables.append("\n".join(source_lines[first_line:end_line]))

    return raw_tables
35
+
36
+
37
def main():
    """Demo entry point: print the raw source of each table in a sample document."""
    sample_markdown = """
Here is some introductory text.

| Header 1 | Header 2 |
|---|---|
| Cell 1 | Cell 2 |
| Cell 3 | Cell 4 |

Some text in between.

| Name | Age |
|---|---|
| Alice | 30 |
| Bob | 25 |
"""

    found = extract_raw_tables(sample_markdown)

    # Number the tables from 1 for display.
    for number, table_source in enumerate(found, start=1):
        print(f"--- Table {number} Raw String ---")
        print(table_source)
        print("----------------------------\n")
61
+
62
+
63
+ if __name__ == "__main__":
64
+ main()
@@ -0,0 +1,48 @@
1
+ import re
2
+ import os
3
+ import argparse
4
def main():
    """
    Strip the blank lines surrounding image tags in a markdown file.

    Reads the file named on the command line, removes every run of empty
    lines directly before and directly after each line that starts with a
    markdown image tag, and writes the result next to the input as
    ``<name>_4zhihu.md``. A short summary is printed at the end.
    """
    arg_parser = argparse.ArgumentParser(
        description="Process markdown file for Zhihu."
    )
    arg_parser.add_argument("input_file", help="Path to the input markdown file.")
    source_path = arg_parser.parse_args().input_file
    target_path = f"{os.path.splitext(source_path)[0]}_4zhihu.md"

    with open(source_path, "r", encoding="utf-8") as src:
        source_lines = src.readlines()

    image_line = re.compile(r"!\[.*?\]\(.*?\)")
    kept = []
    image_total = 0
    blanks_dropped = 0
    # True while we are consuming the empty lines that directly follow an image.
    after_image = False

    for raw in source_lines:
        text = raw.strip()

        if after_image and text == "":
            blanks_dropped += 1
            continue
        after_image = False

        if image_line.match(text):
            image_total += 1
            # Drop the empty lines that were already kept just before the image.
            while kept and kept[-1].strip() == "":
                kept.pop()
                blanks_dropped += 1
            after_image = True

        kept.append(raw)

    with open(target_path, "w", encoding="utf-8") as dst:
        dst.writelines(kept)
    print(f"Image tags: {image_total}, removed empty lines: {blanks_dropped}")
46
+
47
+ if __name__ == "__main__":
48
+ main()
@@ -0,0 +1,282 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Process Mermaid charts in Markdown documents: Convert → Upload → Replace.
4
+
5
+ TRUE 3-STEP WORKFLOW (writes file ONCE):
6
+ 1. convert_mermaid_in_markdown() - Convert mermaid code blocks to images
7
+ 2. upload_mermaid_images_to_cos() - Upload images to COS
8
+ 3. replace_mermaid_with_images() - Replace mermaid blocks with image links (local or COS)
9
+
10
+ Use the unified pipeline: process_mermaid_markdown_3steps()
11
+ - Reads file once
12
+ - Does all replacements in memory
13
+ - Writes file once at the end
14
+ """
15
+
16
+ from dotenv import load_dotenv
17
+
18
+ import argparse
19
+
20
+ import os
21
+ import re
22
+ import tempfile
23
+ from pathlib import Path
24
+ from typing import Optional, Tuple, Dict, List
25
+
26
+ from .mermaid2img_playwright import render_mermaid_playwright
27
+ from .cos_uploader import upload
28
+
29
+ load_dotenv()
30
+
31
+
32
def extract_mermaid_code(markdown_content: str) -> list[Tuple[str, str]]:
    """
    Extract mermaid code blocks from markdown content.

    Both LF and CRLF line endings are recognised, and trailing spaces or tabs
    after the opening ```mermaid fence are tolerated.

    Args:
        markdown_content: The markdown text content

    Returns:
        List of tuples (mermaid_code, original_block) where:
        - mermaid_code: Clean mermaid code without markdown fences
        - original_block: Original markdown block including fences, exactly as
          it appears in ``markdown_content`` (so it can be used with
          ``str.replace``)
    """
    # ```mermaid <optional blanks> EOL ... EOL ```
    # `\r?` lets the same pattern work on files saved with Windows line endings.
    pattern = r"```mermaid[ \t]*\r?\n(.*?)\r?\n```"

    return [
        (match.group(1).strip(), match.group(0))
        for match in re.finditer(pattern, markdown_content, re.DOTALL)
    ]
55
+
56
+
57
def mermaid_to_image(
    mermaid_code: str,
    output_dir: Optional[str] = None,
    theme: str = "default",
    scale: int = 2,
) -> str:
    """
    Convert mermaid code to image file.

    The file name is derived from a digest of the diagram source, so the same
    diagram maps to the same file name on every run.

    Args:
        mermaid_code: Raw mermaid diagram code (without markdown fences)
        output_dir: Directory to save the image. If None, uses temp directory
        theme: Theme for rendering ("default" or "dark")
        scale: Scale factor for image

    Returns:
        Path to the generated image file
    """
    # Function-scope import: hashlib is stdlib and only needed here.
    import hashlib

    if output_dir is None:
        output_dir = os.path.join(tempfile.gettempdir(), "mermaid2img")

    os.makedirs(output_dir, exist_ok=True)

    # The built-in hash() is salted per process for str values, so it gives the
    # same diagram a different name on each run. Use a content digest instead,
    # folded to the same 31-bit decimal format the file names already use.
    digest = hashlib.sha256(mermaid_code.encode("utf-8")).digest()
    code_hash = int.from_bytes(digest[:4], "big") & 0x7FFFFFFF
    output_filename = f"mermaid_{code_hash}.png"
    output_path = os.path.join(output_dir, output_filename)

    # Render mermaid code to image
    render_mermaid_playwright(mermaid_code, output_path, theme=theme, scale=scale)

    return output_path
89
+
90
+
91
def convert_mermaid_in_markdown(
    markdown_content: str,
    img_output_dir: Optional[str] = None,
    theme: str = "default",
    scale: int = 1,
) -> Tuple[str, Dict[str, str]]:
    """
    Render every mermaid block found in the markdown text to an image file.

    The markdown text is returned untouched; this step only produces image
    files. A block that fails to render is reported on stdout and left out of
    the returned mapping.

    Args:
        markdown_content: The markdown text content
        img_output_dir: Directory to save images. If None, uses temp directory
        theme: Theme for rendering ("default" or "dark")
        scale: Scale factor for image

    Returns:
        Tuple of (markdown_content, image_map) where image_map is
        {original_block: img_path}
    """
    blocks = extract_mermaid_code(markdown_content)
    total = len(blocks)

    if total == 0:
        print("No mermaid blocks found.")
        return markdown_content, {}

    print(f"Found {total} mermaid blocks")

    rendered = {}  # {original_block: img_path}
    position = 0
    for diagram_source, fenced_block in blocks:
        position += 1
        try:
            print(f"Converting block {position}/{total}...")
            image_file = mermaid_to_image(diagram_source, img_output_dir, theme, scale)
            print(f" Generated: {image_file}")
            rendered[fenced_block] = image_file
        except Exception as err:
            # Best effort: report the failure and carry on with the next block.
            print(f" Error: {err}")

    return markdown_content, rendered
139
+
140
+
141
def upload_mermaid_images_to_cos(local_image_paths: List[str]) -> Dict[str, str]:
    """
    Upload each local image to COS and collect the resulting URLs.

    An image whose upload raises is reported on stdout and omitted from the
    result; the remaining images are still attempted.

    Args:
        local_image_paths: List of local image file paths

    Returns:
        Dictionary mapping {img_path: cos_url} for the uploads that succeeded
    """
    uploaded = {}
    position = 0

    for local_file in local_image_paths:
        position += 1
        try:
            print(f"Uploading image {position}/{len(local_image_paths)}...")
            print(f" Source: {local_file}")

            remote_url = upload(Path(local_file))
            uploaded[local_file] = remote_url
            print(f" COS URL: {remote_url}")
        except Exception as err:
            # Best effort: skip this image and keep going.
            print(f" Upload failed: {err}")

    return uploaded
167
+
168
+
169
def replace_mermaid_with_images(
    markdown_content: str,
    mermaid_to_img_map: Dict[str, str],
    img_to_url_map: Dict[str, str],
) -> str:
    """
    Swap each mermaid code block for a markdown image link to its COS URL.

    Links are labelled ``mermaid 1``, ``mermaid 2``, ... in the iteration
    order of ``mermaid_to_img_map``.

    Args:
        markdown_content: Original markdown text
        mermaid_to_img_map: Dictionary mapping {original_mermaid_block: img_path}
        img_to_url_map: Dictionary mapping {img_path: cos_url}

    Returns:
        Updated markdown content with image links

    Raises:
        ValueError: If an image path has no entry in ``img_to_url_map``
            (including when the map is empty or None).
    """
    result = markdown_content
    position = 0

    for block_text, local_image in mermaid_to_img_map.items():
        position += 1

        # Every rendered image must have been uploaded; there is no fallback.
        if not img_to_url_map or local_image not in img_to_url_map:
            raise ValueError(f"No COS URL found for image path: {local_image}")

        remote_url = img_to_url_map[local_image]
        print(f" Using COS URL: {remote_url}")

        result = result.replace(block_text, f"![mermaid {position}]({remote_url})")

    return result
202
+
203
+
204
def process_mermaid_markdown_3steps(
    markdown_path: str,
    output_path: Optional[str] = None,
    theme: str = "default",
    scale: int = 1,
    img_output_dir: Optional[str] = None,
):
    """
    Process markdown in 3 steps: Convert → Upload → Replace links.
    The input is read once and the output is written once.

    Args:
        markdown_path: Path to input markdown file
        output_path: Path to output markdown file. If None, overwrites input
        theme: Theme for rendering ("default" or "dark")
        scale: Scale factor for image
        img_output_dir: Directory to save images. If None, uses temp directory

    Returns:
        Tuple of (final_markdown_content, results_dict). results_dict is empty
        when no image was produced; otherwise it holds "images"
        ({original_block: img_path}) and, when at least one upload succeeded,
        "cos_urls" ({img_path: cos_url}).

    Raises:
        ValueError: Propagated from replace_mermaid_with_images when a
            rendered image has no uploaded URL; nothing is written then.
    """
    # Read markdown file once
    with open(markdown_path, "r", encoding="utf-8") as f:
        markdown_content = f.read()

    if output_path is None:
        output_path = markdown_path

    # ===== STEP 1: Convert mermaid to images =====
    print("STEP 1: Converting mermaid charts to images...")
    _, mermaid_to_img_map = convert_mermaid_in_markdown(
        markdown_content, img_output_dir, theme, scale
    )

    if not mermaid_to_img_map:
        print("No mermaid blocks found. Writing unchanged content.")
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(markdown_content)
        return markdown_content, {}

    results = {"images": mermaid_to_img_map}

    # ===== STEP 2: Upload =====
    print("STEP 2: Uploading images to COS...")
    image_paths = list(mermaid_to_img_map.values())
    img_to_url_map = upload_mermaid_images_to_cos(image_paths)

    if img_to_url_map:
        results["cos_urls"] = img_to_url_map
        print(f"Uploaded {len(img_to_url_map)} images successfully.")

    # ===== STEP 3: Replace in memory =====
    print("STEP 3: Replacing mermaid blocks with image links...")
    final_content = replace_mermaid_with_images(
        markdown_content, mermaid_to_img_map, img_to_url_map
    )

    # Write file ONCE
    print("Writing output file...")
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_content)
    print(f"Output saved to: {output_path}")

    # The success path previously fell off the end and returned None even
    # though the early-exit path above returns a (content, results) tuple.
    return final_content, results
267
+
268
+
269
def main():
    """
    Command-line entry point for mermaid conversion.

    Takes one markdown file path and writes the processed document next to it
    as ``<name>_mm2img.md``, rendering diagrams at scale 2.
    """
    parser = argparse.ArgumentParser(
        # The previous help text described the table converter (copy-paste slip).
        description="Convert mermaid charts in a Markdown file to images and upload to COS."
    )
    parser.add_argument("input_file", help="Path to the input markdown file.")
    args = parser.parse_args()
    input_file = args.input_file

    output_file = f"{os.path.splitext(input_file)[0]}_mm2img.md"
    process_mermaid_markdown_3steps(input_file, output_path=output_file, scale=2)
279
+
280
+
281
+ if __name__ == "__main__":
282
+ main()
@@ -0,0 +1,118 @@
1
+ """
2
+ Markdown table to image and Uploader to COS.
3
+ table with few columns: the table unfolds naturally.
4
+ table with many columns: the table is spread out horizontally.
5
+ """
6
+
7
+ import argparse
8
+ import os
9
+ import re
10
+ import tempfile
11
+ from pathlib import Path
12
+
13
+ from dotenv import load_dotenv
14
+ from markdown_it import MarkdownIt
15
+ from playwright.sync_api import sync_playwright
16
+
17
+ from .cos_uploader import upload
18
+
19
+ load_dotenv()
20
+
21
+
22
def extract_tables(md_text):
    """
    Return the raw source of every pipe table in ``md_text``.

    A table is a run of consecutive lines that each start and end with ``|``
    (optionally surrounded by spaces or tabs). Each returned string is an
    exact substring of ``md_text``, so it can be passed to ``str.replace``.
    Blank lines are never part of a match: they end the current table, so two
    tables separated by a blank line come back as two entries.

    Args:
        md_text: Markdown document text.

    Returns:
        List of table source strings in document order.
    """
    # A regex is simpler than the markdown-it table extractor for this use case.
    # Only horizontal whitespace is allowed around a row: `\s` would also match
    # newlines, swallowing neighbouring blank lines and merging separate tables.
    # `\Z` lets the final row match when the text has no trailing newline.
    table_pattern = re.compile(
        r"(?:^[ \t]*\|.*\|[ \t\r]*(?:\n|\Z))+", re.MULTILINE
    )
    return [m.group(0) for m in table_pattern.finditer(md_text)]
26
+
27
+
28
def table_to_image(md_text, output_path):
    """
    Render one markdown table to a PNG screenshot using headless Chromium.

    The markdown is converted to HTML with markdown-it, the first ``<table>``
    is tagged with an id, and only that element is captured.

    Args:
        md_text: Markdown source of a single table, e.g.
            "| A | B |" / "|---|---|" / "| 1 | 2 |" on consecutive lines.
        output_path: File path the PNG screenshot is written to.
    """
    html_table = (
        MarkdownIt("gfm-like", {"linkify": False}).enable("table").render(md_text)
    )
    # Tag the first table so the screenshot can target exactly that element.
    html_table = html_table.replace("<table", '<table id="mdtable2img"', 1)

    # The note inside <style> must be a /* ... */ comment: a leading "#" is
    # not CSS comment syntax, it becomes part of the selector and makes the
    # browser discard the whole `table` rule that follows it.
    html = f"""
    <html>
    <head>
    <style>
    /* usually 800px in width, but can be wider if there are no sentences to break. */
    table {{
        border-collapse: collapse;
        width: auto;
        max-width: 800px;
        table-layout: auto;
        font-size: 14px;
    }}
    td, th {{
        border: 1px solid #333;
        padding: 6px 10px;
        white-space: pre-line;
    }}
    </style>
    </head>
    <body>
    {html_table}
    </body>
    </html>
    """

    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page(viewport={"width": 2000, "height": 800})
        page.set_content(html)
        table_locator = page.locator("#mdtable2img")
        table_locator.screenshot(path=output_path)
        browser.close()
72
+
73
+
74
def main():
    """
    Command-line entry point for table conversion.

    Reads the markdown file named on the command line, renders each table to
    a PNG in a temp directory, uploads the images to COS, replaces each table
    with an image link, and writes the result as ``<name>_tb2img.md``.
    """
    cli = argparse.ArgumentParser(
        description="Convert tables in a Markdown file to images and upload to COS."
    )
    cli.add_argument("input_file", help="Path to the input markdown file.")
    source_path = cli.parse_args().input_file
    target_path = f"{os.path.splitext(source_path)[0]}_tb2img.md"

    with open(source_path, "r", encoding="utf-8") as src:
        content = src.read()

    # Directory to store temporary imgs
    img_dir = os.path.join(tempfile.gettempdir(), "mdtable2img")
    os.makedirs(img_dir, exist_ok=True)

    # Phase 1: render every table before anything is uploaded.
    tables = extract_tables(content)
    print(f"Find {len(tables)} tables")
    images = []
    for number, table_md in enumerate(tables, start=1):
        img_path = os.path.join(img_dir, f"table_{number}.png")
        table_to_image(table_md, img_path)
        print(f"Converted table {number} to image: {img_path}")
        images.append(img_path)
    print(f"Converted {len(images)} tables to images.")

    # Phase 2: upload each image and swap it in for the first occurrence of
    # the matching table source.
    for number, (table_md, img_path) in enumerate(zip(tables, images), start=1):
        cos_url = upload(Path(img_path))
        content = content.replace(table_md, f"![Table {number}]({cos_url})\n", 1)
    print(f"Uploaded {len(images)} table images to COS.")

    with open(target_path, "w", encoding="utf-8") as dst:
        dst.write(content)

    print(f"Processed markdown saved to {target_path}")
115
+
116
+
117
+ if __name__ == "__main__":
118
+ main()
@@ -0,0 +1,69 @@
1
+ import tempfile
2
+ import os
3
+ import shutil
4
+ import subprocess
5
+
6
+
7
def render_mermaid_cli(code: str, output_path: str, theme="default", scale=1):
    """
    Render mermaid code to an image by running the ``mmdc`` executable.

    The executable is taken from the MMDC_PATH environment variable or, if
    that is unset, looked up on PATH. The diagram source is piped to it on
    stdin as UTF-8 bytes, so Chinese and other Unicode text is supported.

    Args:
        code: Mermaid diagram source.
        output_path: File the image is written to.
        theme: Value passed to ``--theme``.
        scale: Value passed to ``--scale``.

    Raises:
        FileNotFoundError: If no mmdc executable can be located.
        RuntimeError: If mmdc exits with a non-zero status.
    """
    executable = os.environ.get("MMDC_PATH") or shutil.which("mmdc")
    if not executable:
        raise FileNotFoundError(
            "mmdc not found. Add it to PATH or set MMDC_PATH to the full path "
        )

    if os.name == "nt":
        # On Windows prefer the sibling "<path>.cmd" file when it exists.
        windows_wrapper = f"{executable}.cmd"
        if os.path.exists(windows_wrapper):
            executable = windows_wrapper

    argv = [executable, "-i", "-", "-o", output_path]
    argv += ["--theme", theme]
    argv += ["--scale", str(scale)]
    argv += ["--backgroundColor", "white"]

    completed = subprocess.run(
        argv,
        input=code.encode("utf-8"),  # explicit UTF-8 so non-ASCII text survives
        capture_output=True,
        text=False,  # keep output as bytes and decode it ourselves
    )

    if completed.returncode == 0:
        return

    stderr_msg = completed.stderr.decode("utf-8", errors="replace")
    print("Error:", stderr_msg)
    raise RuntimeError(f"mermaid-cli execution failed: {stderr_msg}")
49
+
50
+
51
def main():
    """Demo: render a sample flowchart with mermaid-cli into the temp directory."""
    sample_diagram = """
flowchart TD
A[开始] --> B{Is it?}
B -->|Yes| C[OK]
C --> D[Rethink]
D --> B
B ---->|No| E[End]
"""

    target_dir = os.path.join(tempfile.gettempdir(), "mermaid2img")
    os.makedirs(target_dir, exist_ok=True)
    target_file = os.path.join(target_dir, "output.png")

    render_mermaid_cli(sample_diagram, target_file, theme="default", scale=1)
    print(f"Image saved to: {target_file}")
66
+
67
+
68
+ if __name__ == "__main__":
69
+ main()
@@ -0,0 +1,175 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Render Mermaid diagrams to images using Playwright.
4
+ Alternative to mermaid-cli that uses browser rendering.
5
+ """
6
+
7
+ import os
8
+ import tempfile
9
+ from pathlib import Path
10
+ from playwright.sync_api import sync_playwright
11
+
12
+
13
def render_mermaid_playwright(
    mermaid_code: str,
    output_path: str,
    theme: str = "default",
    background_color: str = "white",
    scale: float = 2.0,
    layout: str = "elk",
) -> None:
    """
    Render mermaid diagram to PNG image using Playwright.

    A temporary HTML page embedding the diagram and the local mermaid bundle
    is written to disk, opened in headless Chromium, and the ``#diagram``
    element is screenshotted. The temporary page is always removed afterwards.

    Args:
        mermaid_code: Raw mermaid diagram code (without ```mermaid fences)
        output_path: Path to save the output PNG image
        theme: Mermaid theme ("default", "dark", "forest", "neutral")
        background_color: Background color (CSS color)
        scale: Device scale factor for higher resolution (default 2.0)
        layout: Layout engine for flowchart ("dagre" or "elk"). Only applies to flowchart type.

    Raises:
        RuntimeError: If rendering fails
    """
    # The layout override is only emitted when the source mentions
    # "flowchart" and a non-default (non-dagre) layout was requested.
    is_flowchart = "flowchart" in mermaid_code.lower()

    if is_flowchart and layout != "dagre":
        flowchart_config = f"""
            flowchart: {{
                defaultRenderer: '{layout}'
            }},"""
    else:
        flowchart_config = ""

    # Absolute file:// URI of the mermaid bundle shipped in the package.
    assets_dir = Path(__file__).parent / "assets"
    # Copy from https://github.com/Honghe/mermaid-bundle/blob/master/mermaid.bundle.js
    mermaid_bundle_path = (assets_dir / "mermaid.bundle.js").absolute().as_uri()

    # HTML template with Mermaid.js
    html_template = """
    <!DOCTYPE html>
    <html>
    <head>
        <meta charset="UTF-8">
        <script src="{mermaid_bundle_path}"></script>
        <script type="module">
            mermaid.initialize({{
                startOnLoad: true,
                theme: '{theme}',
                securityLevel: 'loose',
                {flowchart_config}
            }});
        </script>
        <style>
            body {{
                margin: 0;
                padding: 20px;
                background-color: {background_color};
                display: flex;
                justify-content: center;
                align-items: center;
                min-height: 100vh;
            }}
            #diagram {{
                max-width: 100%;
            }}
        </style>
    </head>
    <body>
        <div class="mermaid" id="diagram">
{mermaid_code}
        </div>
    </body>
    </html>
    """

    html_content = html_template.format(
        theme=theme,
        background_color=background_color,
        mermaid_code=mermaid_code,
        flowchart_config=flowchart_config,
        mermaid_bundle_path=mermaid_bundle_path,
    )

    # Create temporary HTML file (delete=False: it must outlive this `with`
    # so the browser can open it; it is removed in the `finally` below).
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", suffix=".html", delete=False
    ) as f:
        temp_html_path = f.name
        f.write(html_content)

    try:
        with sync_playwright() as p:
            # Launch browser in headless mode
            browser = p.chromium.launch(
                headless=True,
            )
            context = browser.new_context(
                viewport={"width": 800, "height": 800},
                device_scale_factor=scale,
            )
            page = context.new_page()

            # Build the URL with as_uri(), like the bundle path above, so the
            # path is percent-encoded and gets the correct file:/// prefix
            # instead of being glued onto "file://" verbatim.
            page.goto(Path(temp_html_path).as_uri())

            # Wait for mermaid to render
            page.wait_for_selector("#diagram svg", timeout=3000)

            # Screenshot the diagram container only, not the whole page.
            diagram = page.locator("#diagram")
            diagram.screenshot(path=output_path, type="png")

            browser.close()

    except Exception as e:
        # Chain explicitly so the underlying Playwright error stays visible.
        raise RuntimeError(f"Failed to render mermaid diagram: {e}") from e

    finally:
        # Clean up temporary HTML file
        if os.path.exists(temp_html_path):
            os.remove(temp_html_path)
138
+
139
+
140
def main():
    """Demo: render a sample flowchart with Playwright into the temp directory."""

    sample_diagram = """
flowchart TD
A[开始] --> B["Popen()"]
B --> C[子进程启动<br>独立运行]
B --> D[主进程继续执行]
D --> E{需要<br>子进程结果?}
E -->|否| D
E -->|是| F["P.wait()"]
F -->|阻塞等待| G[子进程结束]
G --> H[拿到 returncode]
H --> I[可安全读 stdout/stderr<br>(如果用了 PIPE)]
I --> J[结束]
"""

    # Output goes to <tempdir>/mermaid2img/output_playwright.png
    target_dir = os.path.join(tempfile.gettempdir(), "mermaid2img")
    os.makedirs(target_dir, exist_ok=True)
    target_file = os.path.join(target_dir, "output_playwright.png")

    print("Rendering mermaid diagram with Playwright...")
    render_options = {
        "theme": "default",
        "background_color": "white",
        "scale": 2.0,
        "layout": "elk",
    }
    render_mermaid_playwright(sample_diagram, target_file, **render_options)
    print(f"Image saved to: {target_file}")
172
+
173
+
174
+ if __name__ == "__main__":
175
+ main()
@@ -0,0 +1,169 @@
1
+ # -*- coding: utf-8 -*-
2
+ """
3
+ Render Mermaid diagrams to images using Playwright.
4
+ Alternative to mermaid-cli that uses browser rendering.
5
+ """
6
+
7
+ import os
8
+ import tempfile
9
+ from pathlib import Path
10
+ from playwright.sync_api import sync_playwright
11
+
12
+
13
def render_mermaid_playwright(
    mermaid_code: str,
    output_path: str,
    theme: str = "default",
    background_color: str = "white",
    scale: float = 2.0,
    layout: str = "elk",
) -> None:
    """
    Render mermaid diagram to PNG image using Playwright.

    The diagram source is embedded in a temporary HTML page that imports
    Mermaid and the ELK layout plugin from the jsDelivr CDN. The page is
    opened in headless Chromium and the ``#diagram`` element is captured as
    a PNG screenshot. The temporary HTML file is always removed afterwards.

    Args:
        mermaid_code: Raw mermaid diagram code (without ```mermaid fences)
        output_path: Path to save the output PNG image
        theme: Mermaid theme ("default", "dark", "forest", "neutral")
        background_color: Background color (CSS color)
        scale: Device scale factor for higher resolution (default 2.0)
        layout: Layout engine for flowchart ("dagre" or "elk"). Only applies to flowchart type.

    Raises:
        RuntimeError: If rendering fails. The underlying exception is chained
            as ``__cause__`` so the original traceback is preserved.
    """
    # The renderer override is only emitted for flowchart diagrams, and only
    # when a non-default ("dagre") layout engine is requested.
    is_flowchart = "flowchart" in mermaid_code.lower()

    if is_flowchart and layout != "dagre":
        flowchart_config = f"""
flowchart: {{
defaultRenderer: '{layout}'
}},"""
    else:
        flowchart_config = ""

    # HTML template with Mermaid.js (doubled braces are literal braces for str.format)
    html_template = """
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<script type="module">
import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';
import elkLayouts from 'https://cdn.jsdelivr.net/npm/@mermaid-js/layout-elk@0/dist/mermaid-layout-elk.esm.min.mjs';
mermaid.registerLayoutLoaders(elkLayouts);
mermaid.initialize({{
startOnLoad: true,
theme: '{theme}',
securityLevel: 'loose',
{flowchart_config}
}});
</script>
<style>
body {{
margin: 0;
padding: 20px;
background-color: {background_color};
display: flex;
justify-content: center;
align-items: center;
min-height: 100vh;
}}
#diagram {{
max-width: 100%;
}}
</style>
</head>
<body>
<div class="mermaid" id="diagram">
{mermaid_code}
</div>
</body>
</html>
"""

    html_content = html_template.format(
        theme=theme,
        background_color=background_color,
        mermaid_code=mermaid_code,
        flowchart_config=flowchart_config,
    )

    # Create temporary HTML file; delete=False so the browser can open it
    # after this handle is closed. It is removed in the finally block below.
    with tempfile.NamedTemporaryFile(
        mode="w", encoding="utf-8", suffix=".html", delete=False
    ) as f:
        temp_html_path = f.name
        f.write(html_content)

    try:
        with sync_playwright() as p:
            # Launch browser in headless mode
            browser = p.chromium.launch(headless=True)
            context = browser.new_context(
                viewport={"width": 800, "height": 800},
                device_scale_factor=scale,
            )
            page = context.new_page()

            # Load HTML file. as_uri() produces a well-formed, percent-encoded
            # file:// URL on every platform (e.g. file:///C:/... on Windows),
            # unlike concatenating "file://" with a POSIX-style path.
            page.goto(Path(temp_html_path).absolute().as_uri())

            # Wait for mermaid to render
            page.wait_for_selector("#diagram svg", timeout=3000)

            # Locate the diagram container so the screenshot is cropped to it
            diagram = page.locator("#diagram")

            # Take screenshot
            diagram.screenshot(path=output_path, type="png")

            browser.close()

    except Exception as e:
        # Chain the cause so callers can still inspect the original failure.
        raise RuntimeError(f"Failed to render mermaid diagram: {e}") from e

    finally:
        # Clean up temporary HTML file
        if os.path.exists(temp_html_path):
            os.remove(temp_html_path)
137
+
138
+
139
def main():
    """Demo entry point: draw a sample flowchart with Playwright and report the PNG location."""

    sample_diagram = """
flowchart TD
A[开始] --> B{Is it?}
B -->|Yes| C[OK]
C --> D[Rethink]
D --> B
B ---->|No| E[End]
"""

    # The image is written to a dedicated folder under the system temp directory.
    target_dir = os.path.join(tempfile.gettempdir(), "mermaid2img")
    os.makedirs(target_dir, exist_ok=True)
    png_path = os.path.join(target_dir, "output_playwright.png")

    # Rendering options, forwarded to the renderer as keyword arguments.
    render_options = {
        "theme": "default",
        "background_color": "white",
        "scale": 2.0,
        "layout": "elk",
    }

    print("Rendering mermaid diagram with Playwright...")
    render_mermaid_playwright(sample_diagram, png_path, **render_options)
    print(f"Image saved to: {png_path}")


if __name__ == "__main__":
    main()
mdproc-0.2.1/.gitignore DELETED
@@ -1,4 +0,0 @@
1
- .env
2
- .venv
3
- __pycache__/
4
- dist/
@@ -1,12 +0,0 @@
1
- {
2
- "configurations": [
3
- {
4
- "name": "Python Debugger: Module",
5
- "type": "debugpy",
6
- "request": "launch",
7
- "module": "mdproc.mdimgupload",
8
- "args": ["mdproc/demo.md"],
9
- "cwd": "${workspaceFolder}/src",
10
- }
11
- ]
12
- }
mdproc-0.2.1/README.md DELETED
@@ -1,44 +0,0 @@
1
- # mdproc
2
-
3
- A simple Python tool to process markdown files.
4
-
5
- ## Features
6
-
7
- - Markdown Image Uploader to COS.
8
-
9
- ## Config
10
-
11
- Create a `.env` file or set the following environment variables:
12
- ```
13
- COS_SECRET_ID=<xyz>
14
- COS_SECRET_KEY=<xyz>
15
- COS_REGION=<xyz>
16
- COS_BUCKET=<xyz>
17
- ```
18
-
19
- ## Usage
20
-
21
- 1. Install the package:
22
- ```bash
23
- pip install mdproc
24
- ```
25
- 2. Run the script:
26
- ```bash
27
- mdproc-imgupload your_markdown.md
28
- ```
29
-
30
- ## Demo
31
-
32
- demo.md:
33
- ```
34
- ![first-version](https://www.python.org/static/img/python-logo.png)
35
- ```
36
-
37
- demo_output.md
38
- ```
39
- ![first-version](https://pic-1251484506.cos.ap-guangzhou.myqcloud.com/imgs/python-logo_ae79195a.png)
40
- ```
41
-
42
- ## License
43
-
44
- Apache License
File without changes
File without changes
File without changes
File without changes
File without changes