chem-pdf2ppt 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +235 -0
- package/README_EN.md +239 -0
- package/SKILL.md +469 -0
- package/SKILL_EN.md +473 -0
- package/assets/academic_template.html +197 -0
- package/cli.js +57 -0
- package/examples/example_usage.py +407 -0
- package/index.js +109 -0
- package/package.json +50 -0
- package/references/chemistry_templates.md +228 -0
- package/references/chemistry_templates_en.md +228 -0
- package/references/visual_style.md +172 -0
- package/references/visual_style_en.md +172 -0
- package/requirements.txt +20 -0
- package/scripts/analyze_paper.py +334 -0
- package/scripts/convert_to_images.py +67 -0
- package/scripts/create_ppt.py +712 -0
- package/scripts/extract_charts.py +425 -0
- package/scripts/generate_html.py +288 -0
|
@@ -0,0 +1,288 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
HTML 学术 PPT 生成器 — 生成单文件、横向翻页的化学学术 HTML 演示文稿
|
|
4
|
+
Academic HTML Presentation Generator — Single-file horizontal-slide deck with figures
|
|
5
|
+
"""
|
|
6
|
+
import sys
|
|
7
|
+
import os
|
|
8
|
+
import json
|
|
9
|
+
import re
|
|
10
|
+
import base64
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _safe_print(msg):
    """Print ``msg``, falling back to an ASCII-only rendering if needed.

    When the output stream cannot encode ``msg`` (``UnicodeEncodeError``),
    the text is re-encoded as ASCII with unencodable characters replaced
    and printed again.
    """
    try:
        print(msg)
    except UnicodeEncodeError:
        ascii_bytes = msg.encode('ascii', errors='replace')
        print(ascii_bytes.decode('ascii'))
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# ============================================================
# Color themes (same palettes as create_ppt.py)
# ============================================================
# Each theme maps placeholder-friendly keys to CSS color strings;
# "primary_rgb" is the primary color as a comma-separated "r,g,b" triple.
THEMES = {
    "academic": {
        "name": "学术经典",
        "primary": "#003366",
        "primary_rgb": "0,51,102",
        "accent": "#B41E1E",
        "bg": "#FFFFFF",
        "bg_light": "#F0F4F8",
        "text": "#333333",
        "muted": "#8C8C8C",
        "section_bg": "#003366",
        "section_text": "#FFFFFF",
        "table_stripe": "#F0F4F8",
    },
    "molecular": {
        "name": "分子科技",
        "primary": "#1A5276",
        "primary_rgb": "26,82,118",
        "accent": "#E74C3C",
        "bg": "#F8F9FA",
        "bg_light": "#EBF0F5",
        "text": "#2C3E50",
        "muted": "#95A5A6",
        "section_bg": "#1A5276",
        "section_text": "#FFFFFF",
        "table_stripe": "#EBF0F5",
    },
    "green": {
        "name": "绿色化学",
        "primary": "#1E5631",
        "primary_rgb": "30,86,49",
        "accent": "#D4A017",
        "bg": "#F7F9F4",
        "bg_light": "#EEF3E9",
        "text": "#333333",
        "muted": "#96A590",
        "section_bg": "#1E5631",
        "section_text": "#FFFFFF",
        "table_stripe": "#EEF3E9",
    },
    "nature": {
        "name": "Nature 风格",
        "primary": "#222222",
        "primary_rgb": "34,34,34",
        "accent": "#0066CC",
        "bg": "#FFFFFF",
        "bg_light": "#F8F8F8",
        "text": "#444444",
        "muted": "#A0A0A0",
        "section_bg": "#222222",
        "section_text": "#FFFFFF",
        "table_stripe": "#F5F5F5",
    },
}
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _img_to_data_uri(img_path):
    """Encode an image file as a base64 ``data:`` URI for embedding in HTML.

    The MIME type is chosen from the file extension (case-insensitive);
    unrecognised extensions are served as ``image/png``.

    Returns:
        The ``data:<mime>;base64,<payload>`` string, or ``None`` when the
        path is empty, does not exist, or reading/encoding fails.
    """
    if img_path and os.path.exists(img_path):
        try:
            suffix = os.path.splitext(img_path)[1].lower()
            mime = {
                '.png': 'image/png',
                '.jpg': 'image/jpeg',
                '.jpeg': 'image/jpeg',
                '.gif': 'image/gif',
                '.webp': 'image/webp',
                '.svg': 'image/svg+xml',
            }.get(suffix, 'image/png')
            with open(img_path, 'rb') as handle:
                payload = base64.b64encode(handle.read())
            return f"data:{mime};base64,{payload.decode('ascii')}"
        except Exception:
            # Deliberately best-effort: any failure is reported as "no image".
            return None
    return None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _escape_html(text):
    """Escape ``text`` for safe use in HTML text nodes and attribute values.

    The previous chain of ``str.replace`` calls mapped each special character
    to itself and therefore escaped nothing. This version delegates to the
    standard library, which converts ``&``, ``<``, ``>``, ``"`` and ``'`` to
    character references.

    Args:
        text: Value to escape. Non-string values are converted with ``str()``;
            ``None`` is treated as empty text.

    Returns:
        The escaped string.
    """
    # Function-scope import so the module-level import block stays untouched.
    from html import escape

    if text is None:
        return ""
    return escape(str(text), quote=True)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class HtmlPPT:
    """Builder for a chemistry-oriented academic HTML slide deck.

    Each ``add_*`` method appends one ``<section>`` HTML fragment to
    ``self.slides``. ``save`` merges the fragments into
    ``assets/academic_template.html`` and writes a single HTML file.
    """

    # An existing page-number badge; renumbered with the real position at render time.
    _PAGE_NUM_RE = re.compile(r'<div class="page-num">(\d+)</div>')
    # Slide kind as written into the opening <section> tag by the add_* methods.
    _SLIDE_KIND_RE = re.compile(r'<section class="slide (?:light|dark) slide-([a-z]+)"')
    # Kinds that receive a page-number badge when they do not already carry one.
    _NUMBERED_KINDS = frozenset(('content', 'figure', 'table', 'summary'))

    def __init__(self, title="学术报告", theme="academic"):
        """Create an empty deck.

        Args:
            title: Document title substituted for ``__TITLE__`` in the template.
            theme: Key of the palette to use from ``THEMES``.

        Raises:
            ValueError: If ``theme`` is not a key of ``THEMES``.
        """
        if theme not in THEMES:
            raise ValueError(f"Unknown theme: {theme}")
        self.title = title
        self.theme = theme
        self.t = THEMES[theme]
        self.slides = []
        self._warnings = []
        self._missing_images = []

    @staticmethod
    def _bullet_items(bullets):
        """Render bullet texts as escaped ``<li>`` elements; ``None`` yields no items."""
        return '\n'.join(f'<li>{_escape_html(str(b))}</li>' for b in (bullets or []))

    def add_title_slide(self, title_cn, title_en="", authors="", journal="", doi=""):
        """Append the title slide.

        Args:
            title_cn: Main heading.
            title_en: Optional secondary (English) title line.
            authors: Optional author line.
            journal: Optional journal line.
            doi: Optional DOI, shown with a ``DOI:`` prefix.
        """
        meta_parts = []
        if authors:
            meta_parts.append(f'<span>{_escape_html(authors)}</span>')
        if journal:
            meta_parts.append(f'<span>{_escape_html(journal)}</span>')
        if doi:
            meta_parts.append(f'<span style="font-size:11px">DOI: {_escape_html(doi)}</span>')
        meta_html = ''.join(meta_parts)
        en_title_html = f'<div class="en-title">{_escape_html(title_en)}</div>' if title_en else ''

        # The literal page number is a placeholder; _render renumbers it.
        html = f'''<section class="slide light slide-title">
    <div class="accent-line"></div>
    <h1>{_escape_html(title_cn)}</h1>
    {en_title_html}
    <div class="meta">{meta_html}</div>
    <div class="page-num">1</div>
</section>'''
        self.slides.append(html)

    def add_section_slide(self, title, subtitle=""):
        """Append a dark section-divider slide with an optional subtitle."""
        sub_html = f'<div class="sub">{_escape_html(subtitle)}</div>' if subtitle else ''
        html = f'''<section class="slide dark slide-section">
    <div class="accent-bar"></div>
    <h2>{_escape_html(title)}</h2>
    {sub_html}
</section>'''
        self.slides.append(html)

    def add_content_slide(self, title, bullets, subtitle="", notes=""):
        """Append a bullet-list slide.

        Args:
            title: Slide heading.
            bullets: Iterable of bullet texts (``None`` is treated as empty).
            subtitle: Optional line shown under the heading.
            notes: Optional note rendered at the bottom of the slide.
        """
        li_items = self._bullet_items(bullets)
        subtitle_html = f'<div class="subtitle">{_escape_html(subtitle)}</div>' if subtitle else ''
        notes_html = f'<div class="notes">📝 {_escape_html(notes)}</div>' if notes else ''
        html = f'''<section class="slide light slide-content">
    <h3>{_escape_html(title)}</h3>
    <div class="title-line"></div>
    {subtitle_html}
    <ul class="bullets">{li_items}</ul>
    {notes_html}
</section>'''
        self.slides.append(html)

    def add_figure_slide(self, title, figure_path, bullets=None,
                         figure_label="", caption="", layout="figure_right"):
        """Append a slide that combines a figure with optional bullet text.

        The image is embedded as a base64 data URI. If the file is missing,
        the path is recorded in ``self._missing_images``; if it exists but
        cannot be encoded, a message is added to ``self._warnings``. In both
        cases a dashed placeholder box is shown instead of the image.

        Args:
            title: Slide heading.
            figure_path: Path of the image file; falsy means "no figure".
            bullets: Optional iterable of bullet texts.
            figure_label: Optional label (also used as the image alt text).
            caption: Optional caption text.
            layout: ``"figure_top"``, ``"figure_full"``, or anything else for
                the default text-left / figure-right arrangement.
        """
        bullets = bullets or []
        placeholder_style = 'padding:2vh;color:var(--muted);border:1px dashed var(--muted)'
        fig_html = ""

        if figure_path and os.path.exists(figure_path):
            data_uri = _img_to_data_uri(figure_path)
            if data_uri:
                fig_html = f'<img src="{data_uri}" alt="{_escape_html(figure_label or title)}">'
            else:
                self._warnings.append(f"Failed to encode image: {figure_path}")
                fig_html = (f'<div style="{placeholder_style}">'
                            f'[Image: {_escape_html(os.path.basename(figure_path))}]</div>')
        elif figure_path:
            self._missing_images.append(figure_path)
            fig_html = (f'<div style="{placeholder_style}">'
                        f'[Figure: {_escape_html(os.path.basename(figure_path))}]</div>')

        li_items = self._bullet_items(bullets)

        if layout == "figure_top":
            area_html = f'''<div class="fig-area top">
    {fig_html}
    <ul class="fig-text">{li_items}</ul>
</div>'''
        elif layout == "figure_full":
            small_list = f'<ul class="fig-text" style="font-size:12px">{li_items}</ul>' if bullets else ''
            area_html = f'''<div class="fig-area top">
    {fig_html}
    {small_list}
</div>'''
        else:
            # figure_right (default): text first, figure second.
            area_html = f'''<div class="fig-area">
    <ul class="fig-text">{li_items}</ul>
    {fig_html}
</div>'''

        # Label and caption are caller-supplied text, so escape them like every other field.
        caption_parts = [_escape_html(part) for part in (figure_label, caption) if part]
        caption_line = ""
        if caption_parts:
            caption_line = f'<div class="fig-caption">{" | ".join(caption_parts)}</div>'

        html = f'''<section class="slide light slide-figure">
    <h3>{_escape_html(title)}</h3>
    <div class="title-line"></div>
    {area_html}
    {caption_line}
</section>'''
        self.slides.append(html)

    def add_table_slide(self, title, headers, rows, notes=""):
        """Append a table slide.

        Args:
            title: Slide heading.
            headers: Iterable of column header values.
            rows: Iterable of rows, each an iterable of cell values.
            notes: Optional note rendered below the table.

        Header and cell values are converted with ``str()`` before escaping.
        """
        th_html = '\n'.join(f'<th>{_escape_html(str(h))}</th>' for h in headers)
        tr_html = '\n'.join(
            '<tr>' + ''.join(f'<td>{_escape_html(str(c))}</td>' for c in row) + '</tr>'
            for row in rows
        )
        notes_html = f'<div class="notes">{_escape_html(notes)}</div>' if notes else ''

        html = f'''<section class="slide light slide-table">
    <h3>{_escape_html(title)}</h3>
    <div class="title-line"></div>
    <table><thead><tr>{th_html}</tr></thead><tbody>{tr_html}</tbody></table>
    {notes_html}
</section>'''
        self.slides.append(html)

    def add_summary_slide(self, title, bullets):
        """Append a summary slide with a heading and a bullet list."""
        li_items = self._bullet_items(bullets)
        html = f'''<section class="slide light slide-summary">
    <div class="top-bar"></div>
    <h3>{_escape_html(title)}</h3>
    <ul class="bullets">{li_items}</ul>
</section>'''
        self.slides.append(html)

    def add_thankyou_slide(self, title="谢谢!欢迎提问", subtitle="Thank you & Questions"):
        """Append the closing slide with an optional subtitle."""
        sub_html = f'<div class="sub">{_escape_html(subtitle)}</div>' if subtitle else ''
        html = f'''<section class="slide dark slide-thanks">
    <div>
        <h2>{_escape_html(title)}</h2>
        {sub_html}
    </div>
</section>'''
        self.slides.append(html)

    def _numbered_slides(self):
        """Return the slide fragments with 1-based page-number badges applied.

        A slide that already contains a badge is renumbered. Content, figure,
        table and summary slides without one get a badge inserted just before
        their closing ``</section>`` so that it stays inside the slide. The
        slide kind is read from the opening tag only, so slide text that
        merely mentions e.g. ``slide-table`` cannot trigger extra badges.
        """
        numbered = []
        for number, slide in enumerate(self.slides, start=1):
            badge = f'<div class="page-num">{number}</div>'
            if '<div class="page-num">' in slide:
                slide = self._PAGE_NUM_RE.sub(badge, slide)
            else:
                kind = self._SLIDE_KIND_RE.match(slide)
                if kind and kind.group(1) in self._NUMBERED_KINDS:
                    head, closing, tail = slide.rpartition('</section>')
                    if closing:
                        slide = f'{head}    {badge}\n{closing}{tail}'
                    else:
                        # Fragment without a closing tag: fall back to appending.
                        slide = f'{slide}\n    {badge}'
            numbered.append(slide)
        return numbered

    @staticmethod
    def _fill_placeholders(template, values):
        """Substitute every placeholder key of ``values`` in ``template`` in one pass.

        A single regex pass means substituted text is never rescanned, so
        placeholder-like text inside a value is left untouched.
        """
        if not values:
            return template
        # Longest keys first so no key can shadow a longer one at the same position.
        keys = sorted(values, key=len, reverse=True)
        pattern = re.compile('|'.join(re.escape(key) for key in keys))
        # A function replacement avoids backslash/group interpretation of the values.
        return pattern.sub(lambda match: values[match.group(0)], template)

    def _render(self):
        """Render the complete HTML document.

        Reads ``../assets/academic_template.html`` relative to this file and
        fills in the title, theme colors, slide count, navigation dots and
        the numbered slide fragments.

        Raises:
            OSError: If the template file cannot be read.
        """
        template_path = os.path.join(os.path.dirname(__file__), '..', 'assets', 'academic_template.html')
        with open(template_path, 'r', encoding='utf-8') as f:
            template = f.read()

        theme = self.t
        total = str(len(self.slides))

        # One navigation dot per slide; the first one starts active.
        dots_html = '\n'.join(
            f'<div class="dot{" active" if i == 0 else ""}" data-idx="{i}"></div>'
            for i in range(len(self.slides))
        )

        values = {
            # NOTE(review): assumes __TITLE__ appears in HTML markup (e.g. <title>) -- confirm in template.
            '__TITLE__': _escape_html(self.title),
            '__PRIMARY__': theme['primary'],
            '__PRIMARY_RGB__': theme['primary_rgb'],
            '__ACCENT__': theme['accent'],
            '__BG__': theme['bg'],
            '__BG_LIGHT__': theme['bg_light'],
            '__TEXT__': theme['text'],
            '__MUTED__': theme['muted'],
            '__SECTION_BG__': theme['section_bg'],
            '__SECTION_TEXT__': theme['section_text'],
            '__TABLE_STRIPE__': theme['table_stripe'],
            '__NSLIDES__': total,
            '__TOTAL__': total,
            '__NAV_DOTS__': dots_html,
            '__SLIDES__': '\n'.join(self._numbered_slides()),
        }
        return self._fill_placeholders(template, values)

    def save(self, output_path):
        """Render the deck, write it to ``output_path`` and print a short summary.

        The parent directory is created when missing.

        Returns:
            ``output_path``, unchanged.
        """
        os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
        html = self._render()
        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(html)
        # Report the real on-disk size; len(html) counts characters, not bytes.
        size_kb = os.path.getsize(output_path) / 1024
        _safe_print(f"[HtmlPPT] Saved: {output_path} ({size_kb:.0f} KB)")
        _safe_print(f"[HtmlPPT] Slides: {len(self.slides)} | Theme: {self.t['name']}")
        if self._missing_images:
            _safe_print(f"[HtmlPPT] Missing images: {len(self._missing_images)}")
        if self._warnings:
            _safe_print(f"[HtmlPPT] Warnings: {len(self._warnings)}")
        return output_path

    def get_report(self):
        """Return a summary dict: theme key, slide count, missing images and warnings."""
        return {
            "theme": self.theme,
            "total_slides": len(self.slides),
            "missing_images": self._missing_images,
            "warnings": self._warnings,
        }
|