hypermark-py 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hypermark.py
ADDED
|
@@ -0,0 +1,853 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import os
|
|
3
|
+
from bs4 import BeautifulSoup
|
|
4
|
+
import emoji
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def strip_inline_formatting(text):
    """Return ``text`` with inline Markdown markup removed (plain-text/TTS output).

    Handles images (``![alt](url)`` -> ``Image: alt``), links (URL dropped),
    footnote references (removed), bold/italic/strikethrough/highlight/
    subscript/superscript markers, spoilers (``||x||`` -> ``Spoiler: x``),
    inline code (back-ticks removed) and emoji shortcodes.

    Inline code spans are lifted out *before* any other rule runs and put back
    at the very end, so characters such as ``_`` or ``*`` inside code are not
    mistaken for emphasis markers (this mirrors ``HTML.format_inline``).

    Args:
        text: The Markdown fragment; ``None`` or ``""`` yields ``""``.

    Returns:
        The fragment with inline markup stripped.
    """
    if not text:
        return ""

    # Protect inline code first: its content must survive verbatim.
    code_spans = {}

    def protect_code(match):
        token = f"HYPERMARKPLACEHOLDERCODE{len(code_spans)}END"
        code_spans[token] = match.group(1)
        return token

    text = re.sub(r"`([^`]+)`", protect_code, text)

    # Strip image: ![alt](url) -> "Image: alt" (if alt exists), else empty
    def replace_image(match):
        alt = match.group(1).strip()
        return f"Image: {alt}" if alt else ""

    text = re.sub(r"!\[([^\]]*)\]\(([^)]+)\)", replace_image, text)

    # Strip link: [text](url) -> text (URL omitted to keep TTS clean)
    text = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"\1", text)

    # Strip footnote references from inline text (e.g. [^1] -> "")
    text = re.sub(r"\[\^([^\]]+)\](?!:)", "", text)

    # Strip bold/italic/strikethrough/highlight/sub/super.
    # Double markers must be handled before their single-character forms.
    text = re.sub(r"(\*\*|__)(.*?)\1", r"\2", text)
    text = re.sub(r"(\*|_)(.*?)\1", r"\2", text)
    text = re.sub(r"~~(.*?)~~", r"\1", text)
    text = re.sub(r"==(.*?)==", r"\1", text)
    text = re.sub(r"~(.*?)~", r"\1", text)
    text = re.sub(r"\^(.*?)\^", r"\1", text)

    # Clean spoilers: ||spoiler|| -> Spoiler: spoiler
    text = re.sub(r"\|\|(.*?)\|\|", r"Spoiler: \1", text)

    # Emoji shortcodes. Deliberately best-effort: a failure inside the emoji
    # library must never prevent the text from being produced.
    try:
        text = emoji.emojize(text)
    except Exception:
        pass

    # Put the protected code content back (without the back-ticks).
    for token, code in code_spans.items():
        text = text.replace(token, code)
    return text
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class HTML:
    """Convert a Markdown file into a standalone HTML document.

    The converter reads the source line by line and drives a small state
    machine (code fence, fenced container, list stack, paragraph, blockquote,
    table, definition list).  Finished blocks are appended to
    ``self.html_content``; fenced code blocks are stored separately and
    represented by placeholders so that the optional BeautifulSoup
    ``prettify()`` pass cannot re-indent their content.

    Args:
        path_md: Path of the Markdown file to read.
        path_html: Path of the HTML file to write.
        style: ``"default"`` for the bundled stylesheet, otherwise a path/URL
            of a CSS file (inlined when it exists locally, linked otherwise).
    """

    def __init__(self, path_md, path_html, style="default"):
        self.path_md = path_md
        self.path_html = path_html
        self.style = style
        self._reset_state()

    def _reset_state(self):
        """(Re)initialise every accumulator so ``convert()`` can be re-run."""
        # State machine variables
        self.is_code_block = False
        self.is_def_list = False
        self.list_stack = []          # stack of (indent, "ul" | "ol")
        self.paragraph_lines = []
        self.blockquote_lines = []
        self.table_lines = []
        self.container_state = {
            'is_active': False,
            'type': 'note',
            'title': 'NOTE',
            'lines': []
        }

        self.html_content = []
        self.code_block_stores = []   # rendered code blocks, indexed by placeholder number
        self.code_lines = []
        self.footnotes = {}           # {number: (formatted_html, reference_label)}

    def read(self):
        """Return the source file as a list of lines, or ``None`` on failure.

        Errors are reported on stdout rather than raised.
        """
        try:
            with open(self.path_md, 'r', encoding="utf-8") as file:
                return file.readlines()
        except FileNotFoundError:
            print(f"Error: The file at {self.path_md} was not found.")
            return None
        except PermissionError:
            print(f"Error: Permission denied for file {self.path_md}.")
            return None
        except Exception as e:
            print(f"An unexpected error occurred while reading the file: {e}")
            return None

    def write(self, data, mode='w'):
        """Write ``data`` to ``path_html``; return ``True`` on success, else ``False``."""
        try:
            with open(self.path_html, mode, encoding="utf-8") as file:
                file.write(data)
            return True
        except PermissionError:
            print(f"Error: Permission denied for writing to file {self.path_html}.")
            return False
        except Exception as e:
            print(f"An unexpected error occurred while writing to the file: {e}")
            return False

    def Handle_escaping_characters(self, line):
        """Escape the five HTML-significant characters in ``line``.

        ``&`` is replaced first so the ampersands introduced by the other
        entities are not escaped a second time.  The stdlib ``html`` module
        is not used because this file defines a module-level function named
        ``html`` that would shadow it.
        """
        line = line.replace("&", "&amp;")
        line = line.replace("<", "&lt;")
        line = line.replace(">", "&gt;")
        line = line.replace('"', "&quot;")
        line = line.replace("'", "&#39;")
        return line

    def convert_emoji(self, line):
        """Expand emoji shortcodes (default names, then aliases).

        A ``ValueError`` from the emoji library leaves the text as converted
        so far instead of aborting the whole conversion.
        """
        try:
            line = emoji.emojize(line)
            line = emoji.emojize(line, language="alias")
        except ValueError:
            pass
        return line

    def format_inline(self, text, footnotes=None):
        """Render inline Markdown in ``text`` as HTML.

        Code spans, images, links and footnote references are rendered first
        and swapped for opaque placeholders so the later escaping and
        emphasis passes cannot touch their markup.  The placeholders are
        restored at the end.

        Args:
            text: Inline Markdown; falsy input yields ``""``.
            footnotes: Mapping ``{number: (html, label)}`` used to resolve
                ``[^label]`` references, or ``None`` to leave them untouched.

        Returns:
            The HTML fragment.
        """
        if not text:
            return ""

        placeholders = {}

        def protect(kind, html):
            # The "END" suffix keeps token N from being a prefix of token N0..N9.
            token = f"HYPERMARKPLACEHOLDER{kind}{len(placeholders)}END"
            placeholders[token] = html
            return token

        # 1. Protect & convert Inline Code
        def replace_code(match):
            escaped_code = self.Handle_escaping_characters(match.group(1))
            return protect("CODE", f"<code>{escaped_code}</code>")

        text = re.sub(r"`([^`]+)`", replace_code, text)

        # 2. Protect & convert Images (alt, url); both land in attributes,
        #    so they are escaped.
        def replace_image(match):
            alt = self.convert_emoji(self.Handle_escaping_characters(match.group(1)))
            url = self.Handle_escaping_characters(match.group(2))
            return protect("IMG", f'<img src="{url}" alt="{alt}">')

        text = re.sub(r"!\[([^\]]*)\]\(([^)]+)\)", replace_image, text)

        # 3. Protect & convert Links (text, url)
        def replace_link(match):
            formatted_link_text = self.format_inline(match.group(1), footnotes=None)
            url = self.Handle_escaping_characters(match.group(2))
            return protect("LINK", f'<a href="{url}">{formatted_link_text}</a>')

        text = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", replace_link, text)

        # 4. Handle Footnote References
        if footnotes is not None:
            def replace_footnote(match):
                footnote_ref = match.group(1)
                matching_id = next(
                    (key for key, value in footnotes.items() if value[1] == footnote_ref),
                    None,
                )
                if matching_id is None:
                    # Unknown reference: keep the original text.
                    return match.group(0)
                safe_ref = self.Handle_escaping_characters(footnote_ref)
                return protect(
                    "FN",
                    f'<sup><a href="#fn{safe_ref}" id="{safe_ref}">^{matching_id}</a></sup>',
                )

            text = re.sub(r"\[\^([^\]]+)\](?!:)", replace_footnote, text)

        # 5. Escape raw text
        text = self.Handle_escaping_characters(text)

        # 6. Emojis
        text = self.convert_emoji(text)

        # 7. Apply conversions: Bold, Italic, Strikethrough, Subscript,
        #    Superscript, Highlight, Spoiler (double markers before single ones)
        text = re.sub(r"(\*\*|__)(.*?)\1", r"<strong>\2</strong>", text)
        text = re.sub(r"(\*|_)(.*?)\1", r"<em>\2</em>", text)
        text = re.sub(r"~~(.*?)~~", r"<del>\1</del>", text)
        text = re.sub(r"==(.*?)==", r"<mark>\1</mark>", text)
        text = re.sub(r"~(.*?)~", r"<sub>\1</sub>", text)
        text = re.sub(r"\^(.*?)\^", r"<sup>\1</sup>", text)
        text = re.sub(
            r"\|\|(.*?)\|\|",
            '<span class="spoiler" title="Click to reveal" onclick="this.classList.toggle(\'revealed\')">\\1</span>',
            text
        )

        # 8. Restore placeholders, newest first: a link created late may wrap
        #    a code/image placeholder created earlier, which only becomes
        #    visible in ``text`` once the link itself has been restored.
        for token, html in reversed(list(placeholders.items())):
            text = text.replace(token, html)

        return text

    def convert_heading(self, line):
        """Render a ``#``-prefixed line as ``<hN>``, honouring a trailing ``{#id}``."""
        level_match = re.match(r"^#+", line)
        level = len(level_match.group())
        content = line[level:].strip()

        id_match = re.search(r"\{\#([a-zA-Z0-9_-]+)\}\s*$", content)
        heading_id = ""
        if id_match:
            heading_id = id_match.group(1)
            content = content[:id_match.start()].strip()

        formatted_content = self.format_inline(content, self.footnotes)
        if heading_id:
            return f'<h{level} id="{heading_id}">{formatted_content}</h{level}>'
        return f'<h{level}>{formatted_content}</h{level}>'

    def convert_blockquote(self, lines):
        """Render blockquote lines as one ``<p>`` per line inside ``<blockquote>``."""
        formatted_lines = [f"<p>{self.format_inline(line, self.footnotes)}</p>" for line in lines]
        return "<blockquote>\n" + "\n".join(formatted_lines) + "\n</blockquote>"

    def convert_container(self, container_type, container_title, lines):
        """Render a ``:::`` fenced container; blank lines separate paragraphs.

        ``container_type`` and ``container_title`` come straight from the
        document, so they are escaped before being placed in the markup.
        """
        safe_type = self.Handle_escaping_characters(container_type)
        safe_title = self.Handle_escaping_characters(container_title)
        html = f'<div class="custom-block {safe_type}">\n'
        html += f' <p class="custom-block-title">{safe_title}</p>\n'

        current_p = []
        for line in lines:
            stripped = line.strip()
            if not stripped:
                if current_p:
                    html += f' <p>{self.format_inline(" ".join(current_p), self.footnotes)}</p>\n'
                    current_p = []
            else:
                current_p.append(stripped)
        if current_p:
            html += f' <p>{self.format_inline(" ".join(current_p), self.footnotes)}</p>\n'

        html += '</div>'
        return html

    @staticmethod
    def _is_table_separator(line):
        """Return True if ``line`` consists only of ``|``, ``:``, ``-`` and has a ``-``."""
        line = line.strip()
        cleaned = line.replace('|', '').replace(':', '').replace('-', '').strip()
        return len(cleaned) == 0 and '-' in line

    @staticmethod
    def _split_table_row(line):
        """Split a table row into stripped cells, ignoring one outer ``|`` per side."""
        line = line.strip()
        if line.startswith('|'):
            line = line[1:]
        if line.endswith('|'):
            line = line[:-1]
        return [cell.strip() for cell in line.split('|')]

    def convert_table(self, lines):
        """Render pipe-table lines (header, separator, rows) as an HTML table.

        Fewer than two lines are returned joined as-is; an invalid separator
        row downgrades every line to a paragraph.
        """
        if len(lines) < 2:
            return "\n".join(lines)

        if not self._is_table_separator(lines[1]):
            return "\n".join([f"<p>{self.format_inline(line, self.footnotes)}</p>" for line in lines])

        table_html = '<table style="border: 1px solid black; border-collapse: collapse;">\n'

        headers = self._split_table_row(lines[0])

        table_html += " <thead>\n <tr>\n"
        for h in headers:
            formatted_h = self.format_inline(h, self.footnotes)
            table_html += f" <th style='border: 1px solid black; padding: 5px;'>{formatted_h}</th>\n"
        table_html += " </tr>\n </thead>\n"

        # Column alignment from the separator row: ":-:" centre, "-:" right.
        alignments = []
        for c in self._split_table_row(lines[1]):
            if c.startswith(':') and c.endswith(':'):
                alignments.append('center')
            elif c.endswith(':'):
                alignments.append('right')
            else:
                alignments.append('left')

        while len(alignments) < len(headers):
            alignments.append('left')

        table_html += " <tbody>\n"
        for line in lines[2:]:
            if not line.strip():
                continue
            cols = self._split_table_row(line)

            # Pad short rows; extra cells beyond the header count are dropped.
            while len(cols) < len(headers):
                cols.append('')

            table_html += " <tr>\n"
            for col, alignment in zip(cols[:len(headers)], alignments):
                formatted_col = self.format_inline(col, self.footnotes)
                table_html += f" <td style='border: 1px solid black; padding: 5px; text-align: {alignment};'>{formatted_col}</td>\n"
            table_html += " </tr>\n"

        table_html += " </tbody>\n"
        table_html += '</table>\n'
        return table_html

    def flush_blocks(self):
        """Close every open block and append its HTML to ``html_content``."""
        while self.list_stack:
            _, ltype = self.list_stack.pop()
            self.html_content.append(f"</{ltype}>")

        if self.paragraph_lines:
            p_text = " ".join(self.paragraph_lines)
            self.html_content.append(f"<p>{self.format_inline(p_text, self.footnotes)}</p>")
            self.paragraph_lines.clear()

        if self.blockquote_lines:
            self.html_content.append(self.convert_blockquote(self.blockquote_lines))
            self.blockquote_lines.clear()

        if self.table_lines:
            self.html_content.append(self.convert_table(self.table_lines))
            self.table_lines.clear()

        if self.container_state['is_active']:
            self._close_container()

        if self.is_def_list:
            self.html_content.append("</dl>")
            self.is_def_list = False

    def _close_container(self):
        """Emit the active fenced container and mark it inactive."""
        self.html_content.append(self.convert_container(
            self.container_state['type'],
            self.container_state['title'],
            self.container_state['lines']
        ))
        self.container_state['is_active'] = False
        self.container_state['lines'] = []

    def _emit_code_block(self, lang):
        """Store the collected code lines and append a placeholder for them."""
        safe_lang = self.Handle_escaping_characters(lang)
        lang_class = f' class="language-{safe_lang}"' if safe_lang else ''
        lang_display = safe_lang if safe_lang else 'code'
        code_text = "\n".join(self.code_lines)
        block_html = (
            '<div class="code-block-container">\n'
            ' <div class="code-block-header">\n'
            f' <span class="code-block-lang">{lang_display}</span>\n'
            ' <button class="copy-code-button" onclick="copyCode(this)">Copy code</button>\n'
            ' </div>\n'
            f' <pre><code{lang_class}>{code_text}</code></pre>\n'
            '</div>'
        )
        placeholder = f"HYPERMARKBLOCKCODEBLOCKPLACEHOLDER{len(self.code_block_stores)}"
        self.code_block_stores.append(block_html)
        self.html_content.append(placeholder)
        self.code_lines = []

    def _resolve_stylesheet(self):
        """Return ``(css_content, css_link)`` for the configured style.

        At most one of the two is non-empty: inline CSS when the stylesheet
        could be read, otherwise a ``<link>`` tag.
        """
        css_content = ""
        css_link = ""

        style_normalized = self.style.lower().strip() if isinstance(self.style, str) else ""
        if style_normalized in ("default", "defualt"):  # common misspelling accepted
            default_css_path = os.path.join(os.path.dirname(__file__), "..", "resources", "hypermark.css")
            try:
                if os.path.exists(default_css_path):
                    with open(default_css_path, "r", encoding="utf-8") as f:
                        css_content = f.read()
                else:
                    css_link = "<link rel='stylesheet' href='resources/hypermark.css'>\n"
            except Exception:
                css_link = "<link rel='stylesheet' href='resources/hypermark.css'>\n"
        elif self.style and os.path.exists(self.style):
            try:
                with open(self.style, "r", encoding="utf-8") as f:
                    css_content = f.read()
            except Exception:
                css_link = f"<link rel='stylesheet' href='{self.style}'>\n"
        elif self.style:
            css_link = f"<link rel='stylesheet' href='{self.style}'>\n"

        return css_content, css_link

    def convert(self):
        """Read ``path_md``, convert it and write the document to ``path_html``.

        Returns ``None``.  If the source cannot be read nothing is written.
        Conversion state is reset first, so the method may be called more
        than once on the same instance.
        """
        lines = self.read()
        if lines is None:
            return

        self._reset_state()

        # Setup CSS styling block
        css_content, css_link = self._resolve_stylesheet()

        html_initial_body = (
            "<!DOCTYPE html>\n"
            "<html lang=\"en\">\n"
            "<head>\n"
            " <meta charset=\"UTF-8\">\n"
            " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n"
            " <title>Markdown to HTML</title>\n"
            " <!-- Highlight.js Styles -->\n"
            " <link rel=\"stylesheet\" href=\"https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/atom-one-dark.min.css\">\n"
        )

        if css_content:
            html_initial_body += f" <style>\n{css_content}\n </style>\n"
        elif css_link:
            html_initial_body += f" {css_link}"

        html_initial_body += (
            " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js\"></script>\n"
            " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/go.min.js\"></script>\n"
            "</head>\n"
            "<body>\n"
        )

        html_end_body = (
            " <script>\n"
            " hljs.highlightAll();\n"
            " function copyCode(button) {\n"
            " const container = button.closest('.code-block-container');\n"
            " const code = container.querySelector('code');\n"
            " navigator.clipboard.writeText(code.innerText).then(() => {\n"
            " button.textContent = 'Copied!';\n"
            " button.classList.add('copied');\n"
            " setTimeout(() => {\n"
            " button.textContent = 'Copy code';\n"
            " button.classList.remove('copied');\n"
            " }, 2000);\n"
            " });\n"
            " }\n"
            " </script>\n"
            "</body>\n"
            "</html>"
        )

        # PASS 1: Footnote pre-scan (definitions inside code fences are code,
        # not footnotes, so fenced regions are skipped).
        in_fence = False
        for raw_line in lines:
            line = raw_line.replace('\r', '').replace('\n', '')
            if line.strip().startswith("```"):
                in_fence = not in_fence
                continue
            if in_fence:
                continue
            fn_def_match = re.match(r"^\[\^([^\]]+)\]:\s*(.*)", line)
            if fn_def_match:
                fn_ref = fn_def_match.group(1)
                fn_content = fn_def_match.group(2).strip()
                fn_id = len(self.footnotes) + 1
                self.footnotes[fn_id] = (self.format_inline(fn_content, footnotes=None), fn_ref)

        # PASS 2: Main loop
        lang = ""
        for idx, raw_line in enumerate(lines):
            line = raw_line.replace('\r', '').replace('\n', '')
            stripped = line.strip()

            # Code Blocks
            if stripped.startswith("```"):
                if not self.is_code_block:
                    self.flush_blocks()
                    lang = stripped[3:].strip()
                    self.is_code_block = True
                    self.code_lines = []
                else:
                    self._emit_code_block(lang)
                    self.is_code_block = False
                continue

            if self.is_code_block:
                self.code_lines.append(self.Handle_escaping_characters(line))
                continue

            # Fenced Containers
            if stripped.startswith(":::"):
                if not self.container_state['is_active']:
                    self.flush_blocks()
                    parts = stripped[3:].strip().split(maxsplit=1)
                    c_type = parts[0] if parts else 'note'
                    c_title = parts[1] if len(parts) > 1 else c_type.upper()
                    self.container_state['is_active'] = True
                    self.container_state['type'] = c_type
                    self.container_state['title'] = c_title
                    self.container_state['lines'] = []
                else:
                    self._close_container()
                continue

            if self.container_state['is_active']:
                self.container_state['lines'].append(line)
                continue

            # Footnote definitions (already collected in pass 1)
            if re.match(r"^\[\^([^\]]+)\]:\s*(.*)", line):
                self.flush_blocks()
                continue

            # Blank Lines
            if not stripped:
                self.flush_blocks()
                continue

            # Headings
            if line.startswith("#"):
                self.flush_blocks()
                self.html_content.append(self.convert_heading(line))
                continue

            # Horizontal Rule
            if stripped == "---":
                self.flush_blocks()
                self.html_content.append("<hr>")
                continue

            # Blockquote
            if line.startswith(">"):
                if not self.blockquote_lines:
                    self.flush_blocks()
                self.blockquote_lines.append(line[1:])
                continue

            # Tables: a row starts a table only if the next line is a separator.
            if "|" in line:
                if self.table_lines:
                    self.table_lines.append(line)
                    continue
                has_separator = False
                if idx + 1 < len(lines):
                    next_line = lines[idx + 1].replace('\r', '').replace('\n', '')
                    has_separator = self._is_table_separator(next_line)
                if has_separator:
                    self.flush_blocks()
                    self.table_lines.append(line)
                    continue

            # Lists
            list_match = re.match(r"^(\s*)(\d+\.\s+|[-\+\*]\s+)(.*)", line)
            if list_match:
                if not self.list_stack:
                    self.flush_blocks()

                indent = len(list_match.group(1))
                marker = list_match.group(2).strip()
                content = list_match.group(3).strip()
                list_type = "ol" if marker.endswith('.') else "ul"

                task_match = re.match(r"^\[([ xX])\]\s+(.*)", content)
                if task_match:
                    checked = task_match.group(1).lower() == 'x'
                    item_content = task_match.group(2).strip()
                    is_task = True
                else:
                    checked = False
                    item_content = content
                    is_task = False

                # Dedent: close every list nested deeper than this item.
                while self.list_stack and indent < self.list_stack[-1][0]:
                    _, closed_type = self.list_stack.pop()
                    self.html_content.append(f"</{closed_type}>")

                if not self.list_stack or indent > self.list_stack[-1][0]:
                    self.list_stack.append((indent, list_type))
                    self.html_content.append(f"<{list_type}>")
                elif indent == self.list_stack[-1][0] and list_type != self.list_stack[-1][1]:
                    # Same level but the marker kind changed: start a new list.
                    _, closed_type = self.list_stack.pop()
                    self.html_content.append(f"</{closed_type}>")
                    self.list_stack.append((indent, list_type))
                    self.html_content.append(f"<{list_type}>")

                formatted_content = self.format_inline(item_content, self.footnotes)
                if is_task:
                    chk = "checked" if checked else ""
                    self.html_content.append(f'<li style="list-style-type: none;"><input type="checkbox" {chk} disabled> {formatted_content}</li>')
                else:
                    self.html_content.append(f'<li>{formatted_content}</li>')
                continue

            # Definition list: the pending paragraph is the term.
            if line.startswith(":"):
                if not self.is_def_list:
                    if self.list_stack or self.blockquote_lines or self.table_lines or self.container_state['is_active']:
                        self.flush_blocks()

                    term_text = " ".join(self.paragraph_lines)
                    self.paragraph_lines.clear()

                    self.html_content.append("<dl>")
                    self.html_content.append(f"<dt>{self.format_inline(term_text, self.footnotes)}</dt>")
                    self.is_def_list = True
                elif self.paragraph_lines:
                    # A further term inside an already open list.
                    term_text = " ".join(self.paragraph_lines)
                    self.paragraph_lines.clear()
                    self.html_content.append(f"<dt>{self.format_inline(term_text, self.footnotes)}</dt>")

                definition_text = line[1:].strip()
                self.html_content.append(f"<dd>{self.format_inline(definition_text, self.footnotes)}</dd>")
                continue

            # Fallback: Paragraph
            if self.list_stack or self.blockquote_lines or self.table_lines or self.container_state['is_active']:
                self.flush_blocks()

            self.paragraph_lines.append(stripped)

        # An unterminated fence still yields its content.
        if self.is_code_block:
            self._emit_code_block(lang)
            self.is_code_block = False

        self.flush_blocks()

        if self.footnotes:
            self.html_content.append("<hr><h3>Footnotes</h3><ol>")
            for fn_id, content in self.footnotes.items():
                safe_ref = self.Handle_escaping_characters(content[1])
                self.html_content.append(f'<li id="fn{safe_ref}">{content[0]} <a href="#{safe_ref}">^{fn_id}</a></li>')
            self.html_content.append("</ol>")

        full_html = html_initial_body + "\n".join(self.html_content) + "\n" + html_end_body

        # Pretty-printing is best-effort; any failure falls back to raw HTML.
        try:
            soup = BeautifulSoup(full_html, "html.parser")
            final_html = soup.prettify()
        except Exception as e:
            print(f"BeautifulSoup parsing failed: {e}. Writing raw HTML.")
            final_html = full_html

        # Restore code blocks in one pass.  A callable replacement is used so
        # backslashes in the code are inserted literally, and the greedy
        # ``\d+`` keeps placeholder 1 from matching inside placeholder 10.
        def restore_code_block(match):
            index = int(match.group(1))
            if index < len(self.code_block_stores):
                return f"\n{self.code_block_stores[index]}\n"
            return match.group(0)

        final_html = re.sub(
            r"\s*HYPERMARKBLOCKCODEBLOCKPLACEHOLDER(\d+)\s*",
            restore_code_block,
            final_html,
        )

        self.write(final_html)
|
|
613
|
+
|
|
614
|
+
|
|
615
|
+
class Text:
    """Convert a Markdown file into plain text (e.g. as input for TTS).

    The source is processed line by line; paragraphs, blockquotes and tables
    are buffered and flushed as complete blocks, everything else is emitted
    immediately.  Inline markup is removed with ``strip_inline_formatting``.

    Args:
        path_md: Path of the Markdown file to read.
        path_text: Path of the text file to write.
    """

    def __init__(self, path_md, path_text):
        self.path_md = path_md
        self.path_text = path_text
        self._reset_state()

    def _reset_state(self):
        """(Re)initialise the output buffer and the state machine variables."""
        self.text_content = []

        # State machine variables
        self.is_code_block = False
        self.paragraph_lines = []
        self.blockquote_lines = []
        self.table_lines = []

    def read(self):
        """Return the source file as a list of lines, or ``None`` on failure."""
        try:
            with open(self.path_md, 'r', encoding="utf-8") as file:
                return file.readlines()
        except Exception as e:
            print(f"Error reading file: {e}")
            return None

    def write(self, data):
        """Write ``data`` to ``path_text``; return ``True`` on success, else ``False``."""
        try:
            with open(self.path_text, 'w', encoding="utf-8") as file:
                file.write(data)
            return True
        except Exception as e:
            print(f"Error writing file: {e}")
            return False

    def flush_table(self, table_lines):
        """Render buffered pipe-table lines as ``Row: header: cell; ...`` text.

        Without a valid separator row (or with fewer than two lines) the
        lines are returned with inline markup stripped.  Returns ``""`` when
        there is nothing to output.
        """
        if not table_lines:
            return ""

        if len(table_lines) < 2:
            return "\n".join([strip_inline_formatting(line) for line in table_lines])

        sep_line = table_lines[1].strip()
        cleaned_sep = sep_line.replace('|', '').replace(':', '').replace('-', '').strip()
        is_valid_sep = len(cleaned_sep) == 0 and '-' in sep_line

        if not is_valid_sep:
            return "\n".join([strip_inline_formatting(line) for line in table_lines])

        formatted_rows = []
        header_line = table_lines[0].strip()
        if header_line.startswith('|'):
            header_line = header_line[1:]
        if header_line.endswith('|'):
            header_line = header_line[:-1]
        headers = [strip_inline_formatting(h.strip()) for h in header_line.split('|')]

        for line in table_lines[2:]:
            line = line.strip()
            if not line:
                continue
            if line.startswith('|'):
                line = line[1:]
            if line.endswith('|'):
                line = line[:-1]
            cols = [strip_inline_formatting(c.strip()) for c in line.split('|')]

            row_parts = []
            for idx, col in enumerate(cols):
                # Cells beyond the header row get a generic column name.
                header = headers[idx] if idx < len(headers) else f"Column {idx + 1}"
                if header and col:
                    row_parts.append(f"{header}: {col}")
                elif col:
                    row_parts.append(col)
            if row_parts:
                formatted_rows.append("Row: " + "; ".join(row_parts) + ".")

        if formatted_rows:
            return "Table data:\n" + "\n".join(formatted_rows)
        return ""

    def flush_blocks(self):
        """Emit the buffered paragraph, blockquote and table, then clear them."""
        if self.paragraph_lines:
            p_text = " ".join(self.paragraph_lines)
            self.text_content.append(strip_inline_formatting(p_text))
            self.paragraph_lines.clear()

        if self.blockquote_lines:
            for line in self.blockquote_lines:
                self.text_content.append(f" {strip_inline_formatting(line)}")
            self.blockquote_lines.clear()

        if self.table_lines:
            self.text_content.append(self.flush_table(self.table_lines))
            self.table_lines.clear()

    def convert(self):
        """Read ``path_md``, convert it and write the text to ``path_text``.

        Returns ``None``.  If the source cannot be read nothing is written.
        State is reset first, so repeated calls on one instance do not
        accumulate output from earlier runs.
        """
        lines = self.read()
        if lines is None:
            return

        self._reset_state()

        for idx, raw_line in enumerate(lines):
            line = raw_line.replace('\r', '').replace('\n', '')

            # Code Blocks
            if line.strip().startswith("```"):
                if not self.is_code_block:
                    self.flush_blocks()
                    self.is_code_block = True
                    self.text_content.append("\nCode block starts:\n")
                else:
                    self.is_code_block = False
                    self.text_content.append("\nCode block ends.\n")
                continue

            if self.is_code_block:
                # Code is passed through verbatim.
                self.text_content.append(line)
                continue

            # Blank Lines
            if not line.strip():
                self.flush_blocks()
                self.text_content.append("")
                continue

            # Horizontal Rule
            if line.strip() == "---":
                self.flush_blocks()
                self.text_content.append("")
                continue

            # Fenced Containers (Alert Boxes): only the opening fence carries
            # a type/title; a bare closing ":::" emits nothing.
            if line.strip().startswith(":::"):
                self.flush_blocks()
                parts = line.strip()[3:].strip().split(maxsplit=1)
                if parts:
                    c_type = parts[0]
                    c_title = parts[1] if len(parts) > 1 else c_type.upper()
                    self.text_content.append(f"\n{c_title}:\n")
                continue

            # Heading
            if line.startswith("#"):
                self.flush_blocks()
                level_match = re.match(r"^#+", line)
                level = len(level_match.group())
                content = line[level:].strip()
                # Drop a trailing {#custom-id} attribute.
                content = re.sub(r"\{\#([a-zA-Z0-9_-]+)\}\s*$", "", content).strip()
                stripped_content = strip_inline_formatting(content)
                self.text_content.append(f"\n{stripped_content}\n")
                continue

            # Blockquote
            if line.startswith(">"):
                if not self.blockquote_lines:
                    self.flush_blocks()
                self.blockquote_lines.append(line[1:])
                continue

            # Table: a row starts a table only if the next line is a separator.
            if "|" in line:
                if self.table_lines:
                    self.table_lines.append(line)
                    continue
                else:
                    has_separator = False
                    if idx + 1 < len(lines):
                        next_line = lines[idx + 1].replace('\r', '').replace('\n', '').strip()
                        cleaned_sep = next_line.replace('|', '').replace(':', '').replace('-', '').strip()
                        has_separator = len(cleaned_sep) == 0 and '-' in next_line

                    if has_separator:
                        self.flush_blocks()
                        self.table_lines.append(line)
                        continue

            # Definition List
            if line.startswith(":"):
                # Definition list uses the preceding paragraph as the term.
                # Flush everything except paragraph lines
                if self.blockquote_lines or self.table_lines:
                    self.flush_blocks()

                term_text = " ".join(self.paragraph_lines)
                self.paragraph_lines.clear()

                if term_text:
                    self.text_content.append(strip_inline_formatting(term_text))

                content = line[1:].strip()
                self.text_content.append(f" {strip_inline_formatting(content)}")
                continue

            # List Item
            list_match = re.match(r"^(\s*)(\d+\.\s+|[-\+\*]\s+)(.*)", line)
            if list_match:
                self.flush_blocks()
                indent = list_match.group(1)
                marker = list_match.group(2).strip()
                content = list_match.group(3).strip()

                # Check for task checkbox
                task_match = re.match(r"^\[([ xX])\]\s+(.*)", content)
                if task_match:
                    checked = task_match.group(1).lower() == 'x'
                    item_content = task_match.group(2).strip()
                    status = "Completed: " if checked else "Todo: "
                    stripped_content = status + strip_inline_formatting(item_content)
                else:
                    stripped_content = strip_inline_formatting(content)

                # Ordered items keep their number; bullet markers are dropped.
                if marker.endswith('.'):
                    self.text_content.append(f"{indent}{marker} {stripped_content}")
                else:
                    self.text_content.append(f"{indent}{stripped_content}")
                continue

            # Footnote definitions
            fn_def_match = re.match(r"^\[\^([^\]]+)\]:\s*(.*)", line)
            if fn_def_match:
                self.flush_blocks()
                fn_ref = fn_def_match.group(1)
                fn_content = fn_def_match.group(2).strip()
                self.text_content.append(f"Footnote {fn_ref}: {strip_inline_formatting(fn_content)}")
                continue

            # Fallback: Paragraph
            if self.blockquote_lines or self.table_lines:
                self.flush_blocks()
            self.paragraph_lines.append(line.strip())

        self.flush_blocks()

        final_text = "\n".join(self.text_content)
        self.write(final_text)
|
|
844
|
+
|
|
845
|
+
|
|
846
|
+
def html(path_md, path_html, style="default"):
    """Convert the Markdown file ``path_md`` to HTML at ``path_html``.

    Convenience wrapper: builds an ``HTML`` converter with the given
    ``style`` and runs its ``convert()`` method.  Returns ``None``.
    """
    HTML(path_md, path_html, style).convert()
|
|
849
|
+
|
|
850
|
+
|
|
851
|
+
def text(path_md, path_text):
    """Convert the Markdown file ``path_md`` to plain text at ``path_text``.

    Convenience wrapper: builds a ``Text`` converter and runs its
    ``convert()`` method.  Returns ``None``.
    """
    Text(path_md, path_text).convert()
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: hypermark-py
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A Python package for converting Markdown to HTML
|
|
5
|
+
Home-page: https://github.com/SwapCodesDev/hypermark
|
|
6
|
+
Author: SwapCodesDev
|
|
7
|
+
Author-email: swapcodes.dev@gmail.com
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Requires-Python: >=3.6
|
|
13
|
+
Description-Content-Type: text/markdown
|
|
14
|
+
Requires-Dist: beautifulsoup4
|
|
15
|
+
Requires-Dist: emoji
|
|
16
|
+
Dynamic: author
|
|
17
|
+
Dynamic: author-email
|
|
18
|
+
Dynamic: classifier
|
|
19
|
+
Dynamic: description
|
|
20
|
+
Dynamic: description-content-type
|
|
21
|
+
Dynamic: home-page
|
|
22
|
+
Dynamic: license
|
|
23
|
+
Dynamic: requires-dist
|
|
24
|
+
Dynamic: requires-python
|
|
25
|
+
Dynamic: summary
|
|
26
|
+
|
|
27
|
+
``` bash
|
|
28
|
+
pip install git+https://github.com/SwapCodesDev/Markdown-to-HTML.git@master#subdirectory=python
|
|
29
|
+
```
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
hypermark.py,sha256=RsYJKzE0BXo3ApZjDRzR_iHWoSU4DIxhKtOTP6Y_96g,33569
|
|
2
|
+
hypermark_py-1.0.0.dist-info/METADATA,sha256=TjPKOB-bBXfqhUooUXfpUcQuT7igRorMLBSRBX4J-Pg,823
|
|
3
|
+
hypermark_py-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
4
|
+
hypermark_py-1.0.0.dist-info/top_level.txt,sha256=FosEiKNXitNw6ZPz_JsmAmuhl5jJwwpO1Dvgcm0IeMA,10
|
|
5
|
+
hypermark_py-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
hypermark
|