hypermark-py 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
hypermark.py ADDED
@@ -0,0 +1,853 @@
1
+ import re
2
+ import os
3
+ from bs4 import BeautifulSoup
4
+ import emoji
5
+
6
+
7
def strip_inline_formatting(text):
    """Return *text* with inline Markdown markup removed.

    The result is meant for plain-text output: images become
    ``Image: <alt>`` (or vanish when they have no alt text), links keep only
    their label, footnote references disappear, emphasis-style markers are
    dropped, spoilers are announced with a ``Spoiler:`` prefix and emoji
    shortcodes are expanded when possible.  A falsy *text* yields ``""``.
    """
    if not text:
        return ""

    def describe_image(found):
        # An image without alt text has nothing worth keeping.
        label = found.group(1).strip()
        if label:
            return f"Image: {label}"
        return ""

    # Order matters: images before links (a link pattern would otherwise eat
    # the image body), double markers before their single-character variants.
    rewrite_rules = (
        (r"!\[([^\]]*)\]\(([^)]+)\)", describe_image),   # image
        (r"\[([^\]]+)\]\(([^)]+)\)", r"\1"),             # link -> label only
        (r"\[\^([^\]]+)\](?!:)", ""),                    # footnote reference
        (r"(\*\*|__)(.*?)\1", r"\2"),                    # bold
        (r"(\*|_)(.*?)\1", r"\2"),                       # italic
        (r"~~(.*?)~~", r"\1"),                           # strikethrough
        (r"==(.*?)==", r"\1"),                           # highlight
        (r"~(.*?)~", r"\1"),                             # subscript
        (r"\^(.*?)\^", r"\1"),                           # superscript
        (r"\|\|(.*?)\|\|", r"Spoiler: \1"),              # spoiler
        (r"`([^`]+)`", r"\1"),                           # inline code
    )

    plain = text
    for pattern, replacement in rewrite_rules:
        plain = re.sub(pattern, replacement, plain)

    # Emoji expansion is best-effort: any failure leaves the text untouched.
    try:
        plain = emoji.emojize(plain)
    except Exception:
        pass
    return plain
42
+
43
+
44
class HTML:
    """Line-oriented Markdown-to-HTML converter.

    ``convert()`` reads the Markdown file at ``path_md``, walks it line by
    line through a small state machine (code fences, fenced containers,
    headings, blockquotes, tables, lists, definition lists, paragraphs,
    footnotes) and writes a complete HTML document to ``path_html``.

    ``style`` selects the stylesheet: ``"default"`` uses the bundled
    ``hypermark.css`` (inlined when the file can be read, linked otherwise),
    an existing file path is inlined, any other non-empty value is emitted
    as a ``<link>`` href, and an empty value adds no stylesheet.
    """

    # Marker written into the body in place of each fenced code block.  The
    # real block HTML is substituted after prettifying so the code's
    # whitespace is not reformatted.
    _CODE_BLOCK_PLACEHOLDER = "HYPERMARKBLOCKCODEBLOCKPLACEHOLDER"

    def __init__(self, path_md, path_html, style="default"):
        self.path_md = path_md
        self.path_html = path_html
        self.style = style

        # State machine variables
        self.is_code_block = False
        self.is_def_list = False
        self.list_stack = []          # stack of (indent, "ul" | "ol") for open lists
        self.paragraph_lines = []
        self.blockquote_lines = []
        self.table_lines = []
        self.container_state = {
            'is_active': False,
            'type': 'note',
            'title': 'NOTE',
            'lines': []
        }

        self.html_content = []        # rendered body fragments, in order
        self.code_block_stores = []   # rendered code blocks, indexed by placeholder number
        self.code_lines = []          # escaped lines of the code block being read
        self.footnotes = {}           # number -> (rendered content, reference label)

    def read(self):
        """Return the Markdown file as a list of lines, or ``None`` on failure.

        Errors are reported on stdout rather than raised.
        """
        try:
            with open(self.path_md, 'r', encoding="utf-8") as file:
                return file.readlines()
        except FileNotFoundError:
            print(f"Error: The file at {self.path_md} was not found.")
            return None
        except PermissionError:
            print(f"Error: Permission denied for file {self.path_md}.")
            return None
        except Exception as e:
            print(f"An unexpected error occurred while reading the file: {e}")
            return None

    def write(self, data, mode='w'):
        """Write *data* to ``path_html``; return ``True`` on success.

        Errors are reported on stdout and signalled by a ``False`` return.
        """
        try:
            with open(self.path_html, mode, encoding="utf-8") as file:
                file.write(data)
            return True
        except PermissionError:
            print(f"Error: Permission denied for writing to file {self.path_html}.")
            return False
        except Exception as e:
            print(f"An unexpected error occurred while writing to the file: {e}")
            return False

    def Handle_escaping_characters(self, line):
        """Escape the HTML-significant characters ``& < > " '`` in *line*."""
        # '&' must be handled first, otherwise the ampersands introduced by
        # the later replacements would themselves be escaped again.
        line = line.replace('&', '&amp;')
        line = line.replace('<', '&lt;')
        line = line.replace('>', '&gt;')
        line = line.replace('"', '&quot;')
        line = line.replace("'", '&#39;')
        return line

    def convert_emoji(self, line):
        """Expand emoji shortcodes in *line*.

        The default shortcode set is applied first; when that succeeds the
        alias set is applied on top.  A ``ValueError`` from the first pass
        leaves the line unchanged.
        """
        try:
            line = emoji.emojize(line)
        except ValueError:
            pass
        else:
            line = emoji.emojize(line, language="alias")
        return line

    def format_inline(self, text, footnotes=None):
        """Render the inline Markdown in *text* as HTML.

        Inline code, images, links and (when *footnotes* is given) footnote
        references are rendered first and swapped for alphanumeric
        placeholders so that the subsequent escaping and emphasis passes
        cannot touch them; the placeholders are put back at the end.

        *footnotes* maps footnote number -> (rendered content, reference
        label); pass ``None`` to leave ``[^ref]`` markers untouched.
        Returns ``""`` for falsy *text*.
        """
        if not text:
            return ""

        placeholders = {}   # placeholder -> rendered HTML, in creation order
        code_plain = {}     # code placeholder -> escaped code text without tags
        p_counter = 0

        def stash(kind, html):
            """Store *html* under a fresh placeholder and return the placeholder."""
            nonlocal p_counter
            placeholder = f"HYPERMARKPLACEHOLDER{kind}{p_counter}"
            placeholders[placeholder] = html
            p_counter += 1
            return placeholder

        def attribute_value(raw):
            """Escape *raw* for use inside a double-quoted HTML attribute."""
            value = self.Handle_escaping_characters(raw)
            # Inline code was already replaced by a placeholder; an attribute
            # cannot hold markup, so put back the bare (escaped) code text.
            # Newest first so that "...CODE1" never clobbers "...CODE10".
            for placeholder, plain in reversed(list(code_plain.items())):
                value = value.replace(placeholder, plain)
            return value

        # 1. Protect & convert Inline Code
        def replace_code(match):
            escaped_code = self.Handle_escaping_characters(match.group(1))
            placeholder = stash("CODE", f"<code>{escaped_code}</code>")
            code_plain[placeholder] = escaped_code
            return placeholder

        text = re.sub(r"`([^`]+)`", replace_code, text)

        # 2. Protect & convert Images (alt, url)
        def replace_image(match):
            alt = attribute_value(self.convert_emoji(match.group(1)))
            url = attribute_value(match.group(2))
            return stash("IMG", f'<img src="{url}" alt="{alt}">')

        text = re.sub(r"!\[([^\]]*)\]\(([^)]+)\)", replace_image, text)

        # 3. Protect & convert Links (text, url)
        def replace_link(match):
            url = attribute_value(match.group(2))
            # The label may still contain code/image placeholders created
            # above; they pass through the nested call as plain text and are
            # expanded by the final restore step below.
            formatted_link_text = self.format_inline(match.group(1), footnotes=None)
            return stash("LINK", f'<a href="{url}">{formatted_link_text}</a>')

        text = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", replace_link, text)

        # 4. Handle Footnote References
        if footnotes is not None:
            def replace_footnote(match):
                footnote_ref = match.group(1)
                matching_id = next(
                    (key for key, value in footnotes.items() if value[1] == footnote_ref),
                    None,
                )
                if matching_id is None:
                    # Unknown reference: leave the marker as written.
                    return match.group(0)
                return stash(
                    "FN",
                    f'<sup><a href="#fn{footnote_ref}" id="{footnote_ref}">^{matching_id}</a></sup>',
                )

            text = re.sub(r"\[\^([^\]]+)\](?!:)", replace_footnote, text)

        # 5. Escape raw text
        text = self.Handle_escaping_characters(text)

        # 6. Emojis
        text = self.convert_emoji(text)

        # 7. Apply conversions: Bold, Italic, Strikethrough, Subscript, Superscript, Highlight, Spoiler
        text = re.sub(r"(\*\*|__)(.*?)\1", r"<strong>\2</strong>", text)
        text = re.sub(r"(\*|_)(.*?)\1", r"<em>\2</em>", text)
        text = re.sub(r"~~(.*?)~~", r"<del>\1</del>", text)
        text = re.sub(r"==(.*?)==", r"<mark>\1</mark>", text)
        text = re.sub(r"~(.*?)~", r"<sub>\1</sub>", text)
        text = re.sub(r"\^(.*?)\^", r"<sup>\1</sup>", text)
        text = re.sub(
            r"\|\|(.*?)\|\|",
            '<span class="spoiler" title="Click to reveal" onclick="this.classList.toggle(\'revealed\')">\\1</span>',
            text
        )

        # 8. Restore placeholders, newest first.  A later placeholder's HTML
        # can contain earlier placeholders (e.g. a link whose label is inline
        # code), and a higher number must be replaced before the lower number
        # that is its prefix ("...CODE10" before "...CODE1").
        for placeholder, html in reversed(list(placeholders.items())):
            text = text.replace(placeholder, html)

        return text

    def convert_heading(self, line):
        """Render a ``#``-prefixed line as ``<hN>``.

        A trailing ``{#some-id}`` is removed from the text and used as the
        element's ``id`` attribute.  *line* must start with ``#``.
        """
        level_match = re.match(r"^#+", line)
        level = len(level_match.group())
        content = line[level:].strip()

        id_match = re.search(r"\{\#([a-zA-Z0-9_-]+)\}\s*$", content)
        heading_id = ""
        if id_match:
            heading_id = id_match.group(1)
            content = content[:id_match.start()].strip()

        formatted_content = self.format_inline(content, self.footnotes)
        if heading_id:
            return f'<h{level} id="{heading_id}">{formatted_content}</h{level}>'
        return f'<h{level}>{formatted_content}</h{level}>'

    def convert_blockquote(self, lines):
        """Render *lines* as a ``<blockquote>`` with one ``<p>`` per line."""
        formatted_lines = [f"<p>{self.format_inline(line, self.footnotes)}</p>" for line in lines]
        return "<blockquote>\n" + "\n".join(formatted_lines) + "\n</blockquote>"

    def convert_container(self, container_type, container_title, lines):
        """Render a ``:::`` fenced container as a titled ``custom-block`` div.

        Consecutive non-blank *lines* are joined into one paragraph; blank
        lines separate paragraphs.
        """
        # Type and title come straight from the Markdown source, so escape
        # them before placing them in an attribute / element.
        safe_type = self.Handle_escaping_characters(container_type)
        safe_title = self.Handle_escaping_characters(container_title)
        parts = [
            f'<div class="custom-block {safe_type}">\n',
            f' <p class="custom-block-title">{safe_title}</p>\n',
        ]

        current_p = []
        for line in lines:
            stripped = line.strip()
            if stripped:
                current_p.append(stripped)
            elif current_p:
                parts.append(f' <p>{self.format_inline(" ".join(current_p), self.footnotes)}</p>\n')
                current_p = []
        if current_p:
            parts.append(f' <p>{self.format_inline(" ".join(current_p), self.footnotes)}</p>\n')

        parts.append('</div>')
        return "".join(parts)

    @staticmethod
    def _is_table_separator(line):
        """Return True when *line* consists only of ``|``, ``:``, ``-`` and spaces, with at least one ``-``."""
        stripped = line.strip()
        leftover = stripped.replace('|', '').replace(':', '').replace('-', '').strip()
        return not leftover and '-' in stripped

    @staticmethod
    def _split_row(line):
        """Split a table row into stripped cells, ignoring one leading/trailing pipe."""
        line = line.strip()
        if line.startswith('|'):
            line = line[1:]
        if line.endswith('|'):
            line = line[:-1]
        return [cell.strip() for cell in line.split('|')]

    def convert_table(self, lines):
        """Render pipe-table *lines* (header, separator, rows) as ``<table>``.

        When *lines* does not form a table (fewer than two lines, or the
        second line is not a separator) each line is rendered as its own
        paragraph instead, so its content is still escaped and formatted.
        """
        if len(lines) < 2 or not self._is_table_separator(lines[1]):
            return "\n".join([f"<p>{self.format_inline(line, self.footnotes)}</p>" for line in lines])

        table_html = ['<table style="border: 1px solid black; border-collapse: collapse;">\n']

        headers = self._split_row(lines[0])
        table_html.append(" <thead>\n <tr>\n")
        for h in headers:
            formatted_h = self.format_inline(h, self.footnotes)
            table_html.append(f" <th style='border: 1px solid black; padding: 5px;'>{formatted_h}</th>\n")
        table_html.append(" </tr>\n </thead>\n")

        # Column alignment is read from the colons of the separator row.
        alignments = []
        for c in self._split_row(lines[1]):
            if c.startswith(':') and c.endswith(':'):
                alignments.append('center')
            elif c.endswith(':'):
                alignments.append('right')
            else:
                alignments.append('left')
        while len(alignments) < len(headers):
            alignments.append('left')

        table_html.append(" <tbody>\n")
        for line in lines[2:]:
            if not line.strip():
                continue
            cols = self._split_row(line)
            # Pad short rows; cells beyond the header count are dropped below.
            while len(cols) < len(headers):
                cols.append('')

            table_html.append(" <tr>\n")
            for col, alignment in zip(cols[:len(headers)], alignments):
                formatted_col = self.format_inline(col, self.footnotes)
                table_html.append(
                    f" <td style='border: 1px solid black; padding: 5px; text-align: {alignment};'>{formatted_col}</td>\n"
                )
            table_html.append(" </tr>\n")

        table_html.append(" </tbody>\n")
        table_html.append('</table>\n')
        return "".join(table_html)

    def flush_blocks(self):
        """Emit every pending block (lists, paragraph, blockquote, table, container, definition list) and reset its state."""
        while self.list_stack:
            _, ltype = self.list_stack.pop()
            self.html_content.append(f"</{ltype}>")

        if self.paragraph_lines:
            p_text = " ".join(self.paragraph_lines)
            self.html_content.append(f"<p>{self.format_inline(p_text, self.footnotes)}</p>")
            self.paragraph_lines.clear()

        if self.blockquote_lines:
            self.html_content.append(self.convert_blockquote(self.blockquote_lines))
            self.blockquote_lines.clear()

        if self.table_lines:
            self.html_content.append(self.convert_table(self.table_lines))
            self.table_lines.clear()

        if self.container_state['is_active']:
            self.html_content.append(self.convert_container(
                self.container_state['type'],
                self.container_state['title'],
                self.container_state['lines']
            ))
            self.container_state['is_active'] = False
            self.container_state['lines'] = []

        if self.is_def_list:
            self.html_content.append("</dl>")
            self.is_def_list = False

    def _load_css(self):
        """Return ``(css_content, css_link)`` for the configured style; at most one is non-empty."""
        css_content = ""
        css_link = ""

        style_normalized = self.style.lower().strip() if isinstance(self.style, str) else ""
        # "defualt" is accepted alongside "default" for backward compatibility.
        if style_normalized in ("default", "defualt"):
            default_css_path = os.path.join(os.path.dirname(__file__), "..", "resources", "hypermark.css")
            try:
                if os.path.exists(default_css_path):
                    with open(default_css_path, "r", encoding="utf-8") as f:
                        css_content = f.read()
                else:
                    css_link = "<link rel='stylesheet' href='resources/hypermark.css'>\n"
            except Exception:
                css_link = "<link rel='stylesheet' href='resources/hypermark.css'>\n"
        elif self.style and os.path.exists(self.style):
            try:
                with open(self.style, "r", encoding="utf-8") as f:
                    css_content = f.read()
            except Exception:
                css_link = f"<link rel='stylesheet' href='{self.style}'>\n"
        elif self.style:
            css_link = f"<link rel='stylesheet' href='{self.style}'>\n"
        return css_content, css_link

    def _document_shell(self):
        """Return the HTML that goes before and after the rendered body."""
        css_content, css_link = self._load_css()

        html_initial_body = (
            "<!DOCTYPE html>\n"
            "<html lang=\"en\">\n"
            "<head>\n"
            " <meta charset=\"UTF-8\">\n"
            " <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n"
            " <title>Markdown to HTML</title>\n"
            " <!-- Highlight.js Styles -->\n"
            " <link rel=\"stylesheet\" href=\"https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/atom-one-dark.min.css\">\n"
        )

        if css_content:
            html_initial_body += f" <style>\n{css_content}\n </style>\n"
        elif css_link:
            html_initial_body += f" {css_link}"

        html_initial_body += (
            " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js\"></script>\n"
            " <script src=\"https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/go.min.js\"></script>\n"
            "</head>\n"
            "<body>\n"
        )

        html_end_body = (
            " <script>\n"
            " hljs.highlightAll();\n"
            " function copyCode(button) {\n"
            " const container = button.closest('.code-block-container');\n"
            " const code = container.querySelector('code');\n"
            " navigator.clipboard.writeText(code.innerText).then(() => {\n"
            " button.textContent = 'Copied!';\n"
            " button.classList.add('copied');\n"
            " setTimeout(() => {\n"
            " button.textContent = 'Copy code';\n"
            " button.classList.remove('copied');\n"
            " }, 2000);\n"
            " });\n"
            " }\n"
            " </script>\n"
            "</body>\n"
            "</html>"
        )
        return html_initial_body, html_end_body

    def _store_code_block(self, lang):
        """Render the collected code lines, store the block and leave a placeholder in the body."""
        safe_lang = self.Handle_escaping_characters(lang)
        lang_class = f' class="language-{safe_lang}"' if lang else ''
        lang_display = safe_lang if lang else 'code'
        code_text = "\n".join(self.code_lines)
        block_html = (
            f'<div class="code-block-container">\n'
            f' <div class="code-block-header">\n'
            f' <span class="code-block-lang">{lang_display}</span>\n'
            f' <button class="copy-code-button" onclick="copyCode(this)">Copy code</button>\n'
            f' </div>\n'
            f' <pre><code{lang_class}>{code_text}</code></pre>\n'
            f'</div>'
        )
        placeholder = f"{self._CODE_BLOCK_PLACEHOLDER}{len(self.code_block_stores)}"
        self.code_block_stores.append(block_html)
        self.html_content.append(placeholder)
        self.is_code_block = False

    def convert(self):
        """Convert ``path_md`` to HTML and write the result to ``path_html``.

        Returns ``None``; does nothing further when the input cannot be read.
        """
        lines = self.read()
        if lines is None:
            return

        html_initial_body, html_end_body = self._document_shell()

        # PASS 1: Footnote pre-scan, so references can be numbered before
        # their definitions are reached.
        for raw_line in lines:
            line = raw_line.replace('\r', '').replace('\n', '')
            fn_def_match = re.match(r"^\[\^([^\]]+)\]:\s*(.*)", line)
            if fn_def_match:
                fn_ref = fn_def_match.group(1)
                fn_content = fn_def_match.group(2).strip()
                fn_id = len(self.footnotes) + 1
                self.footnotes[fn_id] = (self.format_inline(fn_content, footnotes=None), fn_ref)

        # PASS 2: Main loop
        code_lang = ""
        for idx, raw_line in enumerate(lines):
            line = raw_line.replace('\r', '').replace('\n', '')
            stripped = line.strip()

            # Code Blocks
            if stripped.startswith("```"):
                if not self.is_code_block:
                    self.flush_blocks()
                    code_lang = stripped[3:].strip()
                    self.is_code_block = True
                    self.code_lines = []
                else:
                    self._store_code_block(code_lang)
                continue

            if self.is_code_block:
                self.code_lines.append(self.Handle_escaping_characters(line))
                continue

            # Fenced Containers
            if stripped.startswith(":::"):
                if not self.container_state['is_active']:
                    self.flush_blocks()
                    parts = stripped[3:].strip().split(maxsplit=1)
                    c_type = parts[0] if parts else 'note'
                    c_title = parts[1] if len(parts) > 1 else c_type.upper()
                    self.container_state['is_active'] = True
                    self.container_state['type'] = c_type
                    self.container_state['title'] = c_title
                    self.container_state['lines'] = []
                else:
                    self.html_content.append(self.convert_container(
                        self.container_state['type'],
                        self.container_state['title'],
                        self.container_state['lines']
                    ))
                    self.container_state['is_active'] = False
                    self.container_state['lines'] = []
                continue

            if self.container_state['is_active']:
                self.container_state['lines'].append(line)
                continue

            # Footnote definitions (already collected in pass 1)
            if re.match(r"^\[\^([^\]]+)\]:\s*(.*)", line):
                self.flush_blocks()
                continue

            # Blank Lines
            if not stripped:
                self.flush_blocks()
                continue

            # Headings
            if line.startswith("#"):
                self.flush_blocks()
                self.html_content.append(self.convert_heading(line))
                continue

            # Horizontal Rule
            if stripped == "---":
                self.flush_blocks()
                self.html_content.append("<hr>")
                continue

            # Blockquote
            if line.startswith(">"):
                if not self.blockquote_lines:
                    self.flush_blocks()
                self.blockquote_lines.append(line[1:])
                continue

            # Tables: a line with a pipe starts a table only when the next
            # line is a separator row; otherwise it falls through.
            if "|" in line:
                if self.table_lines:
                    self.table_lines.append(line)
                    continue
                if idx + 1 < len(lines) and self._is_table_separator(lines[idx + 1]):
                    self.flush_blocks()
                    self.table_lines.append(line)
                    continue

            # Lists
            list_match = re.match(r"^(\s*)(\d+\.\s+|[-\+\*]\s+)(.*)", line)
            if list_match:
                if not self.list_stack:
                    self.flush_blocks()

                indent = len(list_match.group(1))
                marker = list_match.group(2).strip()
                content = list_match.group(3).strip()
                list_type = "ol" if marker.endswith('.') else "ul"

                task_match = re.match(r"^\[([ xX])\]\s+(.*)", content)
                if task_match:
                    checked = task_match.group(1).lower() == 'x'
                    item_content = task_match.group(2).strip()
                    is_task = True
                else:
                    checked = False
                    item_content = content
                    is_task = False

                # Close lists that are nested deeper than this item.
                while self.list_stack and indent < self.list_stack[-1][0]:
                    _, closed_type = self.list_stack.pop()
                    self.html_content.append(f"</{closed_type}>")

                if not self.list_stack or indent > self.list_stack[-1][0]:
                    self.list_stack.append((indent, list_type))
                    self.html_content.append(f"<{list_type}>")
                elif indent == self.list_stack[-1][0] and list_type != self.list_stack[-1][1]:
                    # Same depth but the marker kind changed: swap the list.
                    _, closed_type = self.list_stack.pop()
                    self.html_content.append(f"</{closed_type}>")
                    self.list_stack.append((indent, list_type))
                    self.html_content.append(f"<{list_type}>")

                formatted_content = self.format_inline(item_content, self.footnotes)
                if is_task:
                    chk = "checked" if checked else ""
                    self.html_content.append(
                        f'<li style="list-style-type: none;"><input type="checkbox" {chk} disabled> {formatted_content}</li>'
                    )
                else:
                    self.html_content.append(f'<li>{formatted_content}</li>')
                continue

            # Definition list: the pending paragraph text is the term.
            if line.startswith(":"):
                if not self.is_def_list:
                    if self.list_stack or self.blockquote_lines or self.table_lines or self.container_state['is_active']:
                        self.flush_blocks()

                    term_text = " ".join(self.paragraph_lines)
                    self.paragraph_lines.clear()

                    self.html_content.append("<dl>")
                    self.html_content.append(f"<dt>{self.format_inline(term_text, self.footnotes)}</dt>")
                    self.is_def_list = True
                elif self.paragraph_lines:
                    # A further term inside an already open list: emit it as
                    # <dt> instead of leaving it to be flushed as a paragraph.
                    term_text = " ".join(self.paragraph_lines)
                    self.paragraph_lines.clear()
                    self.html_content.append(f"<dt>{self.format_inline(term_text, self.footnotes)}</dt>")

                definition_text = line[1:].strip()
                self.html_content.append(f"<dd>{self.format_inline(definition_text, self.footnotes)}</dd>")
                continue

            # Fallback: Paragraph
            if self.list_stack or self.blockquote_lines or self.table_lines or self.container_state['is_active']:
                self.flush_blocks()

            self.paragraph_lines.append(stripped)

        # A fence that was never closed still carries content: emit it
        # rather than dropping the collected lines.
        if self.is_code_block:
            self._store_code_block(code_lang)

        self.flush_blocks()

        if self.footnotes:
            self.html_content.append("<hr><h3>Footnotes</h3><ol>")
            for fn_id, content in self.footnotes.items():
                self.html_content.append(
                    f'<li id="fn{content[1]}">{content[0]} <a href="#{content[1]}">^{fn_id}</a></li>'
                )
            self.html_content.append("</ol>")

        full_html = html_initial_body + "\n".join(self.html_content) + "\n" + html_end_body

        try:
            soup = BeautifulSoup(full_html, "html.parser")
            final_html = soup.prettify()
        except Exception as e:
            print(f"BeautifulSoup parsing failed: {e}. Writing raw HTML.")
            final_html = full_html

        def restore_code_block(match):
            index = int(match.group(1))
            if index < len(self.code_block_stores):
                return f"\n{self.code_block_stores[index]}\n"
            # Not one of ours (e.g. the marker text appeared in the document).
            return match.group(0)

        # One pass with a callable replacement: the block HTML is inserted
        # verbatim (a replacement *string* would have its backslashes
        # interpreted by re.sub), and matching the full number keeps
        # "...PLACEHOLDER1" from matching inside "...PLACEHOLDER10".
        final_html = re.sub(
            rf"\s*{self._CODE_BLOCK_PLACEHOLDER}(\d+)\s*",
            restore_code_block,
            final_html,
        )

        self.write(final_html)
613
+
614
+
615
class Text:
    """Line-oriented Markdown-to-plain-text converter.

    ``convert()`` reads ``path_md`` and writes a plain-text rendition to
    ``path_text``: inline markup is stripped, tables are spelled out row by
    row, code blocks are announced, and list/task items are flattened.
    """

    def __init__(self, path_md, path_text):
        self.path_md, self.path_text = path_md, path_text
        self.text_content = []      # output lines, joined with "\n" at the end

        # Pending-block state
        self.is_code_block = False
        self.paragraph_lines = []
        self.blockquote_lines = []
        self.table_lines = []

    def read(self):
        """Return the input file's lines, or ``None`` (after printing the error) on failure."""
        try:
            with open(self.path_md, 'r', encoding="utf-8") as source:
                content = source.readlines()
        except Exception as e:
            print(f"Error reading file: {e}")
            return None
        return content

    def write(self, data):
        """Write *data* to ``path_text``; return whether it succeeded."""
        try:
            with open(self.path_text, 'w', encoding="utf-8") as target:
                target.write(data)
        except Exception as e:
            print(f"Error writing file: {e}")
            return False
        return True

    def flush_table(self, table_lines):
        """Turn pipe-table lines into ``Row: Header: value; ...`` sentences.

        Lines that do not form a table (fewer than two, or no separator row
        in second position) are returned with inline markup stripped.
        Returns ``""`` when there is nothing to say.
        """
        if not table_lines:
            return ""

        def as_plain_lines():
            return "\n".join(strip_inline_formatting(entry) for entry in table_lines)

        if len(table_lines) < 2:
            return as_plain_lines()

        divider = table_lines[1].strip()
        leftover = divider.replace('|', '').replace(':', '').replace('-', '').strip()
        if leftover or '-' not in divider:
            return as_plain_lines()

        def cells(row):
            # Drop one outer pipe on each side, then split and clean cells.
            if row.startswith('|'):
                row = row[1:]
            if row.endswith('|'):
                row = row[:-1]
            return [strip_inline_formatting(cell.strip()) for cell in row.split('|')]

        headers = cells(table_lines[0].strip())

        spoken_rows = []
        for raw_row in table_lines[2:]:
            row = raw_row.strip()
            if not row:
                continue

            pieces = []
            for position, value in enumerate(cells(row)):
                if not value:
                    continue
                if position < len(headers):
                    label = headers[position]
                else:
                    label = f"Column {position + 1}"
                pieces.append(f"{label}: {value}" if label else value)

            if pieces:
                spoken_rows.append("Row: " + "; ".join(pieces) + ".")

        if not spoken_rows:
            return ""
        return "Table data:\n" + "\n".join(spoken_rows)

    def flush_blocks(self):
        """Emit the pending paragraph, blockquote and table (in that order) and clear them."""
        out = self.text_content

        if self.paragraph_lines:
            out.append(strip_inline_formatting(" ".join(self.paragraph_lines)))
            self.paragraph_lines.clear()

        if self.blockquote_lines:
            for quoted in self.blockquote_lines:
                out.append(f" {strip_inline_formatting(quoted)}")
            self.blockquote_lines.clear()

        if self.table_lines:
            out.append(self.flush_table(self.table_lines))
            self.table_lines.clear()

    def convert(self):
        """Convert ``path_md`` to plain text and write it to ``path_text``.

        Returns ``None``; does nothing further when the input cannot be read.
        """
        lines = self.read()
        if lines is None:
            return

        emit = self.text_content.append
        total = len(lines)

        for position, raw_line in enumerate(lines):
            line = raw_line.replace('\r', '').replace('\n', '')
            bare = line.strip()

            # Code fences toggle verbatim mode.
            if bare.startswith("```"):
                if self.is_code_block:
                    self.is_code_block = False
                    emit("\nCode block ends.\n")
                else:
                    self.flush_blocks()
                    self.is_code_block = True
                    emit("\nCode block starts:\n")
                continue

            if self.is_code_block:
                emit(line)
                continue

            # Blank lines and horizontal rules both end the current block
            # and leave an empty line in the output.
            if not bare or bare == "---":
                self.flush_blocks()
                emit("")
                continue

            # Fenced containers (alert boxes): only the opening fence carries
            # a type/title, which becomes a "<TITLE>:" line.
            if bare.startswith(":::"):
                self.flush_blocks()
                parts = bare[3:].strip().split(maxsplit=1)
                if parts:
                    title = parts[1] if len(parts) > 1 else parts[0].upper()
                    emit(f"\n{title}:\n")
                continue

            # Heading: drop the hashes and any trailing {#id}.
            if line.startswith("#"):
                self.flush_blocks()
                depth = len(re.match(r"^#+", line).group())
                heading = re.sub(r"\{\#([a-zA-Z0-9_-]+)\}\s*$", "", line[depth:].strip()).strip()
                emit(f"\n{strip_inline_formatting(heading)}\n")
                continue

            # Blockquote
            if line.startswith(">"):
                if not self.blockquote_lines:
                    self.flush_blocks()
                self.blockquote_lines.append(line[1:])
                continue

            # Table: continues an open table, or starts one when the next
            # line is a separator row; otherwise falls through.
            if "|" in line:
                if self.table_lines:
                    self.table_lines.append(line)
                    continue
                if position + 1 < total:
                    upcoming = lines[position + 1].replace('\r', '').replace('\n', '').strip()
                    residue = upcoming.replace('|', '').replace(':', '').replace('-', '').strip()
                    if not residue and '-' in upcoming:
                        self.flush_blocks()
                        self.table_lines.append(line)
                        continue

            # Definition list: the pending paragraph is the term, so flush
            # only when something other than a paragraph is pending.
            if line.startswith(":"):
                if self.blockquote_lines or self.table_lines:
                    self.flush_blocks()

                term = " ".join(self.paragraph_lines)
                self.paragraph_lines.clear()
                if term:
                    emit(strip_inline_formatting(term))

                emit(f" {strip_inline_formatting(line[1:].strip())}")
                continue

            # List item
            item = re.match(r"^(\s*)(\d+\.\s+|[-\+\*]\s+)(.*)", line)
            if item:
                self.flush_blocks()
                lead, bullet, body = item.group(1), item.group(2).strip(), item.group(3).strip()

                # Task checkbox
                task = re.match(r"^\[([ xX])\]\s+(.*)", body)
                if task:
                    prefix = "Completed: " if task.group(1).lower() == 'x' else "Todo: "
                    rendered = prefix + strip_inline_formatting(task.group(2).strip())
                else:
                    rendered = strip_inline_formatting(body)

                # Ordered items keep their number; bullets are dropped.
                if bullet.endswith('.'):
                    emit(f"{lead}{bullet} {rendered}")
                else:
                    emit(f"{lead}{rendered}")
                continue

            # Footnote definitions
            footnote = re.match(r"^\[\^([^\]]+)\]:\s*(.*)", line)
            if footnote:
                self.flush_blocks()
                label = footnote.group(1)
                note = footnote.group(2).strip()
                emit(f"Footnote {label}: {strip_inline_formatting(note)}")
                continue

            # Fallback: paragraph text
            if self.blockquote_lines or self.table_lines:
                self.flush_blocks()
            self.paragraph_lines.append(bare)

        self.flush_blocks()
        self.write("\n".join(self.text_content))
844
+
845
+
846
def html(path_md, path_html, style="default"):
    """Convert the Markdown file *path_md* to an HTML file at *path_html*.

    Convenience wrapper that builds an ``HTML`` converter with the given
    *style* and runs it.
    """
    HTML(path_md, path_html, style).convert()
849
+
850
+
851
def text(path_md, path_text):
    """Convert the Markdown file *path_md* to a plain-text file at *path_text*.

    Convenience wrapper that builds a ``Text`` converter and runs it.
    """
    Text(path_md, path_text).convert()
@@ -0,0 +1,29 @@
1
+ Metadata-Version: 2.4
2
+ Name: hypermark-py
3
+ Version: 1.0.0
4
+ Summary: A Python package for converting Markdown to HTML
5
+ Home-page: https://github.com/SwapCodesDev/hypermark
6
+ Author: SwapCodesDev
7
+ Author-email: swapcodes.dev@gmail.com
8
+ License: MIT
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=3.6
13
+ Description-Content-Type: text/markdown
14
+ Requires-Dist: beautifulsoup4
15
+ Requires-Dist: emoji
16
+ Dynamic: author
17
+ Dynamic: author-email
18
+ Dynamic: classifier
19
+ Dynamic: description
20
+ Dynamic: description-content-type
21
+ Dynamic: home-page
22
+ Dynamic: license
23
+ Dynamic: requires-dist
24
+ Dynamic: requires-python
25
+ Dynamic: summary
26
+
27
+ ``` bash
28
+ pip install git+https://github.com/SwapCodesDev/Markdown-to-HTML.git@master#subdirectory=python
29
+ ```
@@ -0,0 +1,5 @@
1
+ hypermark.py,sha256=RsYJKzE0BXo3ApZjDRzR_iHWoSU4DIxhKtOTP6Y_96g,33569
2
+ hypermark_py-1.0.0.dist-info/METADATA,sha256=TjPKOB-bBXfqhUooUXfpUcQuT7igRorMLBSRBX4J-Pg,823
3
+ hypermark_py-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
4
+ hypermark_py-1.0.0.dist-info/top_level.txt,sha256=FosEiKNXitNw6ZPz_JsmAmuhl5jJwwpO1Dvgcm0IeMA,10
5
+ hypermark_py-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1 @@
1
+ hypermark