telegram-md2html 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,410 @@
1
/**
 * Paired-delimiter inline rules, tried in this order at each scan position.
 * Every pattern is anchored with `^` (it is applied to the text remaining at
 * the scan position) and none of them lets its content cross a newline.
 * `allowBlank: false` rejects matches whose content is only whitespace.
 */
const INLINE_DELIMITER_RULES = [
  { type: 'spoiler', pattern: /^\|\|([^|\n]+?)\|\|/, allowBlank: true },
  { type: 'strikethrough', pattern: /^~~([^~\n]+?)~~/, allowBlank: true },
  { type: 'bold', pattern: /^\*\*([^*\n]+?)\*\*/, allowBlank: true },
  { type: 'underline', pattern: /^__([^_\n]+?)__/, allowBlank: true },
  // The asterisk form used to allow newlines after the first content
  // character, which turned "* item\n* item" bullet lists into italics.
  { type: 'italic', pattern: /^\*([^*\n]+?)\*/, allowBlank: false },
  { type: 'italic', pattern: /^_([^_\n]+?)_/, allowBlank: false },
];

/**
 * Scans Markdown text from left to right and reports the outermost formatting
 * spans it finds. Nested formatting is not resolved here: each token carries
 * its raw inner `content`, which the caller may tokenize again.
 *
 * Token shape: `{ type, content, start, end }` where `start`/`end` are offsets
 * into the scanned text (`end` exclusive). `code_block` tokens additionally
 * carry `language` (possibly `undefined`); `link` tokens carry the URL in
 * `language`.
 */
class MarkdownTokenizer {
  /**
   * @param {string} text - Markdown source to scan.
   */
  constructor(text) {
    this.text = text;
  }

  /**
   * Tokenize the markdown text.
   *
   * @returns {Array<object>} Non-overlapping tokens ordered by `start`.
   */
  tokenize() {
    const text = this.text;
    const tokens = [];
    // Fenced regions are located once up front instead of re-scanning the
    // whole text at every position.
    const fences = this.findCodeFences(text);
    let fenceIndex = 0;
    let pos = 0;
    while (pos < text.length) {
      // Discard fences whose interior (everything before the closing ```)
      // lies entirely behind the current position.
      while (fenceIndex < fences.length && pos >= fences[fenceIndex].end - 3) {
        fenceIndex++;
      }
      const fence = fences[fenceIndex];
      if (fence !== undefined && pos > fence.start) {
        // Inside a fenced region that was not consumed as a code_block token
        // at its opening fence: skip straight to its closing fence.
        pos = fence.end - 3;
        continue;
      }
      const token = this.matchToken(pos);
      if (token) {
        tokens.push(token);
        pos = token.end;
      } else {
        pos++;
      }
    }
    return tokens.sort((a, b) => a.start - b.start);
  }

  /**
   * Try to match a token that begins exactly at `start`.
   *
   * @param {number} start - Offset into the tokenizer's text.
   * @returns {object|null} The matched token, or `null` when nothing starts here.
   */
  matchToken(start) {
    const remaining = this.text.slice(start);
    // Blockquote placeholders look like the start of a link; never treat them as one.
    if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
      return null;
    }
    // Fenced code block (triple backticks) - highest priority.
    const codeBlock = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
    if (codeBlock) {
      return {
        type: 'code_block',
        content: codeBlock[2],
        language: codeBlock[1],
        start,
        end: start + codeBlock[0].length,
      };
    }
    // Inline code. No "already inside inline code" guard is needed: `start`
    // points at a backtick, and the interior of an inline-code span never
    // contains one.
    const inlineCode = remaining.match(/^`([^`\n]+)`/);
    if (inlineCode) {
      return {
        type: 'inline_code',
        content: inlineCode[1],
        start,
        end: start + inlineCode[0].length,
      };
    }
    // Paired delimiters. The single-character italic rules cannot fire on a
    // doubled delimiter because their first content character must differ
    // from the delimiter itself.
    for (const { type, pattern, allowBlank } of INLINE_DELIMITER_RULES) {
      const found = remaining.match(pattern);
      if (!found) {
        continue;
      }
      if (!allowBlank && found[1].trim().length === 0) {
        continue;
      }
      return {
        type,
        content: found[1],
        start,
        end: start + found[0].length,
      };
    }
    // Link: [label](url). The URL travels in the `language` field.
    const link = remaining.match(/^\[([^\]]+?)\]\(([^)]+?)\)/);
    if (link) {
      return {
        type: 'link',
        content: link[1],
        start,
        end: start + link[0].length,
        language: link[2],
      };
    }
    return null;
  }

  /**
   * Locate every ```...``` region of `text`.
   *
   * @param {string} text
   * @returns {Array<{start: number, end: number}>} Ranges in ascending order, `end` exclusive.
   */
  findCodeFences(text) {
    const fences = [];
    for (const found of text.matchAll(/```[\s\S]*?```/g)) {
      fences.push({ start: found.index, end: found.index + found[0].length });
    }
    return fences;
  }

  /**
   * Report whether `position` lies strictly inside a ```...``` region.
   * The opening fence's first character and the three closing backticks
   * count as outside.
   *
   * @param {string} text
   * @param {number} position
   * @returns {boolean}
   */
  isInsideCodeBlock(text, position) {
    return this.findCodeFences(text).some(
      (fence) => position > fence.start && position < fence.end - 3
    );
  }

  /**
   * Report whether `position` lies strictly between the backticks of an
   * inline-code span (both delimiting backticks count as outside).
   *
   * @param {string} text
   * @param {number} position
   * @returns {boolean}
   */
  isInsideInlineCode(text, position) {
    for (const found of text.matchAll(/`[^`\n]*`/g)) {
      const lastIndex = found.index + found[0].length - 1;
      if (position > found.index && position < lastIndex) {
        return true;
      }
    }
    return false;
  }
}
170
+
171
/**
 * Escape `&`, `<`, `>`, `"` and `'` for use in HTML.
 *
 * An ampersand that already begins something shaped like an entity
 * (`&name;` or `&#123;`) is left alone, so text is never double-escaped.
 *
 * @param {string} text - Text to escape.
 * @returns {string} The escaped text; falsy input is returned unchanged.
 */
function escapeHtml(text) {
  if (!text) {
    return text;
  }
  // Order matters: ampersands are handled first so the entities produced by
  // the later steps are not themselves re-escaped.
  const steps = [
    [/&(?!#?\w+;)/g, '&amp;'],
    [/</g, '&lt;'],
    [/>/g, '&gt;'],
    [/"/g, '&quot;'],
    [/'/g, '&#39;'],
  ];
  return steps.reduce((escaped, [pattern, entity]) => escaped.replace(pattern, entity), text);
}
185
/**
 * Escape `&`, `<`, `>` and `"` in plain text destined for Telegram HTML.
 * Single quotes are left as they are.
 *
 * An ampersand that already begins something shaped like an entity
 * (`&name;` or `&#123;`) is not escaped again.
 *
 * @param {string} text - Text to escape.
 * @returns {string} The escaped text; falsy input is returned unchanged.
 */
function escapeTelegramHtml(text) {
  if (!text) {
    return text;
  }
  // Ampersands go first so the entities added afterwards stay intact.
  const steps = [
    [/&(?!#?\w+;)/g, '&amp;'],
    [/</g, '&lt;'],
    [/>/g, '&gt;'],
    [/"/g, '&quot;'],
  ];
  let escaped = text;
  for (const [pattern, entity] of steps) {
    escaped = escaped.replace(pattern, entity);
  }
  return escaped;
}
198
/**
 * Close a dangling fenced code block.
 *
 * Counts the triple-backtick fences in `text`; when the count is odd, a
 * closing fence is appended on a new line.
 *
 * @param {string} text - Markdown text.
 * @returns {string} `text`, with "\n```" appended if a fence was left open.
 */
function autoCloseCodeBlocks(text) {
  const fences = text.match(/```/g);
  const fenceCount = fences === null ? 0 : fences.length;
  const isBalanced = fenceCount % 2 !== 1;
  if (isBalanced) {
    return text;
  }
  return text + '\n```';
}
210
+
211
/**
 * Converts Telegram-style Markdown into HTML.
 *
 * Pipeline used by `convert()`: optionally close a dangling code fence, mark
 * blockquote lines with placeholders, convert inline/code formatting
 * recursively, then turn the placeholders into `<blockquote>` elements.
 */
class MarkdownConverter {
  /**
   * @param {object} [options]
   * @param {boolean} [options.escapeHtml=true] - Escape HTML special characters in text and code.
   * @param {boolean} [options.autoCloseCodeBlocks=true] - Append a closing fence when one is missing.
   * @param {function(string, string): string} [options.linkProcessor] - Called as `(url, labelHtml)`;
   *   replaces the built-in `<a>` rendering.
   * @param {function(string, string=): string} [options.codeBlockProcessor] - Called as
   *   `(code, language)`; replaces the built-in `<pre><code>` rendering.
   */
  constructor(options = {}) {
    this.hasCustomLinkProcessor = !!options.linkProcessor;
    this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
    this.options = {
      escapeHtml: options.escapeHtml ?? true,
      autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
      linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
      codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this),
    };
  }

  /**
   * Convert markdown text to Telegram HTML.
   *
   * @param {string} text - Markdown source.
   * @returns {string} The trimmed HTML, or the original `text` when the
   *   converted result contains nothing but whitespace.
   */
  convert(text) {
    const source = this.options.autoCloseCodeBlocks ? autoCloseCodeBlocks(text) : text;
    // Blockquotes are line-based, so they are marked before inline parsing.
    const marked = this.preprocessBlockquotes(source);
    const converted = this.convertRecursive(marked);
    const result = this.processBlockquoteMarkers(converted);
    if (result.trim() === '') {
      return text; // Keep whitespace-only input intact instead of collapsing it.
    }
    return result.trim();
  }

  /**
   * Recursively convert markdown, handling nested styles.
   *
   * @param {string} text - Markdown fragment.
   * @param {number} [depth=0] - Current nesting level; parsing stops past 10.
   * @returns {string} HTML for the fragment.
   */
  convertRecursive(text, depth = 0) {
    const escapePlain = (chunk) => (this.options.escapeHtml ? escapeTelegramHtml(chunk) : chunk);
    if (depth > 10) {
      // Stop parsing, but still treat the leftover as plain text so it is
      // escaped like any other plain text.
      return escapePlain(text);
    }
    const tokens = new MarkdownTokenizer(text).tokenize();
    if (tokens.length === 0) {
      return escapePlain(text);
    }
    let result = '';
    let lastPos = 0;
    for (const token of tokens) {
      if (token.start > lastPos) {
        result += escapePlain(text.slice(lastPos, token.start));
      }
      if (token.type === 'code_block' || token.type === 'inline_code') {
        // Code is literal: escape it, never parse markdown inside it.
        const code = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
        result += token.type === 'code_block'
          ? this.wrapToken(token.type, code, token.language)
          : `<code>${code}</code>`;
      } else {
        const inner = this.convertRecursive(token.content, depth + 1);
        result += this.wrapToken(token.type, inner, token.language);
      }
      lastPos = token.end;
    }
    if (lastPos < text.length) {
      result += escapePlain(text.slice(lastPos));
    }
    return result;
  }

  /**
   * Wrap token content in HTML tags.
   *
   * For every formatting type, including `link`, `content` is expected to be
   * HTML that has already been converted and escaped; it is inserted as-is so
   * nested formatting (e.g. bold link text) survives.
   *
   * @param {string} type - Token type.
   * @param {string} content - Converted inner HTML (or code for code types).
   * @param {string} [language] - Code language for `code_block`, URL for `link`.
   * @returns {string} The wrapped HTML; unknown types return `content` unchanged.
   */
  wrapToken(type, content, language) {
    switch (type) {
      case 'bold':
        return `<b>${content}</b>`;
      case 'italic':
        return `<i>${content}</i>`;
      case 'underline':
        return `<u>${content}</u>`;
      case 'strikethrough':
        return `<s>${content}</s>`;
      case 'spoiler':
        return `<span class="tg-spoiler">${content}</span>`;
      case 'inline_code':
        return `<code>${content}</code>`;
      case 'code_block': {
        if (this.hasCustomCodeBlockProcessor) {
          return this.options.codeBlockProcessor(content, language);
        }
        return this.defaultCodeBlockProcessor(content, language);
      }
      case 'link': {
        const url = language || '';
        if (this.hasCustomLinkProcessor) {
          return this.options.linkProcessor(url, content);
        }
        const escapedUrl = this.options.escapeHtml ? escapeHtml(url) : url;
        // The label is not escaped again: doing so would turn nested tags
        // such as <b> into literal text.
        return `<a href="${escapedUrl}">${content}</a>`;
      }
      case 'quote':
        return `\n<blockquote>${content.trim()}</blockquote>\n`;
      case 'expandable_quote':
        return `\n<blockquote expandable>${content.trim()}</blockquote>\n`;
      default:
        return content;
    }
  }

  /**
   * Preprocess blockquotes to mark them before other parsing.
   *
   * A line whose trimmed form starts with `**>` becomes
   * `[EXPANDABLE_QUOTE]<content>`; one starting with `>` becomes
   * `[QUOTE]<content>`. Lines that begin inside a ```...``` region are left
   * untouched so that code such as shell prompts is not turned into quotes.
   *
   * @param {string} text - Markdown source.
   * @returns {string} The text with blockquote lines replaced by placeholders.
   */
  preprocessBlockquotes(text) {
    const fences = [];
    for (const found of text.matchAll(/```[\s\S]*?```/g)) {
      fences.push({ start: found.index, end: found.index + found[0].length });
    }
    const processedLines = [];
    let fenceIndex = 0;
    let lineStart = 0;
    for (const line of text.split('\n')) {
      while (fenceIndex < fences.length && fences[fenceIndex].end <= lineStart) {
        fenceIndex++;
      }
      const fence = fences[fenceIndex];
      const startsInsideFence = fence !== undefined && lineStart > fence.start;
      lineStart += line.length + 1; // +1 for the '\n' removed by split
      if (startsInsideFence) {
        processedLines.push(line);
        continue;
      }
      const trimmedLine = line.trim();
      if (trimmedLine.startsWith('**>')) {
        processedLines.push(`[EXPANDABLE_QUOTE]${trimmedLine.substring(3).trim()}`);
      } else if (trimmedLine.startsWith('>')) {
        processedLines.push(`[QUOTE]${trimmedLine.substring(1).trim()}`);
      } else {
        processedLines.push(line);
      }
    }
    return processedLines.join('\n');
  }

  /**
   * Process blockquote markers.
   *
   * Expects text that has already been through `convertRecursive`: the rest
   * of each marked line is inserted into the `<blockquote>` as-is. Converting
   * it a second time would escape the tags produced by the first pass.
   *
   * @param {string} text - Converted HTML still containing quote placeholders.
   * @returns {string} The text with each placeholder line wrapped in a blockquote.
   */
  processBlockquoteMarkers(text) {
    const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
    const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
    return text
      .replace(
        expandableQuoteRegex,
        (match, content) => `\n<blockquote expandable>${content.trim()}</blockquote>\n`
      )
      .replace(
        quoteRegex,
        (match, content) => `\n<blockquote>${content.trim()}</blockquote>\n`
      );
  }

  /**
   * Built-in link renderer for raw (unescaped) URL and label text.
   *
   * @param {string} url
   * @param {string} text
   * @returns {string} An `<a>` element; both parts are escaped when `escapeHtml` is on.
   */
  defaultLinkProcessor(url, text) {
    const escapedUrl = this.options.escapeHtml ? escapeHtml(url) : url;
    const escapedText = this.options.escapeHtml ? escapeHtml(text) : text;
    return `<a href="${escapedUrl}">${escapedText}</a>`;
  }

  /**
   * Built-in code block renderer.
   *
   * @param {string} code
   * @param {string} [language] - Emitted as `class="language-<language>"` when truthy.
   * @returns {string} A `<pre><code>` element surrounded by newlines.
   */
  defaultCodeBlockProcessor(code, language) {
    const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
    const langAttr = language ? ` class="language-${language}"` : '';
    return `\n<pre><code${langAttr}>${escapedCode}</code></pre>\n`;
  }
}
387
+
388
/**
 * One-shot helper: build a `MarkdownConverter` with `options` and use it to
 * convert `text`.
 *
 * @param text - Markdown text to convert
 * @param options - Conversion options, passed to the `MarkdownConverter` constructor
 * @returns The value returned by `MarkdownConverter#convert`
 */
function markdownToHtml(text, options) {
  return new MarkdownConverter(options).convert(text);
}
398
/**
 * Factory for a reusable converter.
 *
 * @param options - Passed unchanged to the `MarkdownConverter` constructor
 * @returns A new `MarkdownConverter` instance
 */
function createConverter(options) {
  const converter = new MarkdownConverter(options);
  return converter;
}
404
+ var index = {
405
+ markdownToHtml,
406
+ createConverter,
407
+ MarkdownConverter
408
+ };
409
+
410
+ export { MarkdownConverter, createConverter, index as default, markdownToHtml };