telegram-md2html 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,423 @@
1
+ (function (global, factory) {
2
+ typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
3
+ typeof define === 'function' && define.amd ? define(['exports'], factory) :
4
+ (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.TelegramMd2Html = {}));
5
+ })(this, (function (exports) { 'use strict';
6
+
7
/**
 * Inline styles whose opening and closing delimiter are the same two-character
 * sequence. They are tried in this order, after code and before italics.
 */
const PAIRED_DELIMITER_RULES = [
    { type: 'spoiler', pattern: /^\|\|([^|\n]+?)\|\|/ },
    { type: 'strikethrough', pattern: /^~~([^~\n]+?)~~/ },
    { type: 'bold', pattern: /^\*\*([^*\n]+?)\*\*/ },
    { type: 'underline', pattern: /^__([^_\n]+?)__/ }
];

/**
 * Splits markdown text into a flat list of top-level tokens.
 *
 * Every token is `{ type, content, start, end }`, where `start`/`end` are
 * offsets into the original text (`end` is exclusive). `code_block` tokens
 * additionally carry `language` (undefined when the fence names none), and
 * `link` tokens reuse the `language` field to carry the link URL.
 * Nested styles are not expanded here; callers re-tokenize `content`.
 */
class MarkdownTokenizer {
    /**
     * @param {string} text - Markdown text to tokenize.
     */
    constructor(text) {
        this.text = text;
    }

    /**
     * Locate every triple-backtick delimited region of `text`.
     *
     * @param {string} text
     * @returns {{start: number, closingFence: number}[]} One entry per region,
     *   in text order. `start` is the offset of the opening fence and
     *   `closingFence` the offset of the first backtick of the closing fence.
     */
    static findCodeBlockSpans(text) {
        const spans = [];
        const fencedRegion = /```[\s\S]*?```/g;
        let match;
        while ((match = fencedRegion.exec(text)) !== null) {
            spans.push({
                start: match.index,
                closingFence: match.index + match[0].length - 3
            });
        }
        return spans;
    }

    /**
     * Tokenize the markdown text.
     *
     * @returns {object[]} Tokens in ascending `start` order.
     */
    tokenize() {
        const text = this.text;
        const tokens = [];
        // Computed once per call. Re-scanning the whole text for every
        // character position made tokenizing quadratic in the text length.
        const spans = MarkdownTokenizer.findCodeBlockSpans(text);
        let spanIndex = 0;
        let pos = 0;
        while (pos < text.length) {
            // `pos` only moves forward, so spans that end before it are done.
            while (spanIndex < spans.length && pos >= spans[spanIndex].closingFence) {
                spanIndex++;
            }
            // Strictly inside a fenced region: nothing in there is tokenized,
            // so jump straight to the closing fence.
            if (spanIndex < spans.length && pos > spans[spanIndex].start) {
                pos = spans[spanIndex].closingFence;
                continue;
            }
            const token = this.matchToken(pos);
            if (token) {
                tokens.push(token);
                pos = token.end;
            }
            else {
                pos++;
            }
        }
        // Tokens are appended at strictly increasing positions, so the list
        // is already ordered by `start`.
        return tokens;
    }

    /**
     * Try to match a single token that begins exactly at `start`.
     *
     * @param {number} start - Offset into the text.
     * @returns {object|null} The matched token, or null when nothing starts here.
     */
    matchToken(start) {
        const text = this.text;
        const remaining = text.slice(start);
        // Blockquote placeholders are left untouched at their first character.
        if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
            return null;
        }
        // Fenced code block - highest priority.
        const codeBlock = /^```(\w+)?\n([\s\S]*?)```/.exec(remaining);
        if (codeBlock) {
            return {
                type: 'code_block',
                content: codeBlock[2],
                language: codeBlock[1],
                start,
                end: start + codeBlock[0].length
            };
        }
        // Inline code.
        const inlineCode = /^`([^`\n]+)`/.exec(remaining);
        if (inlineCode && !this.isInsideInlineCode(text, start)) {
            return {
                type: 'inline_code',
                content: inlineCode[1],
                start,
                end: start + inlineCode[0].length
            };
        }
        // Spoiler, strikethrough, bold, underline.
        for (const { type, pattern } of PAIRED_DELIMITER_RULES) {
            const paired = pattern.exec(remaining);
            if (paired) {
                return {
                    type,
                    content: paired[1],
                    start,
                    end: start + paired[0].length
                };
            }
        }
        // Italic written with asterisks.
        const italicAsterisk = /^\*([^*\n][^*]*?)\*/.exec(remaining);
        if (italicAsterisk && italicAsterisk[1].trim().length > 0) {
            // A '*' with '*' on both sides is treated as part of a bold run.
            if (this.isFlankedBy(start, '*')) {
                return null;
            }
            return {
                type: 'italic',
                content: italicAsterisk[1],
                start,
                end: start + italicAsterisk[0].length
            };
        }
        // Italic written with underscores.
        const italicUnderscore = /^_([^_\n]+?)_/.exec(remaining);
        if (italicUnderscore && italicUnderscore[1].trim().length > 0) {
            // A '_' with '_' on both sides is treated as part of an underline run.
            if (this.isFlankedBy(start, '_')) {
                return null;
            }
            return {
                type: 'italic',
                content: italicUnderscore[1],
                start,
                end: start + italicUnderscore[0].length
            };
        }
        // Link: [text](url). The URL travels in the `language` field.
        const link = /^\[([^\]]+?)\]\(([^)]+?)\)/.exec(remaining);
        if (link) {
            return {
                type: 'link',
                content: link[1],
                start,
                end: start + link[0].length,
                language: link[2]
            };
        }
        return null;
    }

    /**
     * Whether the characters immediately before and after `position` are both
     * `delimiter`.
     *
     * @param {number} position
     * @param {string} delimiter - A single character.
     * @returns {boolean}
     */
    isFlankedBy(position, delimiter) {
        const text = this.text;
        return position > 0
            && text[position - 1] === delimiter
            && position < text.length - 1
            && text[position + 1] === delimiter;
    }

    /**
     * Whether `position` lies strictly inside a triple-backtick region of
     * `text`, i.e. after the first backtick of the opening fence and before
     * the closing fence. Positions on the closing fence itself are reported
     * as outside.
     *
     * @param {string} text
     * @param {number} position
     * @returns {boolean}
     */
    isInsideCodeBlock(text, position) {
        return MarkdownTokenizer.findCodeBlockSpans(text)
            .some((span) => position > span.start && position < span.closingFence);
    }

    /**
     * Whether `position` lies strictly inside a single-backtick span of
     * `text`. The closing backtick itself is reported as outside.
     *
     * @param {string} text
     * @param {number} position
     * @returns {boolean}
     */
    isInsideInlineCode(text, position) {
        const inlineCodeRegex = /`[^`\n]*`/g;
        let match;
        while ((match = inlineCodeRegex.exec(text)) !== null) {
            const closingTick = match.index + match[0].length - 1;
            if (position > match.index && position <= closingTick) {
                return position !== closingTick;
            }
        }
        return false;
    }
}
176
+
177
/**
 * Escape the HTML-significant characters &, <, >, " and ' in `text`.
 *
 * An ampersand that already starts something shaped like an entity
 * (`&name;` or `&#digits;`) is left alone, so running the function twice
 * yields the same result as running it once.
 *
 * @param text - Text to escape; falsy values are returned unchanged.
 * @returns The escaped text.
 */
function escapeHtml(text) {
    if (!text) {
        return text;
    }
    // The ampersand rule must run first so the entities produced by the
    // later rules are not themselves re-escaped.
    const rules = [
        [/&(?!#?\w+;)/g, '&amp;'],
        [/</g, '&lt;'],
        [/>/g, '&gt;'],
        [/"/g, '&quot;'],
        [/'/g, '&#39;']
    ];
    return rules.reduce((escaped, [pattern, entity]) => escaped.replace(pattern, entity), text);
}
191
/**
 * Escape the characters &, <, > and " in `text`; apostrophes are kept as-is.
 *
 * An ampersand that already starts something shaped like an entity
 * (`&name;` or `&#digits;`) is not escaped again.
 *
 * @param text - Text to escape; falsy values are returned unchanged.
 * @returns The escaped text.
 */
function escapeTelegramHtml(text) {
    if (!text) {
        return text;
    }
    // Ampersands go first so the entities written below stay intact.
    return text
        .replace(/&(?!#?\w+;)/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;');
}
204
/**
 * Append a closing code fence when `text` contains an unbalanced one.
 *
 * @param text - Markdown text.
 * @returns `text` followed by a newline and a triple-backtick fence when the
 *   number of fences in it is odd; otherwise `text` unchanged.
 */
function autoCloseCodeBlocks(text) {
    // Splitting on the fence yields one more piece than there are fences.
    const fenceCount = text.split('```').length - 1;
    const hasUnclosedFence = fenceCount % 2 === 1;
    return hasUnclosedFence ? text + '\n```' : text;
}
216
+
217
/**
 * Converts Telegram-style markdown into Telegram-compatible HTML.
 *
 * Options (all optional):
 *  - `escapeHtml` (default true): escape HTML-significant characters in text.
 *  - `autoCloseCodeBlocks` (default true): append a missing closing fence.
 *  - `linkProcessor(url, text)`: custom renderer for links. `text` is the
 *    already-converted HTML of the link label.
 *  - `codeBlockProcessor(code, language)`: custom renderer for fenced code.
 *    `code` has already been escaped when `escapeHtml` is enabled.
 */
class MarkdownConverter {
    constructor(options = {}) {
        this.hasCustomLinkProcessor = !!options.linkProcessor;
        this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
        this.options = {
            escapeHtml: options.escapeHtml ?? true,
            autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
            linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
            codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
        };
    }

    /**
     * Convert markdown text to Telegram HTML.
     *
     * @param text - Markdown text.
     * @returns The converted HTML, trimmed. When the conversion yields only
     *   whitespace the original `text` is returned untouched.
     */
    convert(text) {
        const closedText = this.options.autoCloseCodeBlocks
            ? autoCloseCodeBlocks(text)
            : text;
        // Blockquote lines are swapped for placeholders first so that their
        // leading '>' is not escaped by the inline pass.
        const markedText = this.preprocessBlockquotes(closedText);
        const converted = this.convertRecursive(markedText);
        const result = this.processBlockquoteMarkers(converted);
        if (result.trim() === '') {
            return text;
        }
        return result.trim();
    }

    /**
     * Escape plain (non-code) text when escaping is enabled.
     */
    escapePlainText(text) {
        return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
    }

    /**
     * Escape code or attribute text when escaping is enabled.
     */
    escapeCodeText(text) {
        return this.options.escapeHtml ? escapeHtml(text) : text;
    }

    /**
     * Recursively convert markdown, handling nested styles.
     *
     * @param text - Markdown fragment.
     * @param depth - Current nesting depth; recursion stops beyond 10.
     * @returns HTML for the fragment.
     */
    convertRecursive(text, depth = 0) {
        if (depth > 10) {
            // Stop recursing, but never hand back unescaped text: it is
            // spliced directly into the HTML output by the caller.
            return this.escapePlainText(text);
        }
        const tokens = new MarkdownTokenizer(text).tokenize();
        if (tokens.length === 0) {
            return this.escapePlainText(text);
        }
        let result = '';
        let lastPos = 0;
        for (const token of tokens) {
            // Literal text between the previous token and this one.
            if (token.start > lastPos) {
                result += this.escapePlainText(text.slice(lastPos, token.start));
            }
            lastPos = token.end;
            // Code is escaped but never parsed for nested markdown.
            if (token.type === 'code_block') {
                result += this.wrapToken(token.type, this.escapeCodeText(token.content), token.language);
                continue;
            }
            if (token.type === 'inline_code') {
                result += `<code>${this.escapeCodeText(token.content)}</code>`;
                continue;
            }
            const innerHtml = this.convertRecursive(token.content, depth + 1);
            result += this.wrapToken(token.type, innerHtml, token.language);
        }
        // Literal text after the last token.
        if (lastPos < text.length) {
            result += this.escapePlainText(text.slice(lastPos));
        }
        return result;
    }

    /**
     * Wrap already-converted token content in its HTML tag.
     *
     * @param type - Token type.
     * @param content - Converted (escaped) inner HTML of the token.
     * @param language - Code language for `code_block`, URL for `link`.
     * @returns The HTML for the token; unknown types return `content` as-is.
     */
    wrapToken(type, content, language) {
        switch (type) {
            case 'bold':
                return `<b>${content}</b>`;
            case 'italic':
                return `<i>${content}</i>`;
            case 'underline':
                return `<u>${content}</u>`;
            case 'strikethrough':
                return `<s>${content}</s>`;
            case 'spoiler':
                return `<span class="tg-spoiler">${content}</span>`;
            case 'inline_code':
                return `<code>${content}</code>`;
            case 'code_block': {
                if (this.hasCustomCodeBlockProcessor) {
                    return this.options.codeBlockProcessor(content, language);
                }
                // Escaping is idempotent, so content escaped by the caller
                // passes through the default processor unchanged.
                return this.defaultCodeBlockProcessor(content, language);
            }
            case 'link': {
                const url = language || '';
                if (this.hasCustomLinkProcessor) {
                    return this.options.linkProcessor(url, content);
                }
                // `content` is HTML produced by convertRecursive. Escaping it
                // again would turn nested tags such as <b> into visible text,
                // so only apostrophes (which that pass leaves raw) are encoded.
                const linkHtml = this.options.escapeHtml
                    ? content.replace(/'/g, '&#39;')
                    : content;
                return `<a href="${this.escapeCodeText(url)}">${linkHtml}</a>`;
            }
            case 'quote':
                return `\n<blockquote>${content.trim()}</blockquote>\n`;
            case 'expandable_quote':
                return `\n<blockquote expandable>${content.trim()}</blockquote>\n`;
            default:
                return content;
        }
    }

    /**
     * Replace blockquote lines with placeholder markers before inline parsing.
     *
     * A line whose trimmed text starts with `**>` becomes
     * `[EXPANDABLE_QUOTE]content`; one starting with `>` becomes
     * `[QUOTE]content`. Lines that begin inside a triple-backtick fence are
     * left untouched so quoted-looking code is not rewritten.
     *
     * @param text - Markdown text.
     * @returns Text with quote lines replaced by markers.
     */
    preprocessBlockquotes(text) {
        const processedLines = [];
        let insideFence = false;
        for (const line of text.split('\n')) {
            const startedInsideFence = insideFence;
            // Every fence on the line flips the state for what follows it.
            const fenceCount = line.split('```').length - 1;
            if (fenceCount % 2 === 1) {
                insideFence = !insideFence;
            }
            if (startedInsideFence) {
                processedLines.push(line);
                continue;
            }
            const trimmedLine = line.trim();
            if (trimmedLine.startsWith('**>')) {
                processedLines.push(`[EXPANDABLE_QUOTE]${trimmedLine.substring(3).trim()}`);
            }
            else if (trimmedLine.startsWith('>')) {
                processedLines.push(`[QUOTE]${trimmedLine.substring(1).trim()}`);
            }
            else {
                processedLines.push(line);
            }
        }
        return processedLines.join('\n');
    }

    /**
     * Turn the placeholder markers into blockquote elements.
     *
     * The text after each marker has already been converted to HTML by the
     * main pass, so it is wrapped as-is. Converting it a second time would
     * escape the generated tags and re-tokenize attribute values.
     *
     * @param text - Converted HTML still containing quote markers.
     * @returns HTML with markers replaced by blockquote elements.
     */
    processBlockquoteMarkers(text) {
        return text
            .replace(/\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g, (match, content) => `\n<blockquote expandable>${content.trim()}</blockquote>\n`)
            .replace(/\[QUOTE\](.*?)(?=\n|$)/g, (match, content) => `\n<blockquote>${content.trim()}</blockquote>\n`);
    }

    /**
     * Render a link from a raw URL and raw label text, escaping both when
     * escaping is enabled.
     */
    defaultLinkProcessor(url, text) {
        const escapedUrl = this.escapeCodeText(url);
        const escapedText = this.escapeCodeText(text);
        return `<a href="${escapedUrl}">${escapedText}</a>`;
    }

    /**
     * Render a fenced code block, escaping the code when escaping is enabled
     * and adding a `language-*` class when a language is given.
     */
    defaultCodeBlockProcessor(code, language) {
        const escapedCode = this.escapeCodeText(code);
        const langAttr = language ? ` class="language-${language}"` : '';
        return `\n<pre><code${langAttr}>${escapedCode}</code></pre>\n`;
    }
}
393
+
394
/**
 * One-shot conversion of Telegram-style Markdown to HTML.
 *
 * Builds a fresh MarkdownConverter from `options` and runs it on `text`.
 *
 * @param text - Markdown text to convert
 * @param options - Conversion options
 * @returns Telegram-compatible HTML
 */
function markdownToHtml(text, options) {
    return new MarkdownConverter(options).convert(text);
}
404
/**
 * Build a reusable converter.
 *
 * @param options - Conversion options handed to the MarkdownConverter constructor
 * @returns A new MarkdownConverter instance
 */
function createConverter(options) {
    const converter = new MarkdownConverter(options);
    return converter;
}
410
// Aggregate object exposed as the module's default export.
var index = { markdownToHtml, createConverter, MarkdownConverter };

// Named exports, assigned in the same order as before, plus the default.
Object.assign(exports, {
    MarkdownConverter,
    createConverter,
    default: index,
    markdownToHtml
});

// Flag the exports object as an ES-module namespace for interop loaders.
Object.defineProperty(exports, '__esModule', { value: true });
422
+
423
+ }));
package/package.json CHANGED
@@ -1,12 +1,10 @@
1
1
  {
2
2
  "name": "telegram-md2html",
3
- "version": "1.0.0",
3
+ "version": "1.0.1",
4
4
  "description": "A smart converter for Telegram-style Markdown to Telegram-compatible HTML",
5
-
6
5
  "main": "./dist/index.cjs",
7
6
  "module": "./dist/index.mjs",
8
7
  "types": "./dist/index.d.ts",
9
-
10
8
  "exports": {
11
9
  ".": {
12
10
  "types": "./dist/index.d.ts",
@@ -14,16 +12,14 @@
14
12
  "require": "./dist/index.cjs"
15
13
  }
16
14
  },
17
-
18
15
  "scripts": {
19
- "build": "rollup -c && npm run build:types",
20
- "build:types": "cp dist/index.d.ts dist/index.d.mts",
21
- "dev": "rollup -c -w",
22
- "test": "node --no-warnings --experimental-vm-modules node_modules/jest/bin/jest.js",
23
- "test:ci": "npm test -- --coverage",
24
- "prepublishOnly": "npm run build && npm run test"
25
- },
26
-
16
+ "build": "rollup -c && npm run build:types",
17
+ "build:types": "cp dist/index.d.ts dist/index.d.mts",
18
+ "dev": "rollup -c -w",
19
+ "test": "node --no-warnings --experimental-vm-modules node_modules/jest/bin/jest.js",
20
+ "test:ci": "npm test -- --coverage",
21
+ "prepublishOnly": "npm run build && npm run test"
22
+ },
27
23
  "keywords": [
28
24
  "telegram",
29
25
  "markdown",
@@ -32,10 +28,8 @@
32
28
  "bot",
33
29
  "messaging"
34
30
  ],
35
-
36
31
  "author": "Soumyadeep Das <soumyadeepdas765@gmail.com>",
37
32
  "license": "MIT",
38
-
39
33
  "devDependencies": {
40
34
  "@rollup/plugin-commonjs": "^25.0.7",
41
35
  "@rollup/plugin-node-resolve": "^15.2.3",
@@ -48,14 +42,15 @@
48
42
  "tslib": "^2.6.2",
49
43
  "typescript": "^5.3.3"
50
44
  },
51
-
52
45
  "files": [
53
46
  "dist",
54
47
  "README.md",
55
48
  "LICENSE"
56
49
  ],
57
-
58
50
  "engines": {
59
51
  "node": ">=14.0.0"
52
+ },
53
+ "dependencies": {
54
+ "telegram-md2html": "^1.0.0"
60
55
  }
61
- }
56
+ }