telegram-md2html 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,432 @@
1
+ (function (global, factory) {
2
+ typeof exports === 'object' && typeof module !== 'undefined' ? factory(exports) :
3
+ typeof define === 'function' && define.amd ? define(['exports'], factory) :
4
+ (global = typeof globalThis !== 'undefined' ? globalThis : global || self, factory(global.TelegramMd2Html = {}));
5
+ })(this, (function (exports) { 'use strict';
6
+
7
+ class MarkdownTokenizer {
8
+ constructor(text) {
9
+ this.text = text;
10
+ }
11
+ /**
12
+ * Tokenize the markdown text
13
+ */
14
+ tokenize() {
15
+ const tokens = [];
16
+ let pos = 0;
17
+ const text = this.text;
18
+ while (pos < text.length) {
19
+ // Skip if inside code block
20
+ if (this.isInsideCodeBlock(text, pos)) {
21
+ pos++;
22
+ continue;
23
+ }
24
+ // Try to match each token type (from outermost to innermost)
25
+ const token = this.matchToken(pos);
26
+ if (token) {
27
+ tokens.push(token);
28
+ pos = token.end;
29
+ }
30
+ else {
31
+ pos++;
32
+ }
33
+ }
34
+ return tokens.sort((a, b) => a.start - b.start);
35
+ }
36
+ matchToken(start) {
37
+ const text = this.text;
38
+ const remaining = text.slice(start);
39
+ // Skip if we're inside a quote marker
40
+ if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
41
+ return null;
42
+ }
43
+ // Match code block (triple backticks) - highest priority
44
+ const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
45
+ if (codeBlockMatch) {
46
+ return {
47
+ type: 'code_block',
48
+ content: codeBlockMatch[2],
49
+ language: codeBlockMatch[1],
50
+ start: start,
51
+ end: start + codeBlockMatch[0].length
52
+ };
53
+ }
54
+ // Match inline code
55
+ const inlineCodeMatch = remaining.match(/^`([^`\n]+)`/);
56
+ if (inlineCodeMatch && !this.isInsideInlineCode(text, start)) {
57
+ return {
58
+ type: 'inline_code',
59
+ content: inlineCodeMatch[1],
60
+ start: start,
61
+ end: start + inlineCodeMatch[0].length
62
+ };
63
+ }
64
+ // Match spoiler
65
+ const spoilerMatch = remaining.match(/^\|\|([^|\n]+?)\|\|/);
66
+ if (spoilerMatch) {
67
+ return {
68
+ type: 'spoiler',
69
+ content: spoilerMatch[1],
70
+ start: start,
71
+ end: start + spoilerMatch[0].length
72
+ };
73
+ }
74
+ // Match strikethrough
75
+ const strikethroughMatch = remaining.match(/^~~([^~\n]+?)~~/);
76
+ if (strikethroughMatch) {
77
+ return {
78
+ type: 'strikethrough',
79
+ content: strikethroughMatch[1],
80
+ start: start,
81
+ end: start + strikethroughMatch[0].length
82
+ };
83
+ }
84
+ // Match bold
85
+ const boldMatch = remaining.match(/^\*\*([^*\n]+?)\*\*/);
86
+ if (boldMatch) {
87
+ return {
88
+ type: 'bold',
89
+ content: boldMatch[1],
90
+ start: start,
91
+ end: start + boldMatch[0].length
92
+ };
93
+ }
94
+ // Match underline
95
+ const underlineMatch = remaining.match(/^__([^_\n]+?)__/);
96
+ if (underlineMatch) {
97
+ return {
98
+ type: 'underline',
99
+ content: underlineMatch[1],
100
+ start: start,
101
+ end: start + underlineMatch[0].length
102
+ };
103
+ }
104
+ // Match italic with asterisk
105
+ const italicAsteriskMatch = remaining.match(/^\*([^*\n][^*]*?)\*/);
106
+ if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
107
+ // Don't match if it's part of bold (**)
108
+ if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
109
+ return null;
110
+ }
111
+ return {
112
+ type: 'italic',
113
+ content: italicAsteriskMatch[1],
114
+ start: start,
115
+ end: start + italicAsteriskMatch[0].length
116
+ };
117
+ }
118
+ // Match italic with underscore
119
+ const italicUnderscoreMatch = remaining.match(/^_([^_\n]+?)_/);
120
+ if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
121
+ // Don't match if it's part of underline (__)
122
+ if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
123
+ return null;
124
+ }
125
+ return {
126
+ type: 'italic',
127
+ content: italicUnderscoreMatch[1],
128
+ start: start,
129
+ end: start + italicUnderscoreMatch[0].length
130
+ };
131
+ }
132
+ // Match link
133
+ const linkMatch = remaining.match(/^\[([^\]]+?)\]\(([^)]+?)\)/);
134
+ if (linkMatch) {
135
+ return {
136
+ type: 'link',
137
+ content: linkMatch[1],
138
+ start: start,
139
+ end: start + linkMatch[0].length,
140
+ language: linkMatch[2]
141
+ };
142
+ }
143
+ return null;
144
+ }
145
+ isInsideCodeBlock(text, position) {
146
+ // Check for code blocks
147
+ const codeBlockRegex = /```[\s\S]*?```/g;
148
+ let match;
149
+ while ((match = codeBlockRegex.exec(text)) !== null) {
150
+ if (position > match.index && position < match.index + match[0].length) {
151
+ // But allow matching the closing ``` itself
152
+ if (position >= match.index + match[0].length - 3) {
153
+ return false;
154
+ }
155
+ return true;
156
+ }
157
+ }
158
+ return false;
159
+ }
160
+ isInsideInlineCode(text, position) {
161
+ // Check for inline code
162
+ const inlineCodeRegex = /`[^`\n]*`/g;
163
+ let match;
164
+ while ((match = inlineCodeRegex.exec(text)) !== null) {
165
+ if (position > match.index && position < match.index + match[0].length) {
166
+ // But allow matching the closing ` itself
167
+ if (position === match.index + match[0].length - 1) {
168
+ return false;
169
+ }
170
+ return true;
171
+ }
172
+ }
173
+ return false;
174
+ }
175
+ }
176
+
177
+ /**
178
+ * Escapes HTML special characters (but not double-escape)
179
+ */
180
+ function escapeHtml(text) {
181
+ if (!text)
182
+ return text;
183
+ // Replace & first (but not if it's already an entity)
184
+ let result = text.replace(/&(?!#?\w+;)/g, '&amp;');
185
+ result = result.replace(/</g, '&lt;');
186
+ result = result.replace(/>/g, '&gt;');
187
+ result = result.replace(/"/g, '&quot;');
188
+ result = result.replace(/'/g, '&#39;');
189
+ return result;
190
+ }
191
+ /**
192
+ * Escapes Telegram HTML special characters
193
+ */
194
+ function escapeTelegramHtml(text) {
195
+ if (!text)
196
+ return text;
197
+ // For Telegram, we only need to escape &, <, >, and "
198
+ let result = text.replace(/&(?!#?\w+;)/g, '&amp;');
199
+ result = result.replace(/</g, '&lt;');
200
+ result = result.replace(/>/g, '&gt;');
201
+ result = result.replace(/"/g, '&quot;');
202
+ return result;
203
+ }
204
+ /**
205
+ * Appends missing code block delimiters
206
+ */
207
+ function autoCloseCodeBlocks(text) {
208
+ // Count triple backticks
209
+ const tripleBacktickCount = (text.match(/```/g) || []).length;
210
+ // If odd number, add closing backticks
211
+ if (tripleBacktickCount % 2 === 1) {
212
+ return text + '\n```';
213
+ }
214
+ return text;
215
+ }
216
+
217
+ class MarkdownConverter {
218
+ constructor(options = {}) {
219
+ this.hasCustomLinkProcessor = !!options.linkProcessor;
220
+ this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
221
+ this.options = {
222
+ escapeHtml: options.escapeHtml ?? true,
223
+ autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
224
+ linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
225
+ codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
226
+ };
227
+ }
228
+ /**
229
+ * Convert markdown text to Telegram HTML
230
+ */
231
+ convert(text) {
232
+ // Auto-close code blocks if enabled
233
+ let processedText = this.options.autoCloseCodeBlocks
234
+ ? autoCloseCodeBlocks(text)
235
+ : text;
236
+ // First pass: convert blockquotes (they should be at line starts)
237
+ processedText = this.preprocessBlockquotes(processedText);
238
+ // Convert the text recursively
239
+ let result = this.convertRecursive(processedText);
240
+ // Process blockquote markers
241
+ result = this.processBlockquoteMarkers(result);
242
+ // Only trim if there's actual content (not just whitespace)
243
+ if (result.trim() === '') {
244
+ return text; // Return original text (spaces) if result is empty
245
+ }
246
+ return result.trim();
247
+ }
248
+ /**
249
+ * Recursively convert markdown, handling nested styles
250
+ */
251
+ convertRecursive(text, depth = 0) {
252
+ if (depth > 10)
253
+ return text; // Prevent infinite recursion
254
+ // Tokenize the text
255
+ const tokenizer = new MarkdownTokenizer(text);
256
+ const tokens = tokenizer.tokenize();
257
+ // If no tokens found, return the text as-is (with HTML escaping)
258
+ if (tokens.length === 0) {
259
+ return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
260
+ }
261
+ let result = '';
262
+ let lastPos = 0;
263
+ for (const token of tokens) {
264
+ // Add text before token
265
+ if (token.start > lastPos) {
266
+ const textBefore = text.slice(lastPos, token.start);
267
+ result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
268
+ }
269
+ // Handle code blocks specially (no recursive parsing inside)
270
+ if (token.type === 'code_block') {
271
+ const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
272
+ result += this.wrapToken(token.type, codeContent, token.language);
273
+ lastPos = token.end;
274
+ continue;
275
+ }
276
+ // Handle inline code specially (no recursive parsing inside)
277
+ if (token.type === 'inline_code') {
278
+ const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
279
+ result += `<code>${codeContent}</code>`;
280
+ lastPos = token.end;
281
+ continue;
282
+ }
283
+ // Process other token content recursively
284
+ const tokenContent = this.convertRecursive(token.content, depth + 1);
285
+ // Wrap the content in appropriate HTML tags
286
+ result += this.wrapToken(token.type, tokenContent, token.language);
287
+ lastPos = token.end;
288
+ }
289
+ // Add remaining text
290
+ if (lastPos < text.length) {
291
+ const remainingText = text.slice(lastPos);
292
+ result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
293
+ }
294
+ return result;
295
+ }
296
+ /**
297
+ * Wrap token content in HTML tags
298
+ * FIXED: Removed extra newlines that were being added around code blocks and quotes
299
+ * Previously added \n before and after, now returns clean tags without extra whitespace
300
+ */
301
+ wrapToken(type, content, language) {
302
+ switch (type) {
303
+ case 'bold':
304
+ return `<b>${content}</b>`;
305
+ case 'italic':
306
+ return `<i>${content}</i>`;
307
+ case 'underline':
308
+ return `<u>${content}</u>`;
309
+ case 'strikethrough':
310
+ return `<s>${content}</s>`;
311
+ case 'spoiler':
312
+ return `<span class="tg-spoiler">${content}</span>`;
313
+ case 'inline_code':
314
+ // Already handled above
315
+ return `<code>${content}</code>`;
316
+ case 'code_block':
317
+ // Already handled above, but handle custom processor
318
+ if (this.hasCustomCodeBlockProcessor) {
319
+ return this.options.codeBlockProcessor(content, language);
320
+ }
321
+ const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
322
+ const langAttr = language ? ` class="language-${language}"` : '';
323
+ // FIXED: Removed \n before and after - now returns just the tag
324
+ return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
325
+ case 'link':
326
+ const url = language || '';
327
+ if (this.hasCustomLinkProcessor) {
328
+ return this.options.linkProcessor(url, content);
329
+ }
330
+ const escapedUrl = this.options.escapeHtml ? escapeHtml(url) : url;
331
+ const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
332
+ return `<a href="${escapedUrl}">${escapedText}</a>`;
333
+ case 'quote':
334
+ // FIXED: Removed \n before and after - now returns just the tag
335
+ return `<blockquote>${content.trim()}</blockquote>`;
336
+ case 'expandable_quote':
337
+ // FIXED: Removed \n before and after - now returns just the tag
338
+ return `<blockquote expandable>${content.trim()}</blockquote>`;
339
+ default:
340
+ return content;
341
+ }
342
+ }
343
+ /**
344
+ * Preprocess blockquotes to mark them before other parsing
345
+ */
346
+ preprocessBlockquotes(text) {
347
+ const lines = text.split('\n');
348
+ const processedLines = [];
349
+ for (const line of lines) {
350
+ const trimmedLine = line.trim();
351
+ // Only treat lines starting with > at the beginning of line as blockquotes
352
+ if (trimmedLine.startsWith('**>')) {
353
+ // Expandable blockquote
354
+ const content = trimmedLine.substring(3).trim();
355
+ processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
356
+ }
357
+ else if (trimmedLine.startsWith('>')) {
358
+ // Regular blockquote
359
+ const content = trimmedLine.substring(1).trim();
360
+ processedLines.push(`[QUOTE]${content}`);
361
+ }
362
+ else {
363
+ processedLines.push(line);
364
+ }
365
+ }
366
+ return processedLines.join('\n');
367
+ }
368
+ /**
369
+ * Process blockquote markers
370
+ * FIXED: Removed extra newlines from the replacement strings
371
+ */
372
+ processBlockquoteMarkers(text) {
373
+ let result = text;
374
+ // Replace expandable quote markers (process content recursively)
375
+ const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
376
+ result = result.replace(expandableQuoteRegex, (match, content) => {
377
+ const processedContent = this.convertRecursive(content);
378
+ // FIXED: Removed \n before and after
379
+ return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
380
+ });
381
+ // Replace regular quote markers (process content recursively)
382
+ const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
383
+ result = result.replace(quoteRegex, (match, content) => {
384
+ const processedContent = this.convertRecursive(content);
385
+ // FIXED: Removed \n before and after
386
+ return `<blockquote>${processedContent.trim()}</blockquote>`;
387
+ });
388
+ return result;
389
+ }
390
+ defaultLinkProcessor(url, text) {
391
+ const escapedUrl = this.options.escapeHtml ? escapeHtml(url) : url;
392
+ const escapedText = this.options.escapeHtml ? escapeHtml(text) : text;
393
+ return `<a href="${escapedUrl}">${escapedText}</a>`;
394
+ }
395
+ defaultCodeBlockProcessor(code, language) {
396
+ const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
397
+ const langAttr = language ? ` class="language-${language}"` : '';
398
+ // FIXED: Removed \n before and after in default processor too
399
+ return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
400
+ }
401
+ }
402
+
403
+ /**
404
+ * Convert Telegram-style Markdown to HTML
405
+ * @param text - Markdown text to convert
406
+ * @param options - Conversion options
407
+ * @returns Telegram-compatible HTML
408
+ */
409
+ function markdownToHtml(text, options) {
410
+ const converter = new MarkdownConverter(options);
411
+ return converter.convert(text);
412
+ }
413
+ /**
414
+ * Create a converter instance with custom options
415
+ */
416
+ function createConverter(options) {
417
+ return new MarkdownConverter(options);
418
+ }
419
+ var index = {
420
+ markdownToHtml,
421
+ createConverter,
422
+ MarkdownConverter
423
+ };
424
+
425
+ exports.MarkdownConverter = MarkdownConverter;
426
+ exports.createConverter = createConverter;
427
+ exports.default = index;
428
+ exports.markdownToHtml = markdownToHtml;
429
+
430
+ Object.defineProperty(exports, '__esModule', { value: true });
431
+
432
+ }));
package/package.json CHANGED
@@ -1,12 +1,10 @@
1
1
  {
2
2
  "name": "telegram-md2html",
3
- "version": "1.0.0",
3
+ "version": "1.0.2",
4
4
  "description": "A smart converter for Telegram-style Markdown to Telegram-compatible HTML",
5
-
6
5
  "main": "./dist/index.cjs",
7
6
  "module": "./dist/index.mjs",
8
7
  "types": "./dist/index.d.ts",
9
-
10
8
  "exports": {
11
9
  ".": {
12
10
  "types": "./dist/index.d.ts",
@@ -14,16 +12,14 @@
14
12
  "require": "./dist/index.cjs"
15
13
  }
16
14
  },
17
-
18
15
  "scripts": {
19
- "build": "rollup -c && npm run build:types",
20
- "build:types": "cp dist/index.d.ts dist/index.d.mts",
21
- "dev": "rollup -c -w",
22
- "test": "node --no-warnings --experimental-vm-modules node_modules/jest/bin/jest.js",
23
- "test:ci": "npm test -- --coverage",
24
- "prepublishOnly": "npm run build && npm run test"
25
- },
26
-
16
+ "build": "rollup -c && npm run build:types",
17
+ "build:types": "cp dist/index.d.ts dist/index.d.mts",
18
+ "dev": "rollup -c -w",
19
+ "test": "node --no-warnings --experimental-vm-modules node_modules/jest/bin/jest.js",
20
+ "test:ci": "npm test -- --coverage",
21
+ "prepublishOnly": "npm run build && npm run test"
22
+ },
27
23
  "keywords": [
28
24
  "telegram",
29
25
  "markdown",
@@ -32,10 +28,8 @@
32
28
  "bot",
33
29
  "messaging"
34
30
  ],
35
-
36
31
  "author": "Soumyadeep Das <soumyadeepdas765@gmail.com>",
37
32
  "license": "MIT",
38
-
39
33
  "devDependencies": {
40
34
  "@rollup/plugin-commonjs": "^25.0.7",
41
35
  "@rollup/plugin-node-resolve": "^15.2.3",
@@ -48,14 +42,15 @@
48
42
  "tslib": "^2.6.2",
49
43
  "typescript": "^5.3.3"
50
44
  },
51
-
52
45
  "files": [
53
46
  "dist",
54
47
  "README.md",
55
48
  "LICENSE"
56
49
  ],
57
-
58
50
  "engines": {
59
51
  "node": ">=14.0.0"
52
+ },
53
+ "dependencies": {
54
+ "telegram-md2html": "^1.0.0"
60
55
  }
61
- }
56
+ }