telegram-md2html 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +383 -319
- package/dist/converter.d.ts +1 -15
- package/dist/index.cjs +61 -58
- package/dist/index.esm.js +61 -58
- package/dist/index.mjs +61 -58
- package/dist/index.umd.js +61 -58
- package/dist/tokenizer.d.ts +0 -3
- package/dist/types.d.ts +9 -0
- package/package.json +1 -1
package/dist/converter.d.ts
CHANGED
|
@@ -3,26 +3,12 @@ export declare class MarkdownConverter {
|
|
|
3
3
|
private options;
|
|
4
4
|
private hasCustomLinkProcessor;
|
|
5
5
|
private hasCustomCodeBlockProcessor;
|
|
6
|
+
private headingSymbol;
|
|
6
7
|
constructor(options?: ConvertOptions);
|
|
7
|
-
/**
|
|
8
|
-
* Convert markdown text to Telegram HTML
|
|
9
|
-
*/
|
|
10
8
|
convert(text: string): string;
|
|
11
|
-
/**
|
|
12
|
-
* Recursively convert markdown, handling nested styles
|
|
13
|
-
*/
|
|
14
9
|
private convertRecursive;
|
|
15
|
-
/**
|
|
16
|
-
* Wrap token content in HTML tags
|
|
17
|
-
*/
|
|
18
10
|
private wrapToken;
|
|
19
|
-
/**
|
|
20
|
-
* Preprocess blockquotes to mark them before other parsing
|
|
21
|
-
*/
|
|
22
11
|
private preprocessBlockquotes;
|
|
23
|
-
/**
|
|
24
|
-
* Process blockquote markers
|
|
25
|
-
*/
|
|
26
12
|
private processBlockquoteMarkers;
|
|
27
13
|
private defaultLinkProcessor;
|
|
28
14
|
private defaultCodeBlockProcessor;
|
package/dist/index.cjs
CHANGED
|
@@ -6,20 +6,15 @@ class MarkdownTokenizer {
|
|
|
6
6
|
constructor(text) {
|
|
7
7
|
this.text = text;
|
|
8
8
|
}
|
|
9
|
-
/**
|
|
10
|
-
* Tokenize the markdown text
|
|
11
|
-
*/
|
|
12
9
|
tokenize() {
|
|
13
10
|
const tokens = [];
|
|
14
11
|
let pos = 0;
|
|
15
12
|
const text = this.text;
|
|
16
13
|
while (pos < text.length) {
|
|
17
|
-
// Skip if inside code block
|
|
18
14
|
if (this.isInsideCodeBlock(text, pos)) {
|
|
19
15
|
pos++;
|
|
20
16
|
continue;
|
|
21
17
|
}
|
|
22
|
-
// Try to match each token type (from outermost to innermost)
|
|
23
18
|
const token = this.matchToken(pos);
|
|
24
19
|
if (token) {
|
|
25
20
|
tokens.push(token);
|
|
@@ -34,11 +29,22 @@ class MarkdownTokenizer {
|
|
|
34
29
|
matchToken(start) {
|
|
35
30
|
const text = this.text;
|
|
36
31
|
const remaining = text.slice(start);
|
|
37
|
-
// Skip if we're inside a quote marker
|
|
38
32
|
if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
|
|
39
33
|
return null;
|
|
40
34
|
}
|
|
41
|
-
// Match
|
|
35
|
+
// Match headings (###, ##)
|
|
36
|
+
const headingMatch = remaining.match(/^(#{1,3})\s+(.+?)(?=\n|$)/);
|
|
37
|
+
if (headingMatch) {
|
|
38
|
+
const level = headingMatch[1].length;
|
|
39
|
+
const content = headingMatch[2];
|
|
40
|
+
return {
|
|
41
|
+
type: `heading_${level}`,
|
|
42
|
+
content: content,
|
|
43
|
+
start: start,
|
|
44
|
+
end: start + headingMatch[0].length
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
// Match code block (triple backticks)
|
|
42
48
|
const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
|
|
43
49
|
if (codeBlockMatch) {
|
|
44
50
|
return {
|
|
@@ -99,13 +105,22 @@ class MarkdownTokenizer {
|
|
|
99
105
|
end: start + underlineMatch[0].length
|
|
100
106
|
};
|
|
101
107
|
}
|
|
102
|
-
// Match italic with asterisk
|
|
103
|
-
const italicAsteriskMatch = remaining.match(/^\*([^*\
|
|
108
|
+
// FIXED: Match italic with asterisk - require space before and after
|
|
109
|
+
const italicAsteriskMatch = remaining.match(/^\*([^*\s][^*]*?[^*\s])\*/);
|
|
104
110
|
if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
|
|
105
111
|
// Don't match if it's part of bold (**)
|
|
106
112
|
if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
|
|
107
113
|
return null;
|
|
108
114
|
}
|
|
115
|
+
// Check if preceded by alphanumeric or underscore (prevents matching words like *italic* inside words)
|
|
116
|
+
if (start > 0 && /[a-zA-Z0-9_]/.test(text[start - 1])) {
|
|
117
|
+
return null;
|
|
118
|
+
}
|
|
119
|
+
// Check if followed by alphanumeric or underscore
|
|
120
|
+
const afterMatch = start + italicAsteriskMatch[0].length;
|
|
121
|
+
if (afterMatch < text.length && /[a-zA-Z0-9_]/.test(text[afterMatch])) {
|
|
122
|
+
return null;
|
|
123
|
+
}
|
|
109
124
|
return {
|
|
110
125
|
type: 'italic',
|
|
111
126
|
content: italicAsteriskMatch[1],
|
|
@@ -113,13 +128,25 @@ class MarkdownTokenizer {
|
|
|
113
128
|
end: start + italicAsteriskMatch[0].length
|
|
114
129
|
};
|
|
115
130
|
}
|
|
116
|
-
// Match italic with underscore
|
|
117
|
-
const italicUnderscoreMatch = remaining.match(/^_([^_\
|
|
131
|
+
// FIXED: Match italic with underscore - require word boundaries and not part of username
|
|
132
|
+
const italicUnderscoreMatch = remaining.match(/^_([^_\s][^_]*?[^_\s])_/);
|
|
118
133
|
if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
|
|
119
134
|
// Don't match if it's part of underline (__)
|
|
120
135
|
if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
|
|
121
136
|
return null;
|
|
122
137
|
}
|
|
138
|
+
// FIXED: Don't match if preceded by @ (username) or alphanumeric
|
|
139
|
+
if (start > 0) {
|
|
140
|
+
const prevChar = text[start - 1];
|
|
141
|
+
if (prevChar === '@' || /[a-zA-Z0-9]/.test(prevChar)) {
|
|
142
|
+
return null;
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
// Don't match if followed by alphanumeric (part of word)
|
|
146
|
+
const afterMatch = start + italicUnderscoreMatch[0].length;
|
|
147
|
+
if (afterMatch < text.length && /[a-zA-Z0-9]/.test(text[afterMatch])) {
|
|
148
|
+
return null;
|
|
149
|
+
}
|
|
123
150
|
return {
|
|
124
151
|
type: 'italic',
|
|
125
152
|
content: italicUnderscoreMatch[1],
|
|
@@ -141,12 +168,10 @@ class MarkdownTokenizer {
|
|
|
141
168
|
return null;
|
|
142
169
|
}
|
|
143
170
|
isInsideCodeBlock(text, position) {
|
|
144
|
-
// Check for code blocks
|
|
145
171
|
const codeBlockRegex = /```[\s\S]*?```/g;
|
|
146
172
|
let match;
|
|
147
173
|
while ((match = codeBlockRegex.exec(text)) !== null) {
|
|
148
174
|
if (position > match.index && position < match.index + match[0].length) {
|
|
149
|
-
// But allow matching the closing ``` itself
|
|
150
175
|
if (position >= match.index + match[0].length - 3) {
|
|
151
176
|
return false;
|
|
152
177
|
}
|
|
@@ -156,12 +181,10 @@ class MarkdownTokenizer {
|
|
|
156
181
|
return false;
|
|
157
182
|
}
|
|
158
183
|
isInsideInlineCode(text, position) {
|
|
159
|
-
// Check for inline code
|
|
160
184
|
const inlineCodeRegex = /`[^`\n]*`/g;
|
|
161
185
|
let match;
|
|
162
186
|
while ((match = inlineCodeRegex.exec(text)) !== null) {
|
|
163
187
|
if (position > match.index && position < match.index + match[0].length) {
|
|
164
|
-
// But allow matching the closing ` itself
|
|
165
188
|
if (position === match.index + match[0].length - 1) {
|
|
166
189
|
return false;
|
|
167
190
|
}
|
|
@@ -216,84 +239,77 @@ class MarkdownConverter {
|
|
|
216
239
|
constructor(options = {}) {
|
|
217
240
|
this.hasCustomLinkProcessor = !!options.linkProcessor;
|
|
218
241
|
this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
|
|
242
|
+
// Fixed: Add heading symbol option (default: '▎')
|
|
243
|
+
this.headingSymbol = options.headingSymbol ?? '▎';
|
|
219
244
|
this.options = {
|
|
220
245
|
escapeHtml: options.escapeHtml ?? true,
|
|
221
246
|
autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
|
|
247
|
+
headingSymbol: options.headingSymbol ?? '▎',
|
|
248
|
+
headingBlank: options.headingBlank ?? false,
|
|
222
249
|
linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
|
|
223
250
|
codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
|
|
224
251
|
};
|
|
225
252
|
}
|
|
226
|
-
/**
|
|
227
|
-
* Convert markdown text to Telegram HTML
|
|
228
|
-
*/
|
|
229
253
|
convert(text) {
|
|
230
|
-
// Auto-close code blocks if enabled
|
|
231
254
|
let processedText = this.options.autoCloseCodeBlocks
|
|
232
255
|
? autoCloseCodeBlocks(text)
|
|
233
256
|
: text;
|
|
234
|
-
// First pass: convert blockquotes (they should be at line starts)
|
|
235
257
|
processedText = this.preprocessBlockquotes(processedText);
|
|
236
|
-
// Convert the text recursively
|
|
237
258
|
let result = this.convertRecursive(processedText);
|
|
238
|
-
// Process blockquote markers
|
|
239
259
|
result = this.processBlockquoteMarkers(result);
|
|
240
|
-
// Only trim if there's actual content (not just whitespace)
|
|
241
260
|
if (result.trim() === '') {
|
|
242
|
-
return text;
|
|
261
|
+
return text;
|
|
243
262
|
}
|
|
244
263
|
return result.trim();
|
|
245
264
|
}
|
|
246
|
-
/**
|
|
247
|
-
* Recursively convert markdown, handling nested styles
|
|
248
|
-
*/
|
|
249
265
|
convertRecursive(text, depth = 0) {
|
|
250
266
|
if (depth > 10)
|
|
251
|
-
return text;
|
|
252
|
-
// Tokenize the text
|
|
267
|
+
return text;
|
|
253
268
|
const tokenizer = new MarkdownTokenizer(text);
|
|
254
269
|
const tokens = tokenizer.tokenize();
|
|
255
|
-
// If no tokens found, return the text as-is (with HTML escaping)
|
|
256
270
|
if (tokens.length === 0) {
|
|
257
271
|
return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
|
|
258
272
|
}
|
|
259
273
|
let result = '';
|
|
260
274
|
let lastPos = 0;
|
|
261
275
|
for (const token of tokens) {
|
|
262
|
-
// Add text before token
|
|
263
276
|
if (token.start > lastPos) {
|
|
264
277
|
const textBefore = text.slice(lastPos, token.start);
|
|
265
278
|
result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
|
|
266
279
|
}
|
|
267
|
-
// Handle
|
|
280
|
+
// Fixed: Handle headings
|
|
281
|
+
if (token.type === 'heading_2' || token.type === 'heading_3') {
|
|
282
|
+
token.type === 'heading_2' ? '##' : '###';
|
|
283
|
+
const processedContent = this.convertRecursive(token.content, depth + 1);
|
|
284
|
+
// Fixed: Add symbol and bold styling
|
|
285
|
+
const symbol = this.options.headingBlank ? '' : this.headingSymbol;
|
|
286
|
+
const headingText = symbol ? `${symbol} ${processedContent}` : processedContent;
|
|
287
|
+
result += `<b>${headingText}</b>`;
|
|
288
|
+
lastPos = token.end;
|
|
289
|
+
continue;
|
|
290
|
+
}
|
|
268
291
|
if (token.type === 'code_block') {
|
|
269
292
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
270
293
|
result += this.wrapToken(token.type, codeContent, token.language);
|
|
271
294
|
lastPos = token.end;
|
|
272
295
|
continue;
|
|
273
296
|
}
|
|
274
|
-
// Handle inline code specially (no recursive parsing inside)
|
|
275
297
|
if (token.type === 'inline_code') {
|
|
276
298
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
277
299
|
result += `<code>${codeContent}</code>`;
|
|
278
300
|
lastPos = token.end;
|
|
279
301
|
continue;
|
|
280
302
|
}
|
|
281
|
-
// Process other token content recursively
|
|
282
303
|
const tokenContent = this.convertRecursive(token.content, depth + 1);
|
|
283
|
-
// Wrap the content in appropriate HTML tags
|
|
284
304
|
result += this.wrapToken(token.type, tokenContent, token.language);
|
|
285
305
|
lastPos = token.end;
|
|
286
306
|
}
|
|
287
|
-
// Add remaining text
|
|
288
307
|
if (lastPos < text.length) {
|
|
289
308
|
const remainingText = text.slice(lastPos);
|
|
290
309
|
result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
|
|
291
310
|
}
|
|
292
311
|
return result;
|
|
293
312
|
}
|
|
294
|
-
/**
|
|
295
|
-
* Wrap token content in HTML tags
|
|
296
|
-
*/
|
|
297
313
|
wrapToken(type, content, language) {
|
|
298
314
|
switch (type) {
|
|
299
315
|
case 'bold':
|
|
@@ -307,16 +323,14 @@ class MarkdownConverter {
|
|
|
307
323
|
case 'spoiler':
|
|
308
324
|
return `<span class="tg-spoiler">${content}</span>`;
|
|
309
325
|
case 'inline_code':
|
|
310
|
-
// Already handled above
|
|
311
326
|
return `<code>${content}</code>`;
|
|
312
327
|
case 'code_block':
|
|
313
|
-
// Already handled above, but handle custom processor
|
|
314
328
|
if (this.hasCustomCodeBlockProcessor) {
|
|
315
329
|
return this.options.codeBlockProcessor(content, language);
|
|
316
330
|
}
|
|
317
331
|
const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
318
332
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
319
|
-
return
|
|
333
|
+
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
320
334
|
case 'link':
|
|
321
335
|
const url = language || '';
|
|
322
336
|
if (this.hasCustomLinkProcessor) {
|
|
@@ -326,29 +340,23 @@ class MarkdownConverter {
|
|
|
326
340
|
const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
327
341
|
return `<a href="${escapedUrl}">${escapedText}</a>`;
|
|
328
342
|
case 'quote':
|
|
329
|
-
return
|
|
343
|
+
return `<blockquote>${content.trim()}</blockquote>`;
|
|
330
344
|
case 'expandable_quote':
|
|
331
|
-
return
|
|
345
|
+
return `<blockquote expandable>${content.trim()}</blockquote>`;
|
|
332
346
|
default:
|
|
333
347
|
return content;
|
|
334
348
|
}
|
|
335
349
|
}
|
|
336
|
-
/**
|
|
337
|
-
* Preprocess blockquotes to mark them before other parsing
|
|
338
|
-
*/
|
|
339
350
|
preprocessBlockquotes(text) {
|
|
340
351
|
const lines = text.split('\n');
|
|
341
352
|
const processedLines = [];
|
|
342
353
|
for (const line of lines) {
|
|
343
354
|
const trimmedLine = line.trim();
|
|
344
|
-
// Only treat lines starting with > at the beginning of line as blockquotes
|
|
345
355
|
if (trimmedLine.startsWith('**>')) {
|
|
346
|
-
// Expandable blockquote
|
|
347
356
|
const content = trimmedLine.substring(3).trim();
|
|
348
357
|
processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
|
|
349
358
|
}
|
|
350
359
|
else if (trimmedLine.startsWith('>')) {
|
|
351
|
-
// Regular blockquote
|
|
352
360
|
const content = trimmedLine.substring(1).trim();
|
|
353
361
|
processedLines.push(`[QUOTE]${content}`);
|
|
354
362
|
}
|
|
@@ -358,22 +366,17 @@ class MarkdownConverter {
|
|
|
358
366
|
}
|
|
359
367
|
return processedLines.join('\n');
|
|
360
368
|
}
|
|
361
|
-
/**
|
|
362
|
-
* Process blockquote markers
|
|
363
|
-
*/
|
|
364
369
|
processBlockquoteMarkers(text) {
|
|
365
370
|
let result = text;
|
|
366
|
-
// Replace expandable quote markers (process content recursively)
|
|
367
371
|
const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
|
|
368
372
|
result = result.replace(expandableQuoteRegex, (match, content) => {
|
|
369
373
|
const processedContent = this.convertRecursive(content);
|
|
370
|
-
return
|
|
374
|
+
return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
|
|
371
375
|
});
|
|
372
|
-
// Replace regular quote markers (process content recursively)
|
|
373
376
|
const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
|
|
374
377
|
result = result.replace(quoteRegex, (match, content) => {
|
|
375
378
|
const processedContent = this.convertRecursive(content);
|
|
376
|
-
return
|
|
379
|
+
return `<blockquote>${processedContent.trim()}</blockquote>`;
|
|
377
380
|
});
|
|
378
381
|
return result;
|
|
379
382
|
}
|
|
@@ -385,7 +388,7 @@ class MarkdownConverter {
|
|
|
385
388
|
defaultCodeBlockProcessor(code, language) {
|
|
386
389
|
const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
|
|
387
390
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
388
|
-
return
|
|
391
|
+
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
389
392
|
}
|
|
390
393
|
}
|
|
391
394
|
|
package/dist/index.esm.js
CHANGED
|
@@ -2,20 +2,15 @@ class MarkdownTokenizer {
|
|
|
2
2
|
constructor(text) {
|
|
3
3
|
this.text = text;
|
|
4
4
|
}
|
|
5
|
-
/**
|
|
6
|
-
* Tokenize the markdown text
|
|
7
|
-
*/
|
|
8
5
|
tokenize() {
|
|
9
6
|
const tokens = [];
|
|
10
7
|
let pos = 0;
|
|
11
8
|
const text = this.text;
|
|
12
9
|
while (pos < text.length) {
|
|
13
|
-
// Skip if inside code block
|
|
14
10
|
if (this.isInsideCodeBlock(text, pos)) {
|
|
15
11
|
pos++;
|
|
16
12
|
continue;
|
|
17
13
|
}
|
|
18
|
-
// Try to match each token type (from outermost to innermost)
|
|
19
14
|
const token = this.matchToken(pos);
|
|
20
15
|
if (token) {
|
|
21
16
|
tokens.push(token);
|
|
@@ -30,11 +25,22 @@ class MarkdownTokenizer {
|
|
|
30
25
|
matchToken(start) {
|
|
31
26
|
const text = this.text;
|
|
32
27
|
const remaining = text.slice(start);
|
|
33
|
-
// Skip if we're inside a quote marker
|
|
34
28
|
if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
|
|
35
29
|
return null;
|
|
36
30
|
}
|
|
37
|
-
// Match
|
|
31
|
+
// Match headings (###, ##)
|
|
32
|
+
const headingMatch = remaining.match(/^(#{1,3})\s+(.+?)(?=\n|$)/);
|
|
33
|
+
if (headingMatch) {
|
|
34
|
+
const level = headingMatch[1].length;
|
|
35
|
+
const content = headingMatch[2];
|
|
36
|
+
return {
|
|
37
|
+
type: `heading_${level}`,
|
|
38
|
+
content: content,
|
|
39
|
+
start: start,
|
|
40
|
+
end: start + headingMatch[0].length
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
// Match code block (triple backticks)
|
|
38
44
|
const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
|
|
39
45
|
if (codeBlockMatch) {
|
|
40
46
|
return {
|
|
@@ -95,13 +101,22 @@ class MarkdownTokenizer {
|
|
|
95
101
|
end: start + underlineMatch[0].length
|
|
96
102
|
};
|
|
97
103
|
}
|
|
98
|
-
// Match italic with asterisk
|
|
99
|
-
const italicAsteriskMatch = remaining.match(/^\*([^*\
|
|
104
|
+
// FIXED: Match italic with asterisk - require space before and after
|
|
105
|
+
const italicAsteriskMatch = remaining.match(/^\*([^*\s][^*]*?[^*\s])\*/);
|
|
100
106
|
if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
|
|
101
107
|
// Don't match if it's part of bold (**)
|
|
102
108
|
if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
|
|
103
109
|
return null;
|
|
104
110
|
}
|
|
111
|
+
// Check if preceded by alphanumeric or underscore (prevents matching words like *italic* inside words)
|
|
112
|
+
if (start > 0 && /[a-zA-Z0-9_]/.test(text[start - 1])) {
|
|
113
|
+
return null;
|
|
114
|
+
}
|
|
115
|
+
// Check if followed by alphanumeric or underscore
|
|
116
|
+
const afterMatch = start + italicAsteriskMatch[0].length;
|
|
117
|
+
if (afterMatch < text.length && /[a-zA-Z0-9_]/.test(text[afterMatch])) {
|
|
118
|
+
return null;
|
|
119
|
+
}
|
|
105
120
|
return {
|
|
106
121
|
type: 'italic',
|
|
107
122
|
content: italicAsteriskMatch[1],
|
|
@@ -109,13 +124,25 @@ class MarkdownTokenizer {
|
|
|
109
124
|
end: start + italicAsteriskMatch[0].length
|
|
110
125
|
};
|
|
111
126
|
}
|
|
112
|
-
// Match italic with underscore
|
|
113
|
-
const italicUnderscoreMatch = remaining.match(/^_([^_\
|
|
127
|
+
// FIXED: Match italic with underscore - require word boundaries and not part of username
|
|
128
|
+
const italicUnderscoreMatch = remaining.match(/^_([^_\s][^_]*?[^_\s])_/);
|
|
114
129
|
if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
|
|
115
130
|
// Don't match if it's part of underline (__)
|
|
116
131
|
if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
|
|
117
132
|
return null;
|
|
118
133
|
}
|
|
134
|
+
// FIXED: Don't match if preceded by @ (username) or alphanumeric
|
|
135
|
+
if (start > 0) {
|
|
136
|
+
const prevChar = text[start - 1];
|
|
137
|
+
if (prevChar === '@' || /[a-zA-Z0-9]/.test(prevChar)) {
|
|
138
|
+
return null;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
// Don't match if followed by alphanumeric (part of word)
|
|
142
|
+
const afterMatch = start + italicUnderscoreMatch[0].length;
|
|
143
|
+
if (afterMatch < text.length && /[a-zA-Z0-9]/.test(text[afterMatch])) {
|
|
144
|
+
return null;
|
|
145
|
+
}
|
|
119
146
|
return {
|
|
120
147
|
type: 'italic',
|
|
121
148
|
content: italicUnderscoreMatch[1],
|
|
@@ -137,12 +164,10 @@ class MarkdownTokenizer {
|
|
|
137
164
|
return null;
|
|
138
165
|
}
|
|
139
166
|
isInsideCodeBlock(text, position) {
|
|
140
|
-
// Check for code blocks
|
|
141
167
|
const codeBlockRegex = /```[\s\S]*?```/g;
|
|
142
168
|
let match;
|
|
143
169
|
while ((match = codeBlockRegex.exec(text)) !== null) {
|
|
144
170
|
if (position > match.index && position < match.index + match[0].length) {
|
|
145
|
-
// But allow matching the closing ``` itself
|
|
146
171
|
if (position >= match.index + match[0].length - 3) {
|
|
147
172
|
return false;
|
|
148
173
|
}
|
|
@@ -152,12 +177,10 @@ class MarkdownTokenizer {
|
|
|
152
177
|
return false;
|
|
153
178
|
}
|
|
154
179
|
isInsideInlineCode(text, position) {
|
|
155
|
-
// Check for inline code
|
|
156
180
|
const inlineCodeRegex = /`[^`\n]*`/g;
|
|
157
181
|
let match;
|
|
158
182
|
while ((match = inlineCodeRegex.exec(text)) !== null) {
|
|
159
183
|
if (position > match.index && position < match.index + match[0].length) {
|
|
160
|
-
// But allow matching the closing ` itself
|
|
161
184
|
if (position === match.index + match[0].length - 1) {
|
|
162
185
|
return false;
|
|
163
186
|
}
|
|
@@ -212,84 +235,77 @@ class MarkdownConverter {
|
|
|
212
235
|
constructor(options = {}) {
|
|
213
236
|
this.hasCustomLinkProcessor = !!options.linkProcessor;
|
|
214
237
|
this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
|
|
238
|
+
// Fixed: Add heading symbol option (default: '▎')
|
|
239
|
+
this.headingSymbol = options.headingSymbol ?? '▎';
|
|
215
240
|
this.options = {
|
|
216
241
|
escapeHtml: options.escapeHtml ?? true,
|
|
217
242
|
autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
|
|
243
|
+
headingSymbol: options.headingSymbol ?? '▎',
|
|
244
|
+
headingBlank: options.headingBlank ?? false,
|
|
218
245
|
linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
|
|
219
246
|
codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
|
|
220
247
|
};
|
|
221
248
|
}
|
|
222
|
-
/**
|
|
223
|
-
* Convert markdown text to Telegram HTML
|
|
224
|
-
*/
|
|
225
249
|
convert(text) {
|
|
226
|
-
// Auto-close code blocks if enabled
|
|
227
250
|
let processedText = this.options.autoCloseCodeBlocks
|
|
228
251
|
? autoCloseCodeBlocks(text)
|
|
229
252
|
: text;
|
|
230
|
-
// First pass: convert blockquotes (they should be at line starts)
|
|
231
253
|
processedText = this.preprocessBlockquotes(processedText);
|
|
232
|
-
// Convert the text recursively
|
|
233
254
|
let result = this.convertRecursive(processedText);
|
|
234
|
-
// Process blockquote markers
|
|
235
255
|
result = this.processBlockquoteMarkers(result);
|
|
236
|
-
// Only trim if there's actual content (not just whitespace)
|
|
237
256
|
if (result.trim() === '') {
|
|
238
|
-
return text;
|
|
257
|
+
return text;
|
|
239
258
|
}
|
|
240
259
|
return result.trim();
|
|
241
260
|
}
|
|
242
|
-
/**
|
|
243
|
-
* Recursively convert markdown, handling nested styles
|
|
244
|
-
*/
|
|
245
261
|
convertRecursive(text, depth = 0) {
|
|
246
262
|
if (depth > 10)
|
|
247
|
-
return text;
|
|
248
|
-
// Tokenize the text
|
|
263
|
+
return text;
|
|
249
264
|
const tokenizer = new MarkdownTokenizer(text);
|
|
250
265
|
const tokens = tokenizer.tokenize();
|
|
251
|
-
// If no tokens found, return the text as-is (with HTML escaping)
|
|
252
266
|
if (tokens.length === 0) {
|
|
253
267
|
return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
|
|
254
268
|
}
|
|
255
269
|
let result = '';
|
|
256
270
|
let lastPos = 0;
|
|
257
271
|
for (const token of tokens) {
|
|
258
|
-
// Add text before token
|
|
259
272
|
if (token.start > lastPos) {
|
|
260
273
|
const textBefore = text.slice(lastPos, token.start);
|
|
261
274
|
result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
|
|
262
275
|
}
|
|
263
|
-
// Handle
|
|
276
|
+
// Fixed: Handle headings
|
|
277
|
+
if (token.type === 'heading_2' || token.type === 'heading_3') {
|
|
278
|
+
token.type === 'heading_2' ? '##' : '###';
|
|
279
|
+
const processedContent = this.convertRecursive(token.content, depth + 1);
|
|
280
|
+
// Fixed: Add symbol and bold styling
|
|
281
|
+
const symbol = this.options.headingBlank ? '' : this.headingSymbol;
|
|
282
|
+
const headingText = symbol ? `${symbol} ${processedContent}` : processedContent;
|
|
283
|
+
result += `<b>${headingText}</b>`;
|
|
284
|
+
lastPos = token.end;
|
|
285
|
+
continue;
|
|
286
|
+
}
|
|
264
287
|
if (token.type === 'code_block') {
|
|
265
288
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
266
289
|
result += this.wrapToken(token.type, codeContent, token.language);
|
|
267
290
|
lastPos = token.end;
|
|
268
291
|
continue;
|
|
269
292
|
}
|
|
270
|
-
// Handle inline code specially (no recursive parsing inside)
|
|
271
293
|
if (token.type === 'inline_code') {
|
|
272
294
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
273
295
|
result += `<code>${codeContent}</code>`;
|
|
274
296
|
lastPos = token.end;
|
|
275
297
|
continue;
|
|
276
298
|
}
|
|
277
|
-
// Process other token content recursively
|
|
278
299
|
const tokenContent = this.convertRecursive(token.content, depth + 1);
|
|
279
|
-
// Wrap the content in appropriate HTML tags
|
|
280
300
|
result += this.wrapToken(token.type, tokenContent, token.language);
|
|
281
301
|
lastPos = token.end;
|
|
282
302
|
}
|
|
283
|
-
// Add remaining text
|
|
284
303
|
if (lastPos < text.length) {
|
|
285
304
|
const remainingText = text.slice(lastPos);
|
|
286
305
|
result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
|
|
287
306
|
}
|
|
288
307
|
return result;
|
|
289
308
|
}
|
|
290
|
-
/**
|
|
291
|
-
* Wrap token content in HTML tags
|
|
292
|
-
*/
|
|
293
309
|
wrapToken(type, content, language) {
|
|
294
310
|
switch (type) {
|
|
295
311
|
case 'bold':
|
|
@@ -303,16 +319,14 @@ class MarkdownConverter {
|
|
|
303
319
|
case 'spoiler':
|
|
304
320
|
return `<span class="tg-spoiler">${content}</span>`;
|
|
305
321
|
case 'inline_code':
|
|
306
|
-
// Already handled above
|
|
307
322
|
return `<code>${content}</code>`;
|
|
308
323
|
case 'code_block':
|
|
309
|
-
// Already handled above, but handle custom processor
|
|
310
324
|
if (this.hasCustomCodeBlockProcessor) {
|
|
311
325
|
return this.options.codeBlockProcessor(content, language);
|
|
312
326
|
}
|
|
313
327
|
const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
314
328
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
315
|
-
return
|
|
329
|
+
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
316
330
|
case 'link':
|
|
317
331
|
const url = language || '';
|
|
318
332
|
if (this.hasCustomLinkProcessor) {
|
|
@@ -322,29 +336,23 @@ class MarkdownConverter {
|
|
|
322
336
|
const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
323
337
|
return `<a href="${escapedUrl}">${escapedText}</a>`;
|
|
324
338
|
case 'quote':
|
|
325
|
-
return
|
|
339
|
+
return `<blockquote>${content.trim()}</blockquote>`;
|
|
326
340
|
case 'expandable_quote':
|
|
327
|
-
return
|
|
341
|
+
return `<blockquote expandable>${content.trim()}</blockquote>`;
|
|
328
342
|
default:
|
|
329
343
|
return content;
|
|
330
344
|
}
|
|
331
345
|
}
|
|
332
|
-
/**
|
|
333
|
-
* Preprocess blockquotes to mark them before other parsing
|
|
334
|
-
*/
|
|
335
346
|
preprocessBlockquotes(text) {
|
|
336
347
|
const lines = text.split('\n');
|
|
337
348
|
const processedLines = [];
|
|
338
349
|
for (const line of lines) {
|
|
339
350
|
const trimmedLine = line.trim();
|
|
340
|
-
// Only treat lines starting with > at the beginning of line as blockquotes
|
|
341
351
|
if (trimmedLine.startsWith('**>')) {
|
|
342
|
-
// Expandable blockquote
|
|
343
352
|
const content = trimmedLine.substring(3).trim();
|
|
344
353
|
processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
|
|
345
354
|
}
|
|
346
355
|
else if (trimmedLine.startsWith('>')) {
|
|
347
|
-
// Regular blockquote
|
|
348
356
|
const content = trimmedLine.substring(1).trim();
|
|
349
357
|
processedLines.push(`[QUOTE]${content}`);
|
|
350
358
|
}
|
|
@@ -354,22 +362,17 @@ class MarkdownConverter {
|
|
|
354
362
|
}
|
|
355
363
|
return processedLines.join('\n');
|
|
356
364
|
}
|
|
357
|
-
/**
|
|
358
|
-
* Process blockquote markers
|
|
359
|
-
*/
|
|
360
365
|
processBlockquoteMarkers(text) {
|
|
361
366
|
let result = text;
|
|
362
|
-
// Replace expandable quote markers (process content recursively)
|
|
363
367
|
const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
|
|
364
368
|
result = result.replace(expandableQuoteRegex, (match, content) => {
|
|
365
369
|
const processedContent = this.convertRecursive(content);
|
|
366
|
-
return
|
|
370
|
+
return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
|
|
367
371
|
});
|
|
368
|
-
// Replace regular quote markers (process content recursively)
|
|
369
372
|
const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
|
|
370
373
|
result = result.replace(quoteRegex, (match, content) => {
|
|
371
374
|
const processedContent = this.convertRecursive(content);
|
|
372
|
-
return
|
|
375
|
+
return `<blockquote>${processedContent.trim()}</blockquote>`;
|
|
373
376
|
});
|
|
374
377
|
return result;
|
|
375
378
|
}
|
|
@@ -381,7 +384,7 @@ class MarkdownConverter {
|
|
|
381
384
|
defaultCodeBlockProcessor(code, language) {
|
|
382
385
|
const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
|
|
383
386
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
384
|
-
return
|
|
387
|
+
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
385
388
|
}
|
|
386
389
|
}
|
|
387
390
|
|