telegram-md2html 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +379 -324
- package/dist/converter.d.ts +1 -18
- package/dist/index.cjs +55 -61
- package/dist/index.esm.js +55 -61
- package/dist/index.mjs +55 -61
- package/dist/index.umd.js +55 -61
- package/dist/tokenizer.d.ts +0 -3
- package/dist/types.d.ts +9 -0
- package/package.json +1 -1
package/dist/converter.d.ts
CHANGED
|
@@ -3,29 +3,12 @@ export declare class MarkdownConverter {
|
|
|
3
3
|
private options;
|
|
4
4
|
private hasCustomLinkProcessor;
|
|
5
5
|
private hasCustomCodeBlockProcessor;
|
|
6
|
+
private headingSymbol;
|
|
6
7
|
constructor(options?: ConvertOptions);
|
|
7
|
-
/**
|
|
8
|
-
* Convert markdown text to Telegram HTML
|
|
9
|
-
*/
|
|
10
8
|
convert(text: string): string;
|
|
11
|
-
/**
|
|
12
|
-
* Recursively convert markdown, handling nested styles
|
|
13
|
-
*/
|
|
14
9
|
private convertRecursive;
|
|
15
|
-
/**
|
|
16
|
-
* Wrap token content in HTML tags
|
|
17
|
-
* FIXED: Removed extra newlines that were being added around code blocks and quotes
|
|
18
|
-
* Previously added \n before and after, now returns clean tags without extra whitespace
|
|
19
|
-
*/
|
|
20
10
|
private wrapToken;
|
|
21
|
-
/**
|
|
22
|
-
* Preprocess blockquotes to mark them before other parsing
|
|
23
|
-
*/
|
|
24
11
|
private preprocessBlockquotes;
|
|
25
|
-
/**
|
|
26
|
-
* Process blockquote markers
|
|
27
|
-
* FIXED: Removed extra newlines from the replacement strings
|
|
28
|
-
*/
|
|
29
12
|
private processBlockquoteMarkers;
|
|
30
13
|
private defaultLinkProcessor;
|
|
31
14
|
private defaultCodeBlockProcessor;
|
package/dist/index.cjs
CHANGED
|
@@ -6,20 +6,15 @@ class MarkdownTokenizer {
|
|
|
6
6
|
constructor(text) {
|
|
7
7
|
this.text = text;
|
|
8
8
|
}
|
|
9
|
-
/**
|
|
10
|
-
* Tokenize the markdown text
|
|
11
|
-
*/
|
|
12
9
|
tokenize() {
|
|
13
10
|
const tokens = [];
|
|
14
11
|
let pos = 0;
|
|
15
12
|
const text = this.text;
|
|
16
13
|
while (pos < text.length) {
|
|
17
|
-
// Skip if inside code block
|
|
18
14
|
if (this.isInsideCodeBlock(text, pos)) {
|
|
19
15
|
pos++;
|
|
20
16
|
continue;
|
|
21
17
|
}
|
|
22
|
-
// Try to match each token type (from outermost to innermost)
|
|
23
18
|
const token = this.matchToken(pos);
|
|
24
19
|
if (token) {
|
|
25
20
|
tokens.push(token);
|
|
@@ -34,11 +29,22 @@ class MarkdownTokenizer {
|
|
|
34
29
|
matchToken(start) {
|
|
35
30
|
const text = this.text;
|
|
36
31
|
const remaining = text.slice(start);
|
|
37
|
-
// Skip if we're inside a quote marker
|
|
38
32
|
if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
|
|
39
33
|
return null;
|
|
40
34
|
}
|
|
41
|
-
// Match
|
|
35
|
+
// Match headings (###, ##)
|
|
36
|
+
const headingMatch = remaining.match(/^(#{1,3})\s+(.+?)(?=\n|$)/);
|
|
37
|
+
if (headingMatch) {
|
|
38
|
+
const level = headingMatch[1].length;
|
|
39
|
+
const content = headingMatch[2];
|
|
40
|
+
return {
|
|
41
|
+
type: `heading_${level}`,
|
|
42
|
+
content: content,
|
|
43
|
+
start: start,
|
|
44
|
+
end: start + headingMatch[0].length
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
// Match code block (triple backticks)
|
|
42
48
|
const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
|
|
43
49
|
if (codeBlockMatch) {
|
|
44
50
|
return {
|
|
@@ -99,13 +105,22 @@ class MarkdownTokenizer {
|
|
|
99
105
|
end: start + underlineMatch[0].length
|
|
100
106
|
};
|
|
101
107
|
}
|
|
102
|
-
// Match italic with asterisk
|
|
103
|
-
const italicAsteriskMatch = remaining.match(/^\*([^*\
|
|
108
|
+
// FIXED: Match italic with asterisk - require space before and after
|
|
109
|
+
const italicAsteriskMatch = remaining.match(/^\*([^*\s][^*]*?[^*\s])\*/);
|
|
104
110
|
if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
|
|
105
111
|
// Don't match if it's part of bold (**)
|
|
106
112
|
if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
|
|
107
113
|
return null;
|
|
108
114
|
}
|
|
115
|
+
// Check if preceded by alphanumeric or underscore (prevents matching words like *italic* inside words)
|
|
116
|
+
if (start > 0 && /[a-zA-Z0-9_]/.test(text[start - 1])) {
|
|
117
|
+
return null;
|
|
118
|
+
}
|
|
119
|
+
// Check if followed by alphanumeric or underscore
|
|
120
|
+
const afterMatch = start + italicAsteriskMatch[0].length;
|
|
121
|
+
if (afterMatch < text.length && /[a-zA-Z0-9_]/.test(text[afterMatch])) {
|
|
122
|
+
return null;
|
|
123
|
+
}
|
|
109
124
|
return {
|
|
110
125
|
type: 'italic',
|
|
111
126
|
content: italicAsteriskMatch[1],
|
|
@@ -113,13 +128,25 @@ class MarkdownTokenizer {
|
|
|
113
128
|
end: start + italicAsteriskMatch[0].length
|
|
114
129
|
};
|
|
115
130
|
}
|
|
116
|
-
// Match italic with underscore
|
|
117
|
-
const italicUnderscoreMatch = remaining.match(/^_([^_\
|
|
131
|
+
// FIXED: Match italic with underscore - require word boundaries and not part of username
|
|
132
|
+
const italicUnderscoreMatch = remaining.match(/^_([^_\s][^_]*?[^_\s])_/);
|
|
118
133
|
if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
|
|
119
134
|
// Don't match if it's part of underline (__)
|
|
120
135
|
if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
|
|
121
136
|
return null;
|
|
122
137
|
}
|
|
138
|
+
// FIXED: Don't match if preceded by @ (username) or alphanumeric
|
|
139
|
+
if (start > 0) {
|
|
140
|
+
const prevChar = text[start - 1];
|
|
141
|
+
if (prevChar === '@' || /[a-zA-Z0-9]/.test(prevChar)) {
|
|
142
|
+
return null;
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
// Don't match if followed by alphanumeric (part of word)
|
|
146
|
+
const afterMatch = start + italicUnderscoreMatch[0].length;
|
|
147
|
+
if (afterMatch < text.length && /[a-zA-Z0-9]/.test(text[afterMatch])) {
|
|
148
|
+
return null;
|
|
149
|
+
}
|
|
123
150
|
return {
|
|
124
151
|
type: 'italic',
|
|
125
152
|
content: italicUnderscoreMatch[1],
|
|
@@ -141,12 +168,10 @@ class MarkdownTokenizer {
|
|
|
141
168
|
return null;
|
|
142
169
|
}
|
|
143
170
|
isInsideCodeBlock(text, position) {
|
|
144
|
-
// Check for code blocks
|
|
145
171
|
const codeBlockRegex = /```[\s\S]*?```/g;
|
|
146
172
|
let match;
|
|
147
173
|
while ((match = codeBlockRegex.exec(text)) !== null) {
|
|
148
174
|
if (position > match.index && position < match.index + match[0].length) {
|
|
149
|
-
// But allow matching the closing ``` itself
|
|
150
175
|
if (position >= match.index + match[0].length - 3) {
|
|
151
176
|
return false;
|
|
152
177
|
}
|
|
@@ -156,12 +181,10 @@ class MarkdownTokenizer {
|
|
|
156
181
|
return false;
|
|
157
182
|
}
|
|
158
183
|
isInsideInlineCode(text, position) {
|
|
159
|
-
// Check for inline code
|
|
160
184
|
const inlineCodeRegex = /`[^`\n]*`/g;
|
|
161
185
|
let match;
|
|
162
186
|
while ((match = inlineCodeRegex.exec(text)) !== null) {
|
|
163
187
|
if (position > match.index && position < match.index + match[0].length) {
|
|
164
|
-
// But allow matching the closing ` itself
|
|
165
188
|
if (position === match.index + match[0].length - 1) {
|
|
166
189
|
return false;
|
|
167
190
|
}
|
|
@@ -216,86 +239,77 @@ class MarkdownConverter {
|
|
|
216
239
|
constructor(options = {}) {
|
|
217
240
|
this.hasCustomLinkProcessor = !!options.linkProcessor;
|
|
218
241
|
this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
|
|
242
|
+
// Fixed: Add heading symbol option (default: '▎')
|
|
243
|
+
this.headingSymbol = options.headingSymbol ?? '▎';
|
|
219
244
|
this.options = {
|
|
220
245
|
escapeHtml: options.escapeHtml ?? true,
|
|
221
246
|
autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
|
|
247
|
+
headingSymbol: options.headingSymbol ?? '▎',
|
|
248
|
+
headingBlank: options.headingBlank ?? false,
|
|
222
249
|
linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
|
|
223
250
|
codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
|
|
224
251
|
};
|
|
225
252
|
}
|
|
226
|
-
/**
|
|
227
|
-
* Convert markdown text to Telegram HTML
|
|
228
|
-
*/
|
|
229
253
|
convert(text) {
|
|
230
|
-
// Auto-close code blocks if enabled
|
|
231
254
|
let processedText = this.options.autoCloseCodeBlocks
|
|
232
255
|
? autoCloseCodeBlocks(text)
|
|
233
256
|
: text;
|
|
234
|
-
// First pass: convert blockquotes (they should be at line starts)
|
|
235
257
|
processedText = this.preprocessBlockquotes(processedText);
|
|
236
|
-
// Convert the text recursively
|
|
237
258
|
let result = this.convertRecursive(processedText);
|
|
238
|
-
// Process blockquote markers
|
|
239
259
|
result = this.processBlockquoteMarkers(result);
|
|
240
|
-
// Only trim if there's actual content (not just whitespace)
|
|
241
260
|
if (result.trim() === '') {
|
|
242
|
-
return text;
|
|
261
|
+
return text;
|
|
243
262
|
}
|
|
244
263
|
return result.trim();
|
|
245
264
|
}
|
|
246
|
-
/**
|
|
247
|
-
* Recursively convert markdown, handling nested styles
|
|
248
|
-
*/
|
|
249
265
|
convertRecursive(text, depth = 0) {
|
|
250
266
|
if (depth > 10)
|
|
251
|
-
return text;
|
|
252
|
-
// Tokenize the text
|
|
267
|
+
return text;
|
|
253
268
|
const tokenizer = new MarkdownTokenizer(text);
|
|
254
269
|
const tokens = tokenizer.tokenize();
|
|
255
|
-
// If no tokens found, return the text as-is (with HTML escaping)
|
|
256
270
|
if (tokens.length === 0) {
|
|
257
271
|
return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
|
|
258
272
|
}
|
|
259
273
|
let result = '';
|
|
260
274
|
let lastPos = 0;
|
|
261
275
|
for (const token of tokens) {
|
|
262
|
-
// Add text before token
|
|
263
276
|
if (token.start > lastPos) {
|
|
264
277
|
const textBefore = text.slice(lastPos, token.start);
|
|
265
278
|
result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
|
|
266
279
|
}
|
|
267
|
-
// Handle
|
|
280
|
+
// Fixed: Handle headings
|
|
281
|
+
if (token.type === 'heading_2' || token.type === 'heading_3') {
|
|
282
|
+
token.type === 'heading_2' ? '##' : '###';
|
|
283
|
+
const processedContent = this.convertRecursive(token.content, depth + 1);
|
|
284
|
+
// Fixed: Add symbol and bold styling
|
|
285
|
+
const symbol = this.options.headingBlank ? '' : this.headingSymbol;
|
|
286
|
+
const headingText = symbol ? `${symbol} ${processedContent}` : processedContent;
|
|
287
|
+
result += `<b>${headingText}</b>`;
|
|
288
|
+
lastPos = token.end;
|
|
289
|
+
continue;
|
|
290
|
+
}
|
|
268
291
|
if (token.type === 'code_block') {
|
|
269
292
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
270
293
|
result += this.wrapToken(token.type, codeContent, token.language);
|
|
271
294
|
lastPos = token.end;
|
|
272
295
|
continue;
|
|
273
296
|
}
|
|
274
|
-
// Handle inline code specially (no recursive parsing inside)
|
|
275
297
|
if (token.type === 'inline_code') {
|
|
276
298
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
277
299
|
result += `<code>${codeContent}</code>`;
|
|
278
300
|
lastPos = token.end;
|
|
279
301
|
continue;
|
|
280
302
|
}
|
|
281
|
-
// Process other token content recursively
|
|
282
303
|
const tokenContent = this.convertRecursive(token.content, depth + 1);
|
|
283
|
-
// Wrap the content in appropriate HTML tags
|
|
284
304
|
result += this.wrapToken(token.type, tokenContent, token.language);
|
|
285
305
|
lastPos = token.end;
|
|
286
306
|
}
|
|
287
|
-
// Add remaining text
|
|
288
307
|
if (lastPos < text.length) {
|
|
289
308
|
const remainingText = text.slice(lastPos);
|
|
290
309
|
result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
|
|
291
310
|
}
|
|
292
311
|
return result;
|
|
293
312
|
}
|
|
294
|
-
/**
|
|
295
|
-
* Wrap token content in HTML tags
|
|
296
|
-
* FIXED: Removed extra newlines that were being added around code blocks and quotes
|
|
297
|
-
* Previously added \n before and after, now returns clean tags without extra whitespace
|
|
298
|
-
*/
|
|
299
313
|
wrapToken(type, content, language) {
|
|
300
314
|
switch (type) {
|
|
301
315
|
case 'bold':
|
|
@@ -309,16 +323,13 @@ class MarkdownConverter {
|
|
|
309
323
|
case 'spoiler':
|
|
310
324
|
return `<span class="tg-spoiler">${content}</span>`;
|
|
311
325
|
case 'inline_code':
|
|
312
|
-
// Already handled above
|
|
313
326
|
return `<code>${content}</code>`;
|
|
314
327
|
case 'code_block':
|
|
315
|
-
// Already handled above, but handle custom processor
|
|
316
328
|
if (this.hasCustomCodeBlockProcessor) {
|
|
317
329
|
return this.options.codeBlockProcessor(content, language);
|
|
318
330
|
}
|
|
319
331
|
const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
320
332
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
321
|
-
// FIXED: Removed \n before and after - now returns just the tag
|
|
322
333
|
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
323
334
|
case 'link':
|
|
324
335
|
const url = language || '';
|
|
@@ -329,31 +340,23 @@ class MarkdownConverter {
|
|
|
329
340
|
const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
330
341
|
return `<a href="${escapedUrl}">${escapedText}</a>`;
|
|
331
342
|
case 'quote':
|
|
332
|
-
// FIXED: Removed \n before and after - now returns just the tag
|
|
333
343
|
return `<blockquote>${content.trim()}</blockquote>`;
|
|
334
344
|
case 'expandable_quote':
|
|
335
|
-
// FIXED: Removed \n before and after - now returns just the tag
|
|
336
345
|
return `<blockquote expandable>${content.trim()}</blockquote>`;
|
|
337
346
|
default:
|
|
338
347
|
return content;
|
|
339
348
|
}
|
|
340
349
|
}
|
|
341
|
-
/**
|
|
342
|
-
* Preprocess blockquotes to mark them before other parsing
|
|
343
|
-
*/
|
|
344
350
|
preprocessBlockquotes(text) {
|
|
345
351
|
const lines = text.split('\n');
|
|
346
352
|
const processedLines = [];
|
|
347
353
|
for (const line of lines) {
|
|
348
354
|
const trimmedLine = line.trim();
|
|
349
|
-
// Only treat lines starting with > at the beginning of line as blockquotes
|
|
350
355
|
if (trimmedLine.startsWith('**>')) {
|
|
351
|
-
// Expandable blockquote
|
|
352
356
|
const content = trimmedLine.substring(3).trim();
|
|
353
357
|
processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
|
|
354
358
|
}
|
|
355
359
|
else if (trimmedLine.startsWith('>')) {
|
|
356
|
-
// Regular blockquote
|
|
357
360
|
const content = trimmedLine.substring(1).trim();
|
|
358
361
|
processedLines.push(`[QUOTE]${content}`);
|
|
359
362
|
}
|
|
@@ -363,24 +366,16 @@ class MarkdownConverter {
|
|
|
363
366
|
}
|
|
364
367
|
return processedLines.join('\n');
|
|
365
368
|
}
|
|
366
|
-
/**
|
|
367
|
-
* Process blockquote markers
|
|
368
|
-
* FIXED: Removed extra newlines from the replacement strings
|
|
369
|
-
*/
|
|
370
369
|
processBlockquoteMarkers(text) {
|
|
371
370
|
let result = text;
|
|
372
|
-
// Replace expandable quote markers (process content recursively)
|
|
373
371
|
const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
|
|
374
372
|
result = result.replace(expandableQuoteRegex, (match, content) => {
|
|
375
373
|
const processedContent = this.convertRecursive(content);
|
|
376
|
-
// FIXED: Removed \n before and after
|
|
377
374
|
return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
|
|
378
375
|
});
|
|
379
|
-
// Replace regular quote markers (process content recursively)
|
|
380
376
|
const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
|
|
381
377
|
result = result.replace(quoteRegex, (match, content) => {
|
|
382
378
|
const processedContent = this.convertRecursive(content);
|
|
383
|
-
// FIXED: Removed \n before and after
|
|
384
379
|
return `<blockquote>${processedContent.trim()}</blockquote>`;
|
|
385
380
|
});
|
|
386
381
|
return result;
|
|
@@ -393,7 +388,6 @@ class MarkdownConverter {
|
|
|
393
388
|
defaultCodeBlockProcessor(code, language) {
|
|
394
389
|
const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
|
|
395
390
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
396
|
-
// FIXED: Removed \n before and after in default processor too
|
|
397
391
|
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
398
392
|
}
|
|
399
393
|
}
|
package/dist/index.esm.js
CHANGED
|
@@ -2,20 +2,15 @@ class MarkdownTokenizer {
|
|
|
2
2
|
constructor(text) {
|
|
3
3
|
this.text = text;
|
|
4
4
|
}
|
|
5
|
-
/**
|
|
6
|
-
* Tokenize the markdown text
|
|
7
|
-
*/
|
|
8
5
|
tokenize() {
|
|
9
6
|
const tokens = [];
|
|
10
7
|
let pos = 0;
|
|
11
8
|
const text = this.text;
|
|
12
9
|
while (pos < text.length) {
|
|
13
|
-
// Skip if inside code block
|
|
14
10
|
if (this.isInsideCodeBlock(text, pos)) {
|
|
15
11
|
pos++;
|
|
16
12
|
continue;
|
|
17
13
|
}
|
|
18
|
-
// Try to match each token type (from outermost to innermost)
|
|
19
14
|
const token = this.matchToken(pos);
|
|
20
15
|
if (token) {
|
|
21
16
|
tokens.push(token);
|
|
@@ -30,11 +25,22 @@ class MarkdownTokenizer {
|
|
|
30
25
|
matchToken(start) {
|
|
31
26
|
const text = this.text;
|
|
32
27
|
const remaining = text.slice(start);
|
|
33
|
-
// Skip if we're inside a quote marker
|
|
34
28
|
if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
|
|
35
29
|
return null;
|
|
36
30
|
}
|
|
37
|
-
// Match
|
|
31
|
+
// Match headings (###, ##)
|
|
32
|
+
const headingMatch = remaining.match(/^(#{1,3})\s+(.+?)(?=\n|$)/);
|
|
33
|
+
if (headingMatch) {
|
|
34
|
+
const level = headingMatch[1].length;
|
|
35
|
+
const content = headingMatch[2];
|
|
36
|
+
return {
|
|
37
|
+
type: `heading_${level}`,
|
|
38
|
+
content: content,
|
|
39
|
+
start: start,
|
|
40
|
+
end: start + headingMatch[0].length
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
// Match code block (triple backticks)
|
|
38
44
|
const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
|
|
39
45
|
if (codeBlockMatch) {
|
|
40
46
|
return {
|
|
@@ -95,13 +101,22 @@ class MarkdownTokenizer {
|
|
|
95
101
|
end: start + underlineMatch[0].length
|
|
96
102
|
};
|
|
97
103
|
}
|
|
98
|
-
// Match italic with asterisk
|
|
99
|
-
const italicAsteriskMatch = remaining.match(/^\*([^*\
|
|
104
|
+
// FIXED: Match italic with asterisk - require space before and after
|
|
105
|
+
const italicAsteriskMatch = remaining.match(/^\*([^*\s][^*]*?[^*\s])\*/);
|
|
100
106
|
if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
|
|
101
107
|
// Don't match if it's part of bold (**)
|
|
102
108
|
if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
|
|
103
109
|
return null;
|
|
104
110
|
}
|
|
111
|
+
// Check if preceded by alphanumeric or underscore (prevents matching words like *italic* inside words)
|
|
112
|
+
if (start > 0 && /[a-zA-Z0-9_]/.test(text[start - 1])) {
|
|
113
|
+
return null;
|
|
114
|
+
}
|
|
115
|
+
// Check if followed by alphanumeric or underscore
|
|
116
|
+
const afterMatch = start + italicAsteriskMatch[0].length;
|
|
117
|
+
if (afterMatch < text.length && /[a-zA-Z0-9_]/.test(text[afterMatch])) {
|
|
118
|
+
return null;
|
|
119
|
+
}
|
|
105
120
|
return {
|
|
106
121
|
type: 'italic',
|
|
107
122
|
content: italicAsteriskMatch[1],
|
|
@@ -109,13 +124,25 @@ class MarkdownTokenizer {
|
|
|
109
124
|
end: start + italicAsteriskMatch[0].length
|
|
110
125
|
};
|
|
111
126
|
}
|
|
112
|
-
// Match italic with underscore
|
|
113
|
-
const italicUnderscoreMatch = remaining.match(/^_([^_\
|
|
127
|
+
// FIXED: Match italic with underscore - require word boundaries and not part of username
|
|
128
|
+
const italicUnderscoreMatch = remaining.match(/^_([^_\s][^_]*?[^_\s])_/);
|
|
114
129
|
if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
|
|
115
130
|
// Don't match if it's part of underline (__)
|
|
116
131
|
if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
|
|
117
132
|
return null;
|
|
118
133
|
}
|
|
134
|
+
// FIXED: Don't match if preceded by @ (username) or alphanumeric
|
|
135
|
+
if (start > 0) {
|
|
136
|
+
const prevChar = text[start - 1];
|
|
137
|
+
if (prevChar === '@' || /[a-zA-Z0-9]/.test(prevChar)) {
|
|
138
|
+
return null;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
// Don't match if followed by alphanumeric (part of word)
|
|
142
|
+
const afterMatch = start + italicUnderscoreMatch[0].length;
|
|
143
|
+
if (afterMatch < text.length && /[a-zA-Z0-9]/.test(text[afterMatch])) {
|
|
144
|
+
return null;
|
|
145
|
+
}
|
|
119
146
|
return {
|
|
120
147
|
type: 'italic',
|
|
121
148
|
content: italicUnderscoreMatch[1],
|
|
@@ -137,12 +164,10 @@ class MarkdownTokenizer {
|
|
|
137
164
|
return null;
|
|
138
165
|
}
|
|
139
166
|
isInsideCodeBlock(text, position) {
|
|
140
|
-
// Check for code blocks
|
|
141
167
|
const codeBlockRegex = /```[\s\S]*?```/g;
|
|
142
168
|
let match;
|
|
143
169
|
while ((match = codeBlockRegex.exec(text)) !== null) {
|
|
144
170
|
if (position > match.index && position < match.index + match[0].length) {
|
|
145
|
-
// But allow matching the closing ``` itself
|
|
146
171
|
if (position >= match.index + match[0].length - 3) {
|
|
147
172
|
return false;
|
|
148
173
|
}
|
|
@@ -152,12 +177,10 @@ class MarkdownTokenizer {
|
|
|
152
177
|
return false;
|
|
153
178
|
}
|
|
154
179
|
isInsideInlineCode(text, position) {
|
|
155
|
-
// Check for inline code
|
|
156
180
|
const inlineCodeRegex = /`[^`\n]*`/g;
|
|
157
181
|
let match;
|
|
158
182
|
while ((match = inlineCodeRegex.exec(text)) !== null) {
|
|
159
183
|
if (position > match.index && position < match.index + match[0].length) {
|
|
160
|
-
// But allow matching the closing ` itself
|
|
161
184
|
if (position === match.index + match[0].length - 1) {
|
|
162
185
|
return false;
|
|
163
186
|
}
|
|
@@ -212,86 +235,77 @@ class MarkdownConverter {
|
|
|
212
235
|
constructor(options = {}) {
|
|
213
236
|
this.hasCustomLinkProcessor = !!options.linkProcessor;
|
|
214
237
|
this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
|
|
238
|
+
// Fixed: Add heading symbol option (default: '▎')
|
|
239
|
+
this.headingSymbol = options.headingSymbol ?? '▎';
|
|
215
240
|
this.options = {
|
|
216
241
|
escapeHtml: options.escapeHtml ?? true,
|
|
217
242
|
autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
|
|
243
|
+
headingSymbol: options.headingSymbol ?? '▎',
|
|
244
|
+
headingBlank: options.headingBlank ?? false,
|
|
218
245
|
linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
|
|
219
246
|
codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
|
|
220
247
|
};
|
|
221
248
|
}
|
|
222
|
-
/**
|
|
223
|
-
* Convert markdown text to Telegram HTML
|
|
224
|
-
*/
|
|
225
249
|
convert(text) {
|
|
226
|
-
// Auto-close code blocks if enabled
|
|
227
250
|
let processedText = this.options.autoCloseCodeBlocks
|
|
228
251
|
? autoCloseCodeBlocks(text)
|
|
229
252
|
: text;
|
|
230
|
-
// First pass: convert blockquotes (they should be at line starts)
|
|
231
253
|
processedText = this.preprocessBlockquotes(processedText);
|
|
232
|
-
// Convert the text recursively
|
|
233
254
|
let result = this.convertRecursive(processedText);
|
|
234
|
-
// Process blockquote markers
|
|
235
255
|
result = this.processBlockquoteMarkers(result);
|
|
236
|
-
// Only trim if there's actual content (not just whitespace)
|
|
237
256
|
if (result.trim() === '') {
|
|
238
|
-
return text;
|
|
257
|
+
return text;
|
|
239
258
|
}
|
|
240
259
|
return result.trim();
|
|
241
260
|
}
|
|
242
|
-
/**
|
|
243
|
-
* Recursively convert markdown, handling nested styles
|
|
244
|
-
*/
|
|
245
261
|
convertRecursive(text, depth = 0) {
|
|
246
262
|
if (depth > 10)
|
|
247
|
-
return text;
|
|
248
|
-
// Tokenize the text
|
|
263
|
+
return text;
|
|
249
264
|
const tokenizer = new MarkdownTokenizer(text);
|
|
250
265
|
const tokens = tokenizer.tokenize();
|
|
251
|
-
// If no tokens found, return the text as-is (with HTML escaping)
|
|
252
266
|
if (tokens.length === 0) {
|
|
253
267
|
return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
|
|
254
268
|
}
|
|
255
269
|
let result = '';
|
|
256
270
|
let lastPos = 0;
|
|
257
271
|
for (const token of tokens) {
|
|
258
|
-
// Add text before token
|
|
259
272
|
if (token.start > lastPos) {
|
|
260
273
|
const textBefore = text.slice(lastPos, token.start);
|
|
261
274
|
result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
|
|
262
275
|
}
|
|
263
|
-
// Handle
|
|
276
|
+
// Fixed: Handle headings
|
|
277
|
+
if (token.type === 'heading_2' || token.type === 'heading_3') {
|
|
278
|
+
token.type === 'heading_2' ? '##' : '###';
|
|
279
|
+
const processedContent = this.convertRecursive(token.content, depth + 1);
|
|
280
|
+
// Fixed: Add symbol and bold styling
|
|
281
|
+
const symbol = this.options.headingBlank ? '' : this.headingSymbol;
|
|
282
|
+
const headingText = symbol ? `${symbol} ${processedContent}` : processedContent;
|
|
283
|
+
result += `<b>${headingText}</b>`;
|
|
284
|
+
lastPos = token.end;
|
|
285
|
+
continue;
|
|
286
|
+
}
|
|
264
287
|
if (token.type === 'code_block') {
|
|
265
288
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
266
289
|
result += this.wrapToken(token.type, codeContent, token.language);
|
|
267
290
|
lastPos = token.end;
|
|
268
291
|
continue;
|
|
269
292
|
}
|
|
270
|
-
// Handle inline code specially (no recursive parsing inside)
|
|
271
293
|
if (token.type === 'inline_code') {
|
|
272
294
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
273
295
|
result += `<code>${codeContent}</code>`;
|
|
274
296
|
lastPos = token.end;
|
|
275
297
|
continue;
|
|
276
298
|
}
|
|
277
|
-
// Process other token content recursively
|
|
278
299
|
const tokenContent = this.convertRecursive(token.content, depth + 1);
|
|
279
|
-
// Wrap the content in appropriate HTML tags
|
|
280
300
|
result += this.wrapToken(token.type, tokenContent, token.language);
|
|
281
301
|
lastPos = token.end;
|
|
282
302
|
}
|
|
283
|
-
// Add remaining text
|
|
284
303
|
if (lastPos < text.length) {
|
|
285
304
|
const remainingText = text.slice(lastPos);
|
|
286
305
|
result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
|
|
287
306
|
}
|
|
288
307
|
return result;
|
|
289
308
|
}
|
|
290
|
-
/**
|
|
291
|
-
* Wrap token content in HTML tags
|
|
292
|
-
* FIXED: Removed extra newlines that were being added around code blocks and quotes
|
|
293
|
-
* Previously added \n before and after, now returns clean tags without extra whitespace
|
|
294
|
-
*/
|
|
295
309
|
wrapToken(type, content, language) {
|
|
296
310
|
switch (type) {
|
|
297
311
|
case 'bold':
|
|
@@ -305,16 +319,13 @@ class MarkdownConverter {
|
|
|
305
319
|
case 'spoiler':
|
|
306
320
|
return `<span class="tg-spoiler">${content}</span>`;
|
|
307
321
|
case 'inline_code':
|
|
308
|
-
// Already handled above
|
|
309
322
|
return `<code>${content}</code>`;
|
|
310
323
|
case 'code_block':
|
|
311
|
-
// Already handled above, but handle custom processor
|
|
312
324
|
if (this.hasCustomCodeBlockProcessor) {
|
|
313
325
|
return this.options.codeBlockProcessor(content, language);
|
|
314
326
|
}
|
|
315
327
|
const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
316
328
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
317
|
-
// FIXED: Removed \n before and after - now returns just the tag
|
|
318
329
|
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
319
330
|
case 'link':
|
|
320
331
|
const url = language || '';
|
|
@@ -325,31 +336,23 @@ class MarkdownConverter {
|
|
|
325
336
|
const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
326
337
|
return `<a href="${escapedUrl}">${escapedText}</a>`;
|
|
327
338
|
case 'quote':
|
|
328
|
-
// FIXED: Removed \n before and after - now returns just the tag
|
|
329
339
|
return `<blockquote>${content.trim()}</blockquote>`;
|
|
330
340
|
case 'expandable_quote':
|
|
331
|
-
// FIXED: Removed \n before and after - now returns just the tag
|
|
332
341
|
return `<blockquote expandable>${content.trim()}</blockquote>`;
|
|
333
342
|
default:
|
|
334
343
|
return content;
|
|
335
344
|
}
|
|
336
345
|
}
|
|
337
|
-
/**
|
|
338
|
-
* Preprocess blockquotes to mark them before other parsing
|
|
339
|
-
*/
|
|
340
346
|
preprocessBlockquotes(text) {
|
|
341
347
|
const lines = text.split('\n');
|
|
342
348
|
const processedLines = [];
|
|
343
349
|
for (const line of lines) {
|
|
344
350
|
const trimmedLine = line.trim();
|
|
345
|
-
// Only treat lines starting with > at the beginning of line as blockquotes
|
|
346
351
|
if (trimmedLine.startsWith('**>')) {
|
|
347
|
-
// Expandable blockquote
|
|
348
352
|
const content = trimmedLine.substring(3).trim();
|
|
349
353
|
processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
|
|
350
354
|
}
|
|
351
355
|
else if (trimmedLine.startsWith('>')) {
|
|
352
|
-
// Regular blockquote
|
|
353
356
|
const content = trimmedLine.substring(1).trim();
|
|
354
357
|
processedLines.push(`[QUOTE]${content}`);
|
|
355
358
|
}
|
|
@@ -359,24 +362,16 @@ class MarkdownConverter {
|
|
|
359
362
|
}
|
|
360
363
|
return processedLines.join('\n');
|
|
361
364
|
}
|
|
362
|
-
/**
|
|
363
|
-
* Process blockquote markers
|
|
364
|
-
* FIXED: Removed extra newlines from the replacement strings
|
|
365
|
-
*/
|
|
366
365
|
processBlockquoteMarkers(text) {
|
|
367
366
|
let result = text;
|
|
368
|
-
// Replace expandable quote markers (process content recursively)
|
|
369
367
|
const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
|
|
370
368
|
result = result.replace(expandableQuoteRegex, (match, content) => {
|
|
371
369
|
const processedContent = this.convertRecursive(content);
|
|
372
|
-
// FIXED: Removed \n before and after
|
|
373
370
|
return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
|
|
374
371
|
});
|
|
375
|
-
// Replace regular quote markers (process content recursively)
|
|
376
372
|
const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
|
|
377
373
|
result = result.replace(quoteRegex, (match, content) => {
|
|
378
374
|
const processedContent = this.convertRecursive(content);
|
|
379
|
-
// FIXED: Removed \n before and after
|
|
380
375
|
return `<blockquote>${processedContent.trim()}</blockquote>`;
|
|
381
376
|
});
|
|
382
377
|
return result;
|
|
@@ -389,7 +384,6 @@ class MarkdownConverter {
|
|
|
389
384
|
defaultCodeBlockProcessor(code, language) {
|
|
390
385
|
const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
|
|
391
386
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
392
|
-
// FIXED: Removed \n before and after in default processor too
|
|
393
387
|
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
394
388
|
}
|
|
395
389
|
}
|