telegram-md2html 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +379 -324
- package/dist/converter.d.ts +1 -18
- package/dist/index.cjs +55 -61
- package/dist/index.esm.js +55 -61
- package/dist/index.mjs +55 -61
- package/dist/index.umd.js +55 -61
- package/dist/tokenizer.d.ts +0 -3
- package/dist/types.d.ts +9 -0
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -2,20 +2,15 @@ class MarkdownTokenizer {
|
|
|
2
2
|
constructor(text) {
|
|
3
3
|
this.text = text;
|
|
4
4
|
}
|
|
5
|
-
/**
|
|
6
|
-
* Tokenize the markdown text
|
|
7
|
-
*/
|
|
8
5
|
tokenize() {
|
|
9
6
|
const tokens = [];
|
|
10
7
|
let pos = 0;
|
|
11
8
|
const text = this.text;
|
|
12
9
|
while (pos < text.length) {
|
|
13
|
-
// Skip if inside code block
|
|
14
10
|
if (this.isInsideCodeBlock(text, pos)) {
|
|
15
11
|
pos++;
|
|
16
12
|
continue;
|
|
17
13
|
}
|
|
18
|
-
// Try to match each token type (from outermost to innermost)
|
|
19
14
|
const token = this.matchToken(pos);
|
|
20
15
|
if (token) {
|
|
21
16
|
tokens.push(token);
|
|
@@ -30,11 +25,22 @@ class MarkdownTokenizer {
|
|
|
30
25
|
matchToken(start) {
|
|
31
26
|
const text = this.text;
|
|
32
27
|
const remaining = text.slice(start);
|
|
33
|
-
// Skip if we're inside a quote marker
|
|
34
28
|
if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
|
|
35
29
|
return null;
|
|
36
30
|
}
|
|
37
|
-
// Match
|
|
31
|
+
// Match headings (###, ##)
|
|
32
|
+
const headingMatch = remaining.match(/^(#{1,3})\s+(.+?)(?=\n|$)/);
|
|
33
|
+
if (headingMatch) {
|
|
34
|
+
const level = headingMatch[1].length;
|
|
35
|
+
const content = headingMatch[2];
|
|
36
|
+
return {
|
|
37
|
+
type: `heading_${level}`,
|
|
38
|
+
content: content,
|
|
39
|
+
start: start,
|
|
40
|
+
end: start + headingMatch[0].length
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
// Match code block (triple backticks)
|
|
38
44
|
const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
|
|
39
45
|
if (codeBlockMatch) {
|
|
40
46
|
return {
|
|
@@ -95,13 +101,22 @@ class MarkdownTokenizer {
|
|
|
95
101
|
end: start + underlineMatch[0].length
|
|
96
102
|
};
|
|
97
103
|
}
|
|
98
|
-
// Match italic with asterisk
|
|
99
|
-
const italicAsteriskMatch = remaining.match(/^\*([^*\
|
|
104
|
+
// FIXED: Match italic with asterisk - require space before and after
|
|
105
|
+
const italicAsteriskMatch = remaining.match(/^\*([^*\s][^*]*?[^*\s])\*/);
|
|
100
106
|
if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
|
|
101
107
|
// Don't match if it's part of bold (**)
|
|
102
108
|
if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
|
|
103
109
|
return null;
|
|
104
110
|
}
|
|
111
|
+
// Check if preceded by alphanumeric or underscore (prevents matching words like *italic* inside words)
|
|
112
|
+
if (start > 0 && /[a-zA-Z0-9_]/.test(text[start - 1])) {
|
|
113
|
+
return null;
|
|
114
|
+
}
|
|
115
|
+
// Check if followed by alphanumeric or underscore
|
|
116
|
+
const afterMatch = start + italicAsteriskMatch[0].length;
|
|
117
|
+
if (afterMatch < text.length && /[a-zA-Z0-9_]/.test(text[afterMatch])) {
|
|
118
|
+
return null;
|
|
119
|
+
}
|
|
105
120
|
return {
|
|
106
121
|
type: 'italic',
|
|
107
122
|
content: italicAsteriskMatch[1],
|
|
@@ -109,13 +124,25 @@ class MarkdownTokenizer {
|
|
|
109
124
|
end: start + italicAsteriskMatch[0].length
|
|
110
125
|
};
|
|
111
126
|
}
|
|
112
|
-
// Match italic with underscore
|
|
113
|
-
const italicUnderscoreMatch = remaining.match(/^_([^_\
|
|
127
|
+
// FIXED: Match italic with underscore - require word boundaries and not part of username
|
|
128
|
+
const italicUnderscoreMatch = remaining.match(/^_([^_\s][^_]*?[^_\s])_/);
|
|
114
129
|
if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
|
|
115
130
|
// Don't match if it's part of underline (__)
|
|
116
131
|
if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
|
|
117
132
|
return null;
|
|
118
133
|
}
|
|
134
|
+
// FIXED: Don't match if preceded by @ (username) or alphanumeric
|
|
135
|
+
if (start > 0) {
|
|
136
|
+
const prevChar = text[start - 1];
|
|
137
|
+
if (prevChar === '@' || /[a-zA-Z0-9]/.test(prevChar)) {
|
|
138
|
+
return null;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
// Don't match if followed by alphanumeric (part of word)
|
|
142
|
+
const afterMatch = start + italicUnderscoreMatch[0].length;
|
|
143
|
+
if (afterMatch < text.length && /[a-zA-Z0-9]/.test(text[afterMatch])) {
|
|
144
|
+
return null;
|
|
145
|
+
}
|
|
119
146
|
return {
|
|
120
147
|
type: 'italic',
|
|
121
148
|
content: italicUnderscoreMatch[1],
|
|
@@ -137,12 +164,10 @@ class MarkdownTokenizer {
|
|
|
137
164
|
return null;
|
|
138
165
|
}
|
|
139
166
|
isInsideCodeBlock(text, position) {
|
|
140
|
-
// Check for code blocks
|
|
141
167
|
const codeBlockRegex = /```[\s\S]*?```/g;
|
|
142
168
|
let match;
|
|
143
169
|
while ((match = codeBlockRegex.exec(text)) !== null) {
|
|
144
170
|
if (position > match.index && position < match.index + match[0].length) {
|
|
145
|
-
// But allow matching the closing ``` itself
|
|
146
171
|
if (position >= match.index + match[0].length - 3) {
|
|
147
172
|
return false;
|
|
148
173
|
}
|
|
@@ -152,12 +177,10 @@ class MarkdownTokenizer {
|
|
|
152
177
|
return false;
|
|
153
178
|
}
|
|
154
179
|
isInsideInlineCode(text, position) {
|
|
155
|
-
// Check for inline code
|
|
156
180
|
const inlineCodeRegex = /`[^`\n]*`/g;
|
|
157
181
|
let match;
|
|
158
182
|
while ((match = inlineCodeRegex.exec(text)) !== null) {
|
|
159
183
|
if (position > match.index && position < match.index + match[0].length) {
|
|
160
|
-
// But allow matching the closing ` itself
|
|
161
184
|
if (position === match.index + match[0].length - 1) {
|
|
162
185
|
return false;
|
|
163
186
|
}
|
|
@@ -212,86 +235,77 @@ class MarkdownConverter {
|
|
|
212
235
|
constructor(options = {}) {
|
|
213
236
|
this.hasCustomLinkProcessor = !!options.linkProcessor;
|
|
214
237
|
this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
|
|
238
|
+
// Fixed: Add heading symbol option (default: '▎')
|
|
239
|
+
this.headingSymbol = options.headingSymbol ?? '▎';
|
|
215
240
|
this.options = {
|
|
216
241
|
escapeHtml: options.escapeHtml ?? true,
|
|
217
242
|
autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
|
|
243
|
+
headingSymbol: options.headingSymbol ?? '▎',
|
|
244
|
+
headingBlank: options.headingBlank ?? false,
|
|
218
245
|
linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
|
|
219
246
|
codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
|
|
220
247
|
};
|
|
221
248
|
}
|
|
222
|
-
/**
|
|
223
|
-
* Convert markdown text to Telegram HTML
|
|
224
|
-
*/
|
|
225
249
|
convert(text) {
|
|
226
|
-
// Auto-close code blocks if enabled
|
|
227
250
|
let processedText = this.options.autoCloseCodeBlocks
|
|
228
251
|
? autoCloseCodeBlocks(text)
|
|
229
252
|
: text;
|
|
230
|
-
// First pass: convert blockquotes (they should be at line starts)
|
|
231
253
|
processedText = this.preprocessBlockquotes(processedText);
|
|
232
|
-
// Convert the text recursively
|
|
233
254
|
let result = this.convertRecursive(processedText);
|
|
234
|
-
// Process blockquote markers
|
|
235
255
|
result = this.processBlockquoteMarkers(result);
|
|
236
|
-
// Only trim if there's actual content (not just whitespace)
|
|
237
256
|
if (result.trim() === '') {
|
|
238
|
-
return text;
|
|
257
|
+
return text;
|
|
239
258
|
}
|
|
240
259
|
return result.trim();
|
|
241
260
|
}
|
|
242
|
-
/**
|
|
243
|
-
* Recursively convert markdown, handling nested styles
|
|
244
|
-
*/
|
|
245
261
|
convertRecursive(text, depth = 0) {
|
|
246
262
|
if (depth > 10)
|
|
247
|
-
return text;
|
|
248
|
-
// Tokenize the text
|
|
263
|
+
return text;
|
|
249
264
|
const tokenizer = new MarkdownTokenizer(text);
|
|
250
265
|
const tokens = tokenizer.tokenize();
|
|
251
|
-
// If no tokens found, return the text as-is (with HTML escaping)
|
|
252
266
|
if (tokens.length === 0) {
|
|
253
267
|
return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
|
|
254
268
|
}
|
|
255
269
|
let result = '';
|
|
256
270
|
let lastPos = 0;
|
|
257
271
|
for (const token of tokens) {
|
|
258
|
-
// Add text before token
|
|
259
272
|
if (token.start > lastPos) {
|
|
260
273
|
const textBefore = text.slice(lastPos, token.start);
|
|
261
274
|
result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
|
|
262
275
|
}
|
|
263
|
-
// Handle
|
|
276
|
+
// Fixed: Handle headings
|
|
277
|
+
if (token.type === 'heading_2' || token.type === 'heading_3') {
|
|
278
|
+
token.type === 'heading_2' ? '##' : '###';
|
|
279
|
+
const processedContent = this.convertRecursive(token.content, depth + 1);
|
|
280
|
+
// Fixed: Add symbol and bold styling
|
|
281
|
+
const symbol = this.options.headingBlank ? '' : this.headingSymbol;
|
|
282
|
+
const headingText = symbol ? `${symbol} ${processedContent}` : processedContent;
|
|
283
|
+
result += `<b>${headingText}</b>`;
|
|
284
|
+
lastPos = token.end;
|
|
285
|
+
continue;
|
|
286
|
+
}
|
|
264
287
|
if (token.type === 'code_block') {
|
|
265
288
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
266
289
|
result += this.wrapToken(token.type, codeContent, token.language);
|
|
267
290
|
lastPos = token.end;
|
|
268
291
|
continue;
|
|
269
292
|
}
|
|
270
|
-
// Handle inline code specially (no recursive parsing inside)
|
|
271
293
|
if (token.type === 'inline_code') {
|
|
272
294
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
273
295
|
result += `<code>${codeContent}</code>`;
|
|
274
296
|
lastPos = token.end;
|
|
275
297
|
continue;
|
|
276
298
|
}
|
|
277
|
-
// Process other token content recursively
|
|
278
299
|
const tokenContent = this.convertRecursive(token.content, depth + 1);
|
|
279
|
-
// Wrap the content in appropriate HTML tags
|
|
280
300
|
result += this.wrapToken(token.type, tokenContent, token.language);
|
|
281
301
|
lastPos = token.end;
|
|
282
302
|
}
|
|
283
|
-
// Add remaining text
|
|
284
303
|
if (lastPos < text.length) {
|
|
285
304
|
const remainingText = text.slice(lastPos);
|
|
286
305
|
result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
|
|
287
306
|
}
|
|
288
307
|
return result;
|
|
289
308
|
}
|
|
290
|
-
/**
|
|
291
|
-
* Wrap token content in HTML tags
|
|
292
|
-
* FIXED: Removed extra newlines that were being added around code blocks and quotes
|
|
293
|
-
* Previously added \n before and after, now returns clean tags without extra whitespace
|
|
294
|
-
*/
|
|
295
309
|
wrapToken(type, content, language) {
|
|
296
310
|
switch (type) {
|
|
297
311
|
case 'bold':
|
|
@@ -305,16 +319,13 @@ class MarkdownConverter {
|
|
|
305
319
|
case 'spoiler':
|
|
306
320
|
return `<span class="tg-spoiler">${content}</span>`;
|
|
307
321
|
case 'inline_code':
|
|
308
|
-
// Already handled above
|
|
309
322
|
return `<code>${content}</code>`;
|
|
310
323
|
case 'code_block':
|
|
311
|
-
// Already handled above, but handle custom processor
|
|
312
324
|
if (this.hasCustomCodeBlockProcessor) {
|
|
313
325
|
return this.options.codeBlockProcessor(content, language);
|
|
314
326
|
}
|
|
315
327
|
const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
316
328
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
317
|
-
// FIXED: Removed \n before and after - now returns just the tag
|
|
318
329
|
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
319
330
|
case 'link':
|
|
320
331
|
const url = language || '';
|
|
@@ -325,31 +336,23 @@ class MarkdownConverter {
|
|
|
325
336
|
const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
326
337
|
return `<a href="${escapedUrl}">${escapedText}</a>`;
|
|
327
338
|
case 'quote':
|
|
328
|
-
// FIXED: Removed \n before and after - now returns just the tag
|
|
329
339
|
return `<blockquote>${content.trim()}</blockquote>`;
|
|
330
340
|
case 'expandable_quote':
|
|
331
|
-
// FIXED: Removed \n before and after - now returns just the tag
|
|
332
341
|
return `<blockquote expandable>${content.trim()}</blockquote>`;
|
|
333
342
|
default:
|
|
334
343
|
return content;
|
|
335
344
|
}
|
|
336
345
|
}
|
|
337
|
-
/**
|
|
338
|
-
* Preprocess blockquotes to mark them before other parsing
|
|
339
|
-
*/
|
|
340
346
|
preprocessBlockquotes(text) {
|
|
341
347
|
const lines = text.split('\n');
|
|
342
348
|
const processedLines = [];
|
|
343
349
|
for (const line of lines) {
|
|
344
350
|
const trimmedLine = line.trim();
|
|
345
|
-
// Only treat lines starting with > at the beginning of line as blockquotes
|
|
346
351
|
if (trimmedLine.startsWith('**>')) {
|
|
347
|
-
// Expandable blockquote
|
|
348
352
|
const content = trimmedLine.substring(3).trim();
|
|
349
353
|
processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
|
|
350
354
|
}
|
|
351
355
|
else if (trimmedLine.startsWith('>')) {
|
|
352
|
-
// Regular blockquote
|
|
353
356
|
const content = trimmedLine.substring(1).trim();
|
|
354
357
|
processedLines.push(`[QUOTE]${content}`);
|
|
355
358
|
}
|
|
@@ -359,24 +362,16 @@ class MarkdownConverter {
|
|
|
359
362
|
}
|
|
360
363
|
return processedLines.join('\n');
|
|
361
364
|
}
|
|
362
|
-
/**
|
|
363
|
-
* Process blockquote markers
|
|
364
|
-
* FIXED: Removed extra newlines from the replacement strings
|
|
365
|
-
*/
|
|
366
365
|
processBlockquoteMarkers(text) {
|
|
367
366
|
let result = text;
|
|
368
|
-
// Replace expandable quote markers (process content recursively)
|
|
369
367
|
const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
|
|
370
368
|
result = result.replace(expandableQuoteRegex, (match, content) => {
|
|
371
369
|
const processedContent = this.convertRecursive(content);
|
|
372
|
-
// FIXED: Removed \n before and after
|
|
373
370
|
return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
|
|
374
371
|
});
|
|
375
|
-
// Replace regular quote markers (process content recursively)
|
|
376
372
|
const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
|
|
377
373
|
result = result.replace(quoteRegex, (match, content) => {
|
|
378
374
|
const processedContent = this.convertRecursive(content);
|
|
379
|
-
// FIXED: Removed \n before and after
|
|
380
375
|
return `<blockquote>${processedContent.trim()}</blockquote>`;
|
|
381
376
|
});
|
|
382
377
|
return result;
|
|
@@ -389,7 +384,6 @@ class MarkdownConverter {
|
|
|
389
384
|
defaultCodeBlockProcessor(code, language) {
|
|
390
385
|
const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
|
|
391
386
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
392
|
-
// FIXED: Removed \n before and after in default processor too
|
|
393
387
|
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
394
388
|
}
|
|
395
389
|
}
|
package/dist/index.umd.js
CHANGED
|
@@ -8,20 +8,15 @@
|
|
|
8
8
|
constructor(text) {
|
|
9
9
|
this.text = text;
|
|
10
10
|
}
|
|
11
|
-
/**
|
|
12
|
-
* Tokenize the markdown text
|
|
13
|
-
*/
|
|
14
11
|
tokenize() {
|
|
15
12
|
const tokens = [];
|
|
16
13
|
let pos = 0;
|
|
17
14
|
const text = this.text;
|
|
18
15
|
while (pos < text.length) {
|
|
19
|
-
// Skip if inside code block
|
|
20
16
|
if (this.isInsideCodeBlock(text, pos)) {
|
|
21
17
|
pos++;
|
|
22
18
|
continue;
|
|
23
19
|
}
|
|
24
|
-
// Try to match each token type (from outermost to innermost)
|
|
25
20
|
const token = this.matchToken(pos);
|
|
26
21
|
if (token) {
|
|
27
22
|
tokens.push(token);
|
|
@@ -36,11 +31,22 @@
|
|
|
36
31
|
matchToken(start) {
|
|
37
32
|
const text = this.text;
|
|
38
33
|
const remaining = text.slice(start);
|
|
39
|
-
// Skip if we're inside a quote marker
|
|
40
34
|
if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
|
|
41
35
|
return null;
|
|
42
36
|
}
|
|
43
|
-
// Match
|
|
37
|
+
// Match headings (###, ##)
|
|
38
|
+
const headingMatch = remaining.match(/^(#{1,3})\s+(.+?)(?=\n|$)/);
|
|
39
|
+
if (headingMatch) {
|
|
40
|
+
const level = headingMatch[1].length;
|
|
41
|
+
const content = headingMatch[2];
|
|
42
|
+
return {
|
|
43
|
+
type: `heading_${level}`,
|
|
44
|
+
content: content,
|
|
45
|
+
start: start,
|
|
46
|
+
end: start + headingMatch[0].length
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
// Match code block (triple backticks)
|
|
44
50
|
const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
|
|
45
51
|
if (codeBlockMatch) {
|
|
46
52
|
return {
|
|
@@ -101,13 +107,22 @@
|
|
|
101
107
|
end: start + underlineMatch[0].length
|
|
102
108
|
};
|
|
103
109
|
}
|
|
104
|
-
// Match italic with asterisk
|
|
105
|
-
const italicAsteriskMatch = remaining.match(/^\*([^*\
|
|
110
|
+
// FIXED: Match italic with asterisk - require space before and after
|
|
111
|
+
const italicAsteriskMatch = remaining.match(/^\*([^*\s][^*]*?[^*\s])\*/);
|
|
106
112
|
if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
|
|
107
113
|
// Don't match if it's part of bold (**)
|
|
108
114
|
if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
|
|
109
115
|
return null;
|
|
110
116
|
}
|
|
117
|
+
// Check if preceded by alphanumeric or underscore (prevents matching words like *italic* inside words)
|
|
118
|
+
if (start > 0 && /[a-zA-Z0-9_]/.test(text[start - 1])) {
|
|
119
|
+
return null;
|
|
120
|
+
}
|
|
121
|
+
// Check if followed by alphanumeric or underscore
|
|
122
|
+
const afterMatch = start + italicAsteriskMatch[0].length;
|
|
123
|
+
if (afterMatch < text.length && /[a-zA-Z0-9_]/.test(text[afterMatch])) {
|
|
124
|
+
return null;
|
|
125
|
+
}
|
|
111
126
|
return {
|
|
112
127
|
type: 'italic',
|
|
113
128
|
content: italicAsteriskMatch[1],
|
|
@@ -115,13 +130,25 @@
|
|
|
115
130
|
end: start + italicAsteriskMatch[0].length
|
|
116
131
|
};
|
|
117
132
|
}
|
|
118
|
-
// Match italic with underscore
|
|
119
|
-
const italicUnderscoreMatch = remaining.match(/^_([^_\
|
|
133
|
+
// FIXED: Match italic with underscore - require word boundaries and not part of username
|
|
134
|
+
const italicUnderscoreMatch = remaining.match(/^_([^_\s][^_]*?[^_\s])_/);
|
|
120
135
|
if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
|
|
121
136
|
// Don't match if it's part of underline (__)
|
|
122
137
|
if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
|
|
123
138
|
return null;
|
|
124
139
|
}
|
|
140
|
+
// FIXED: Don't match if preceded by @ (username) or alphanumeric
|
|
141
|
+
if (start > 0) {
|
|
142
|
+
const prevChar = text[start - 1];
|
|
143
|
+
if (prevChar === '@' || /[a-zA-Z0-9]/.test(prevChar)) {
|
|
144
|
+
return null;
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
// Don't match if followed by alphanumeric (part of word)
|
|
148
|
+
const afterMatch = start + italicUnderscoreMatch[0].length;
|
|
149
|
+
if (afterMatch < text.length && /[a-zA-Z0-9]/.test(text[afterMatch])) {
|
|
150
|
+
return null;
|
|
151
|
+
}
|
|
125
152
|
return {
|
|
126
153
|
type: 'italic',
|
|
127
154
|
content: italicUnderscoreMatch[1],
|
|
@@ -143,12 +170,10 @@
|
|
|
143
170
|
return null;
|
|
144
171
|
}
|
|
145
172
|
isInsideCodeBlock(text, position) {
|
|
146
|
-
// Check for code blocks
|
|
147
173
|
const codeBlockRegex = /```[\s\S]*?```/g;
|
|
148
174
|
let match;
|
|
149
175
|
while ((match = codeBlockRegex.exec(text)) !== null) {
|
|
150
176
|
if (position > match.index && position < match.index + match[0].length) {
|
|
151
|
-
// But allow matching the closing ``` itself
|
|
152
177
|
if (position >= match.index + match[0].length - 3) {
|
|
153
178
|
return false;
|
|
154
179
|
}
|
|
@@ -158,12 +183,10 @@
|
|
|
158
183
|
return false;
|
|
159
184
|
}
|
|
160
185
|
isInsideInlineCode(text, position) {
|
|
161
|
-
// Check for inline code
|
|
162
186
|
const inlineCodeRegex = /`[^`\n]*`/g;
|
|
163
187
|
let match;
|
|
164
188
|
while ((match = inlineCodeRegex.exec(text)) !== null) {
|
|
165
189
|
if (position > match.index && position < match.index + match[0].length) {
|
|
166
|
-
// But allow matching the closing ` itself
|
|
167
190
|
if (position === match.index + match[0].length - 1) {
|
|
168
191
|
return false;
|
|
169
192
|
}
|
|
@@ -218,86 +241,77 @@
|
|
|
218
241
|
constructor(options = {}) {
|
|
219
242
|
this.hasCustomLinkProcessor = !!options.linkProcessor;
|
|
220
243
|
this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
|
|
244
|
+
// Fixed: Add heading symbol option (default: '▎')
|
|
245
|
+
this.headingSymbol = options.headingSymbol ?? '▎';
|
|
221
246
|
this.options = {
|
|
222
247
|
escapeHtml: options.escapeHtml ?? true,
|
|
223
248
|
autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
|
|
249
|
+
headingSymbol: options.headingSymbol ?? '▎',
|
|
250
|
+
headingBlank: options.headingBlank ?? false,
|
|
224
251
|
linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
|
|
225
252
|
codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
|
|
226
253
|
};
|
|
227
254
|
}
|
|
228
|
-
/**
|
|
229
|
-
* Convert markdown text to Telegram HTML
|
|
230
|
-
*/
|
|
231
255
|
convert(text) {
|
|
232
|
-
// Auto-close code blocks if enabled
|
|
233
256
|
let processedText = this.options.autoCloseCodeBlocks
|
|
234
257
|
? autoCloseCodeBlocks(text)
|
|
235
258
|
: text;
|
|
236
|
-
// First pass: convert blockquotes (they should be at line starts)
|
|
237
259
|
processedText = this.preprocessBlockquotes(processedText);
|
|
238
|
-
// Convert the text recursively
|
|
239
260
|
let result = this.convertRecursive(processedText);
|
|
240
|
-
// Process blockquote markers
|
|
241
261
|
result = this.processBlockquoteMarkers(result);
|
|
242
|
-
// Only trim if there's actual content (not just whitespace)
|
|
243
262
|
if (result.trim() === '') {
|
|
244
|
-
return text;
|
|
263
|
+
return text;
|
|
245
264
|
}
|
|
246
265
|
return result.trim();
|
|
247
266
|
}
|
|
248
|
-
/**
|
|
249
|
-
* Recursively convert markdown, handling nested styles
|
|
250
|
-
*/
|
|
251
267
|
convertRecursive(text, depth = 0) {
|
|
252
268
|
if (depth > 10)
|
|
253
|
-
return text;
|
|
254
|
-
// Tokenize the text
|
|
269
|
+
return text;
|
|
255
270
|
const tokenizer = new MarkdownTokenizer(text);
|
|
256
271
|
const tokens = tokenizer.tokenize();
|
|
257
|
-
// If no tokens found, return the text as-is (with HTML escaping)
|
|
258
272
|
if (tokens.length === 0) {
|
|
259
273
|
return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
|
|
260
274
|
}
|
|
261
275
|
let result = '';
|
|
262
276
|
let lastPos = 0;
|
|
263
277
|
for (const token of tokens) {
|
|
264
|
-
// Add text before token
|
|
265
278
|
if (token.start > lastPos) {
|
|
266
279
|
const textBefore = text.slice(lastPos, token.start);
|
|
267
280
|
result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
|
|
268
281
|
}
|
|
269
|
-
// Handle
|
|
282
|
+
// Fixed: Handle headings
|
|
283
|
+
if (token.type === 'heading_2' || token.type === 'heading_3') {
|
|
284
|
+
token.type === 'heading_2' ? '##' : '###';
|
|
285
|
+
const processedContent = this.convertRecursive(token.content, depth + 1);
|
|
286
|
+
// Fixed: Add symbol and bold styling
|
|
287
|
+
const symbol = this.options.headingBlank ? '' : this.headingSymbol;
|
|
288
|
+
const headingText = symbol ? `${symbol} ${processedContent}` : processedContent;
|
|
289
|
+
result += `<b>${headingText}</b>`;
|
|
290
|
+
lastPos = token.end;
|
|
291
|
+
continue;
|
|
292
|
+
}
|
|
270
293
|
if (token.type === 'code_block') {
|
|
271
294
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
272
295
|
result += this.wrapToken(token.type, codeContent, token.language);
|
|
273
296
|
lastPos = token.end;
|
|
274
297
|
continue;
|
|
275
298
|
}
|
|
276
|
-
// Handle inline code specially (no recursive parsing inside)
|
|
277
299
|
if (token.type === 'inline_code') {
|
|
278
300
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
279
301
|
result += `<code>${codeContent}</code>`;
|
|
280
302
|
lastPos = token.end;
|
|
281
303
|
continue;
|
|
282
304
|
}
|
|
283
|
-
// Process other token content recursively
|
|
284
305
|
const tokenContent = this.convertRecursive(token.content, depth + 1);
|
|
285
|
-
// Wrap the content in appropriate HTML tags
|
|
286
306
|
result += this.wrapToken(token.type, tokenContent, token.language);
|
|
287
307
|
lastPos = token.end;
|
|
288
308
|
}
|
|
289
|
-
// Add remaining text
|
|
290
309
|
if (lastPos < text.length) {
|
|
291
310
|
const remainingText = text.slice(lastPos);
|
|
292
311
|
result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
|
|
293
312
|
}
|
|
294
313
|
return result;
|
|
295
314
|
}
|
|
296
|
-
/**
|
|
297
|
-
* Wrap token content in HTML tags
|
|
298
|
-
* FIXED: Removed extra newlines that were being added around code blocks and quotes
|
|
299
|
-
* Previously added \n before and after, now returns clean tags without extra whitespace
|
|
300
|
-
*/
|
|
301
315
|
wrapToken(type, content, language) {
|
|
302
316
|
switch (type) {
|
|
303
317
|
case 'bold':
|
|
@@ -311,16 +325,13 @@
|
|
|
311
325
|
case 'spoiler':
|
|
312
326
|
return `<span class="tg-spoiler">${content}</span>`;
|
|
313
327
|
case 'inline_code':
|
|
314
|
-
// Already handled above
|
|
315
328
|
return `<code>${content}</code>`;
|
|
316
329
|
case 'code_block':
|
|
317
|
-
// Already handled above, but handle custom processor
|
|
318
330
|
if (this.hasCustomCodeBlockProcessor) {
|
|
319
331
|
return this.options.codeBlockProcessor(content, language);
|
|
320
332
|
}
|
|
321
333
|
const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
322
334
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
323
|
-
// FIXED: Removed \n before and after - now returns just the tag
|
|
324
335
|
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
325
336
|
case 'link':
|
|
326
337
|
const url = language || '';
|
|
@@ -331,31 +342,23 @@
|
|
|
331
342
|
const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
332
343
|
return `<a href="${escapedUrl}">${escapedText}</a>`;
|
|
333
344
|
case 'quote':
|
|
334
|
-
// FIXED: Removed \n before and after - now returns just the tag
|
|
335
345
|
return `<blockquote>${content.trim()}</blockquote>`;
|
|
336
346
|
case 'expandable_quote':
|
|
337
|
-
// FIXED: Removed \n before and after - now returns just the tag
|
|
338
347
|
return `<blockquote expandable>${content.trim()}</blockquote>`;
|
|
339
348
|
default:
|
|
340
349
|
return content;
|
|
341
350
|
}
|
|
342
351
|
}
|
|
343
|
-
/**
|
|
344
|
-
* Preprocess blockquotes to mark them before other parsing
|
|
345
|
-
*/
|
|
346
352
|
preprocessBlockquotes(text) {
|
|
347
353
|
const lines = text.split('\n');
|
|
348
354
|
const processedLines = [];
|
|
349
355
|
for (const line of lines) {
|
|
350
356
|
const trimmedLine = line.trim();
|
|
351
|
-
// Only treat lines starting with > at the beginning of line as blockquotes
|
|
352
357
|
if (trimmedLine.startsWith('**>')) {
|
|
353
|
-
// Expandable blockquote
|
|
354
358
|
const content = trimmedLine.substring(3).trim();
|
|
355
359
|
processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
|
|
356
360
|
}
|
|
357
361
|
else if (trimmedLine.startsWith('>')) {
|
|
358
|
-
// Regular blockquote
|
|
359
362
|
const content = trimmedLine.substring(1).trim();
|
|
360
363
|
processedLines.push(`[QUOTE]${content}`);
|
|
361
364
|
}
|
|
@@ -365,24 +368,16 @@
|
|
|
365
368
|
}
|
|
366
369
|
return processedLines.join('\n');
|
|
367
370
|
}
|
|
368
|
-
/**
|
|
369
|
-
* Process blockquote markers
|
|
370
|
-
* FIXED: Removed extra newlines from the replacement strings
|
|
371
|
-
*/
|
|
372
371
|
processBlockquoteMarkers(text) {
|
|
373
372
|
let result = text;
|
|
374
|
-
// Replace expandable quote markers (process content recursively)
|
|
375
373
|
const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
|
|
376
374
|
result = result.replace(expandableQuoteRegex, (match, content) => {
|
|
377
375
|
const processedContent = this.convertRecursive(content);
|
|
378
|
-
// FIXED: Removed \n before and after
|
|
379
376
|
return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
|
|
380
377
|
});
|
|
381
|
-
// Replace regular quote markers (process content recursively)
|
|
382
378
|
const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
|
|
383
379
|
result = result.replace(quoteRegex, (match, content) => {
|
|
384
380
|
const processedContent = this.convertRecursive(content);
|
|
385
|
-
// FIXED: Removed \n before and after
|
|
386
381
|
return `<blockquote>${processedContent.trim()}</blockquote>`;
|
|
387
382
|
});
|
|
388
383
|
return result;
|
|
@@ -395,7 +390,6 @@
|
|
|
395
390
|
defaultCodeBlockProcessor(code, language) {
|
|
396
391
|
const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
|
|
397
392
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
398
|
-
// FIXED: Removed \n before and after in default processor too
|
|
399
393
|
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
400
394
|
}
|
|
401
395
|
}
|
package/dist/tokenizer.d.ts
CHANGED
package/dist/types.d.ts
CHANGED
|
@@ -9,6 +9,15 @@ export interface ConvertOptions {
|
|
|
9
9
|
* @default true
|
|
10
10
|
*/
|
|
11
11
|
autoCloseCodeBlocks?: boolean;
|
|
12
|
+
/**
|
|
13
|
+
* Custom symbol for headings (default: '▎')
|
|
14
|
+
*/
|
|
15
|
+
headingSymbol?: string;
|
|
16
|
+
/**
|
|
17
|
+
* Whether to show blank (no symbol) for headings
|
|
18
|
+
* @default false
|
|
19
|
+
*/
|
|
20
|
+
headingBlank?: boolean;
|
|
12
21
|
/**
|
|
13
22
|
* Custom replacement function for links
|
|
14
23
|
*/
|