telegram-md2html 1.0.1 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +383 -319
- package/dist/converter.d.ts +1 -15
- package/dist/index.cjs +61 -58
- package/dist/index.esm.js +61 -58
- package/dist/index.mjs +61 -58
- package/dist/index.umd.js +61 -58
- package/dist/tokenizer.d.ts +0 -3
- package/dist/types.d.ts +9 -0
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -2,20 +2,15 @@ class MarkdownTokenizer {
|
|
|
2
2
|
constructor(text) {
|
|
3
3
|
this.text = text;
|
|
4
4
|
}
|
|
5
|
-
/**
|
|
6
|
-
* Tokenize the markdown text
|
|
7
|
-
*/
|
|
8
5
|
tokenize() {
|
|
9
6
|
const tokens = [];
|
|
10
7
|
let pos = 0;
|
|
11
8
|
const text = this.text;
|
|
12
9
|
while (pos < text.length) {
|
|
13
|
-
// Skip if inside code block
|
|
14
10
|
if (this.isInsideCodeBlock(text, pos)) {
|
|
15
11
|
pos++;
|
|
16
12
|
continue;
|
|
17
13
|
}
|
|
18
|
-
// Try to match each token type (from outermost to innermost)
|
|
19
14
|
const token = this.matchToken(pos);
|
|
20
15
|
if (token) {
|
|
21
16
|
tokens.push(token);
|
|
@@ -30,11 +25,22 @@ class MarkdownTokenizer {
|
|
|
30
25
|
matchToken(start) {
|
|
31
26
|
const text = this.text;
|
|
32
27
|
const remaining = text.slice(start);
|
|
33
|
-
// Skip if we're inside a quote marker
|
|
34
28
|
if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
|
|
35
29
|
return null;
|
|
36
30
|
}
|
|
37
|
-
// Match
|
|
31
|
+
// Match headings: one to three leading '#' followed by whitespace (#, ##, ###)
|
|
32
|
+
const headingMatch = remaining.match(/^(#{1,3})\s+(.+?)(?=\n|$)/);
|
|
33
|
+
if (headingMatch) {
|
|
34
|
+
const level = headingMatch[1].length;
|
|
35
|
+
const content = headingMatch[2];
|
|
36
|
+
return {
|
|
37
|
+
type: `heading_${level}`,
|
|
38
|
+
content: content,
|
|
39
|
+
start: start,
|
|
40
|
+
end: start + headingMatch[0].length
|
|
41
|
+
};
|
|
42
|
+
}
|
|
43
|
+
// Match code block (triple backticks)
|
|
38
44
|
const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
|
|
39
45
|
if (codeBlockMatch) {
|
|
40
46
|
return {
|
|
@@ -95,13 +101,22 @@ class MarkdownTokenizer {
|
|
|
95
101
|
end: start + underlineMatch[0].length
|
|
96
102
|
};
|
|
97
103
|
}
|
|
98
|
-
// Match italic with asterisk
|
|
99
|
-
const italicAsteriskMatch = remaining.match(/^\*([^*\
|
|
104
|
+
// FIXED: Match italic with asterisk - content must start and end with a non-space, non-asterisk character
|
|
105
|
+
const italicAsteriskMatch = remaining.match(/^\*([^*\s][^*]*?[^*\s])\*/);
|
|
100
106
|
if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
|
|
101
107
|
// Don't match if it's part of bold (**)
|
|
102
108
|
if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
|
|
103
109
|
return null;
|
|
104
110
|
}
|
|
111
|
+
// Check if preceded by alphanumeric or underscore (prevents matching words like *italic* inside words)
|
|
112
|
+
if (start > 0 && /[a-zA-Z0-9_]/.test(text[start - 1])) {
|
|
113
|
+
return null;
|
|
114
|
+
}
|
|
115
|
+
// Check if followed by alphanumeric or underscore
|
|
116
|
+
const afterMatch = start + italicAsteriskMatch[0].length;
|
|
117
|
+
if (afterMatch < text.length && /[a-zA-Z0-9_]/.test(text[afterMatch])) {
|
|
118
|
+
return null;
|
|
119
|
+
}
|
|
105
120
|
return {
|
|
106
121
|
type: 'italic',
|
|
107
122
|
content: italicAsteriskMatch[1],
|
|
@@ -109,13 +124,25 @@ class MarkdownTokenizer {
|
|
|
109
124
|
end: start + italicAsteriskMatch[0].length
|
|
110
125
|
};
|
|
111
126
|
}
|
|
112
|
-
// Match italic with underscore
|
|
113
|
-
const italicUnderscoreMatch = remaining.match(/^_([^_\
|
|
127
|
+
// FIXED: Match italic with underscore - require word boundaries and not part of username
|
|
128
|
+
const italicUnderscoreMatch = remaining.match(/^_([^_\s][^_]*?[^_\s])_/);
|
|
114
129
|
if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
|
|
115
130
|
// Don't match if it's part of underline (__)
|
|
116
131
|
if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
|
|
117
132
|
return null;
|
|
118
133
|
}
|
|
134
|
+
// FIXED: Don't match if preceded by @ (username) or alphanumeric
|
|
135
|
+
if (start > 0) {
|
|
136
|
+
const prevChar = text[start - 1];
|
|
137
|
+
if (prevChar === '@' || /[a-zA-Z0-9]/.test(prevChar)) {
|
|
138
|
+
return null;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
// Don't match if followed by alphanumeric (part of word)
|
|
142
|
+
const afterMatch = start + italicUnderscoreMatch[0].length;
|
|
143
|
+
if (afterMatch < text.length && /[a-zA-Z0-9]/.test(text[afterMatch])) {
|
|
144
|
+
return null;
|
|
145
|
+
}
|
|
119
146
|
return {
|
|
120
147
|
type: 'italic',
|
|
121
148
|
content: italicUnderscoreMatch[1],
|
|
@@ -137,12 +164,10 @@ class MarkdownTokenizer {
|
|
|
137
164
|
return null;
|
|
138
165
|
}
|
|
139
166
|
isInsideCodeBlock(text, position) {
|
|
140
|
-
// Check for code blocks
|
|
141
167
|
const codeBlockRegex = /```[\s\S]*?```/g;
|
|
142
168
|
let match;
|
|
143
169
|
while ((match = codeBlockRegex.exec(text)) !== null) {
|
|
144
170
|
if (position > match.index && position < match.index + match[0].length) {
|
|
145
|
-
// But allow matching the closing ``` itself
|
|
146
171
|
if (position >= match.index + match[0].length - 3) {
|
|
147
172
|
return false;
|
|
148
173
|
}
|
|
@@ -152,12 +177,10 @@ class MarkdownTokenizer {
|
|
|
152
177
|
return false;
|
|
153
178
|
}
|
|
154
179
|
isInsideInlineCode(text, position) {
|
|
155
|
-
// Check for inline code
|
|
156
180
|
const inlineCodeRegex = /`[^`\n]*`/g;
|
|
157
181
|
let match;
|
|
158
182
|
while ((match = inlineCodeRegex.exec(text)) !== null) {
|
|
159
183
|
if (position > match.index && position < match.index + match[0].length) {
|
|
160
|
-
// But allow matching the closing ` itself
|
|
161
184
|
if (position === match.index + match[0].length - 1) {
|
|
162
185
|
return false;
|
|
163
186
|
}
|
|
@@ -212,84 +235,77 @@ class MarkdownConverter {
|
|
|
212
235
|
constructor(options = {}) {
|
|
213
236
|
this.hasCustomLinkProcessor = !!options.linkProcessor;
|
|
214
237
|
this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
|
|
238
|
+
// Fixed: Add heading symbol option (default: '▎')
|
|
239
|
+
this.headingSymbol = options.headingSymbol ?? '▎';
|
|
215
240
|
this.options = {
|
|
216
241
|
escapeHtml: options.escapeHtml ?? true,
|
|
217
242
|
autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
|
|
243
|
+
headingSymbol: options.headingSymbol ?? '▎',
|
|
244
|
+
headingBlank: options.headingBlank ?? false,
|
|
218
245
|
linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
|
|
219
246
|
codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
|
|
220
247
|
};
|
|
221
248
|
}
|
|
222
|
-
/**
|
|
223
|
-
* Convert markdown text to Telegram HTML
|
|
224
|
-
*/
|
|
225
249
|
convert(text) {
|
|
226
|
-
// Auto-close code blocks if enabled
|
|
227
250
|
let processedText = this.options.autoCloseCodeBlocks
|
|
228
251
|
? autoCloseCodeBlocks(text)
|
|
229
252
|
: text;
|
|
230
|
-
// First pass: convert blockquotes (they should be at line starts)
|
|
231
253
|
processedText = this.preprocessBlockquotes(processedText);
|
|
232
|
-
// Convert the text recursively
|
|
233
254
|
let result = this.convertRecursive(processedText);
|
|
234
|
-
// Process blockquote markers
|
|
235
255
|
result = this.processBlockquoteMarkers(result);
|
|
236
|
-
// Only trim if there's actual content (not just whitespace)
|
|
237
256
|
if (result.trim() === '') {
|
|
238
|
-
return text;
|
|
257
|
+
return text;
|
|
239
258
|
}
|
|
240
259
|
return result.trim();
|
|
241
260
|
}
|
|
242
|
-
/**
|
|
243
|
-
* Recursively convert markdown, handling nested styles
|
|
244
|
-
*/
|
|
245
261
|
convertRecursive(text, depth = 0) {
|
|
246
262
|
if (depth > 10)
|
|
247
|
-
return text;
|
|
248
|
-
// Tokenize the text
|
|
263
|
+
return text;
|
|
249
264
|
const tokenizer = new MarkdownTokenizer(text);
|
|
250
265
|
const tokens = tokenizer.tokenize();
|
|
251
|
-
// If no tokens found, return the text as-is (with HTML escaping)
|
|
252
266
|
if (tokens.length === 0) {
|
|
253
267
|
return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
|
|
254
268
|
}
|
|
255
269
|
let result = '';
|
|
256
270
|
let lastPos = 0;
|
|
257
271
|
for (const token of tokens) {
|
|
258
|
-
// Add text before token
|
|
259
272
|
if (token.start > lastPos) {
|
|
260
273
|
const textBefore = text.slice(lastPos, token.start);
|
|
261
274
|
result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
|
|
262
275
|
}
|
|
263
|
-
// Handle
|
|
276
|
+
// Fixed: Handle headings
|
|
277
|
+
if (token.type === 'heading_2' || token.type === 'heading_3') {
|
|
278
|
+
token.type === 'heading_2' ? '##' : '###';
|
|
279
|
+
const processedContent = this.convertRecursive(token.content, depth + 1);
|
|
280
|
+
// Fixed: Add symbol and bold styling
|
|
281
|
+
const symbol = this.options.headingBlank ? '' : this.headingSymbol;
|
|
282
|
+
const headingText = symbol ? `${symbol} ${processedContent}` : processedContent;
|
|
283
|
+
result += `<b>${headingText}</b>`;
|
|
284
|
+
lastPos = token.end;
|
|
285
|
+
continue;
|
|
286
|
+
}
|
|
264
287
|
if (token.type === 'code_block') {
|
|
265
288
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
266
289
|
result += this.wrapToken(token.type, codeContent, token.language);
|
|
267
290
|
lastPos = token.end;
|
|
268
291
|
continue;
|
|
269
292
|
}
|
|
270
|
-
// Handle inline code specially (no recursive parsing inside)
|
|
271
293
|
if (token.type === 'inline_code') {
|
|
272
294
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
273
295
|
result += `<code>${codeContent}</code>`;
|
|
274
296
|
lastPos = token.end;
|
|
275
297
|
continue;
|
|
276
298
|
}
|
|
277
|
-
// Process other token content recursively
|
|
278
299
|
const tokenContent = this.convertRecursive(token.content, depth + 1);
|
|
279
|
-
// Wrap the content in appropriate HTML tags
|
|
280
300
|
result += this.wrapToken(token.type, tokenContent, token.language);
|
|
281
301
|
lastPos = token.end;
|
|
282
302
|
}
|
|
283
|
-
// Add remaining text
|
|
284
303
|
if (lastPos < text.length) {
|
|
285
304
|
const remainingText = text.slice(lastPos);
|
|
286
305
|
result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
|
|
287
306
|
}
|
|
288
307
|
return result;
|
|
289
308
|
}
|
|
290
|
-
/**
|
|
291
|
-
* Wrap token content in HTML tags
|
|
292
|
-
*/
|
|
293
309
|
wrapToken(type, content, language) {
|
|
294
310
|
switch (type) {
|
|
295
311
|
case 'bold':
|
|
@@ -303,16 +319,14 @@ class MarkdownConverter {
|
|
|
303
319
|
case 'spoiler':
|
|
304
320
|
return `<span class="tg-spoiler">${content}</span>`;
|
|
305
321
|
case 'inline_code':
|
|
306
|
-
// Already handled above
|
|
307
322
|
return `<code>${content}</code>`;
|
|
308
323
|
case 'code_block':
|
|
309
|
-
// Already handled above, but handle custom processor
|
|
310
324
|
if (this.hasCustomCodeBlockProcessor) {
|
|
311
325
|
return this.options.codeBlockProcessor(content, language);
|
|
312
326
|
}
|
|
313
327
|
const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
314
328
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
315
|
-
return
|
|
329
|
+
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
316
330
|
case 'link':
|
|
317
331
|
const url = language || '';
|
|
318
332
|
if (this.hasCustomLinkProcessor) {
|
|
@@ -322,29 +336,23 @@ class MarkdownConverter {
|
|
|
322
336
|
const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
323
337
|
return `<a href="${escapedUrl}">${escapedText}</a>`;
|
|
324
338
|
case 'quote':
|
|
325
|
-
return
|
|
339
|
+
return `<blockquote>${content.trim()}</blockquote>`;
|
|
326
340
|
case 'expandable_quote':
|
|
327
|
-
return
|
|
341
|
+
return `<blockquote expandable>${content.trim()}</blockquote>`;
|
|
328
342
|
default:
|
|
329
343
|
return content;
|
|
330
344
|
}
|
|
331
345
|
}
|
|
332
|
-
/**
|
|
333
|
-
* Preprocess blockquotes to mark them before other parsing
|
|
334
|
-
*/
|
|
335
346
|
preprocessBlockquotes(text) {
|
|
336
347
|
const lines = text.split('\n');
|
|
337
348
|
const processedLines = [];
|
|
338
349
|
for (const line of lines) {
|
|
339
350
|
const trimmedLine = line.trim();
|
|
340
|
-
// Only treat lines starting with > at the beginning of line as blockquotes
|
|
341
351
|
if (trimmedLine.startsWith('**>')) {
|
|
342
|
-
// Expandable blockquote
|
|
343
352
|
const content = trimmedLine.substring(3).trim();
|
|
344
353
|
processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
|
|
345
354
|
}
|
|
346
355
|
else if (trimmedLine.startsWith('>')) {
|
|
347
|
-
// Regular blockquote
|
|
348
356
|
const content = trimmedLine.substring(1).trim();
|
|
349
357
|
processedLines.push(`[QUOTE]${content}`);
|
|
350
358
|
}
|
|
@@ -354,22 +362,17 @@ class MarkdownConverter {
|
|
|
354
362
|
}
|
|
355
363
|
return processedLines.join('\n');
|
|
356
364
|
}
|
|
357
|
-
/**
|
|
358
|
-
* Process blockquote markers
|
|
359
|
-
*/
|
|
360
365
|
processBlockquoteMarkers(text) {
|
|
361
366
|
let result = text;
|
|
362
|
-
// Replace expandable quote markers (process content recursively)
|
|
363
367
|
const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
|
|
364
368
|
result = result.replace(expandableQuoteRegex, (match, content) => {
|
|
365
369
|
const processedContent = this.convertRecursive(content);
|
|
366
|
-
return
|
|
370
|
+
return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
|
|
367
371
|
});
|
|
368
|
-
// Replace regular quote markers (process content recursively)
|
|
369
372
|
const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
|
|
370
373
|
result = result.replace(quoteRegex, (match, content) => {
|
|
371
374
|
const processedContent = this.convertRecursive(content);
|
|
372
|
-
return
|
|
375
|
+
return `<blockquote>${processedContent.trim()}</blockquote>`;
|
|
373
376
|
});
|
|
374
377
|
return result;
|
|
375
378
|
}
|
|
@@ -381,7 +384,7 @@ class MarkdownConverter {
|
|
|
381
384
|
defaultCodeBlockProcessor(code, language) {
|
|
382
385
|
const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
|
|
383
386
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
384
|
-
return
|
|
387
|
+
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
385
388
|
}
|
|
386
389
|
}
|
|
387
390
|
|
package/dist/index.umd.js
CHANGED
|
@@ -8,20 +8,15 @@
|
|
|
8
8
|
constructor(text) {
|
|
9
9
|
this.text = text;
|
|
10
10
|
}
|
|
11
|
-
/**
|
|
12
|
-
* Tokenize the markdown text
|
|
13
|
-
*/
|
|
14
11
|
tokenize() {
|
|
15
12
|
const tokens = [];
|
|
16
13
|
let pos = 0;
|
|
17
14
|
const text = this.text;
|
|
18
15
|
while (pos < text.length) {
|
|
19
|
-
// Skip if inside code block
|
|
20
16
|
if (this.isInsideCodeBlock(text, pos)) {
|
|
21
17
|
pos++;
|
|
22
18
|
continue;
|
|
23
19
|
}
|
|
24
|
-
// Try to match each token type (from outermost to innermost)
|
|
25
20
|
const token = this.matchToken(pos);
|
|
26
21
|
if (token) {
|
|
27
22
|
tokens.push(token);
|
|
@@ -36,11 +31,22 @@
|
|
|
36
31
|
matchToken(start) {
|
|
37
32
|
const text = this.text;
|
|
38
33
|
const remaining = text.slice(start);
|
|
39
|
-
// Skip if we're inside a quote marker
|
|
40
34
|
if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
|
|
41
35
|
return null;
|
|
42
36
|
}
|
|
43
|
-
// Match
|
|
37
|
+
// Match headings: one to three leading '#' followed by whitespace (#, ##, ###)
|
|
38
|
+
const headingMatch = remaining.match(/^(#{1,3})\s+(.+?)(?=\n|$)/);
|
|
39
|
+
if (headingMatch) {
|
|
40
|
+
const level = headingMatch[1].length;
|
|
41
|
+
const content = headingMatch[2];
|
|
42
|
+
return {
|
|
43
|
+
type: `heading_${level}`,
|
|
44
|
+
content: content,
|
|
45
|
+
start: start,
|
|
46
|
+
end: start + headingMatch[0].length
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
// Match code block (triple backticks)
|
|
44
50
|
const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
|
|
45
51
|
if (codeBlockMatch) {
|
|
46
52
|
return {
|
|
@@ -101,13 +107,22 @@
|
|
|
101
107
|
end: start + underlineMatch[0].length
|
|
102
108
|
};
|
|
103
109
|
}
|
|
104
|
-
// Match italic with asterisk
|
|
105
|
-
const italicAsteriskMatch = remaining.match(/^\*([^*\
|
|
110
|
+
// FIXED: Match italic with asterisk - content must start and end with a non-space, non-asterisk character
|
|
111
|
+
const italicAsteriskMatch = remaining.match(/^\*([^*\s][^*]*?[^*\s])\*/);
|
|
106
112
|
if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
|
|
107
113
|
// Don't match if it's part of bold (**)
|
|
108
114
|
if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
|
|
109
115
|
return null;
|
|
110
116
|
}
|
|
117
|
+
// Check if preceded by alphanumeric or underscore (prevents matching words like *italic* inside words)
|
|
118
|
+
if (start > 0 && /[a-zA-Z0-9_]/.test(text[start - 1])) {
|
|
119
|
+
return null;
|
|
120
|
+
}
|
|
121
|
+
// Check if followed by alphanumeric or underscore
|
|
122
|
+
const afterMatch = start + italicAsteriskMatch[0].length;
|
|
123
|
+
if (afterMatch < text.length && /[a-zA-Z0-9_]/.test(text[afterMatch])) {
|
|
124
|
+
return null;
|
|
125
|
+
}
|
|
111
126
|
return {
|
|
112
127
|
type: 'italic',
|
|
113
128
|
content: italicAsteriskMatch[1],
|
|
@@ -115,13 +130,25 @@
|
|
|
115
130
|
end: start + italicAsteriskMatch[0].length
|
|
116
131
|
};
|
|
117
132
|
}
|
|
118
|
-
// Match italic with underscore
|
|
119
|
-
const italicUnderscoreMatch = remaining.match(/^_([^_\
|
|
133
|
+
// FIXED: Match italic with underscore - require word boundaries and not part of username
|
|
134
|
+
const italicUnderscoreMatch = remaining.match(/^_([^_\s][^_]*?[^_\s])_/);
|
|
120
135
|
if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
|
|
121
136
|
// Don't match if it's part of underline (__)
|
|
122
137
|
if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
|
|
123
138
|
return null;
|
|
124
139
|
}
|
|
140
|
+
// FIXED: Don't match if preceded by @ (username) or alphanumeric
|
|
141
|
+
if (start > 0) {
|
|
142
|
+
const prevChar = text[start - 1];
|
|
143
|
+
if (prevChar === '@' || /[a-zA-Z0-9]/.test(prevChar)) {
|
|
144
|
+
return null;
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
// Don't match if followed by alphanumeric (part of word)
|
|
148
|
+
const afterMatch = start + italicUnderscoreMatch[0].length;
|
|
149
|
+
if (afterMatch < text.length && /[a-zA-Z0-9]/.test(text[afterMatch])) {
|
|
150
|
+
return null;
|
|
151
|
+
}
|
|
125
152
|
return {
|
|
126
153
|
type: 'italic',
|
|
127
154
|
content: italicUnderscoreMatch[1],
|
|
@@ -143,12 +170,10 @@
|
|
|
143
170
|
return null;
|
|
144
171
|
}
|
|
145
172
|
isInsideCodeBlock(text, position) {
|
|
146
|
-
// Check for code blocks
|
|
147
173
|
const codeBlockRegex = /```[\s\S]*?```/g;
|
|
148
174
|
let match;
|
|
149
175
|
while ((match = codeBlockRegex.exec(text)) !== null) {
|
|
150
176
|
if (position > match.index && position < match.index + match[0].length) {
|
|
151
|
-
// But allow matching the closing ``` itself
|
|
152
177
|
if (position >= match.index + match[0].length - 3) {
|
|
153
178
|
return false;
|
|
154
179
|
}
|
|
@@ -158,12 +183,10 @@
|
|
|
158
183
|
return false;
|
|
159
184
|
}
|
|
160
185
|
isInsideInlineCode(text, position) {
|
|
161
|
-
// Check for inline code
|
|
162
186
|
const inlineCodeRegex = /`[^`\n]*`/g;
|
|
163
187
|
let match;
|
|
164
188
|
while ((match = inlineCodeRegex.exec(text)) !== null) {
|
|
165
189
|
if (position > match.index && position < match.index + match[0].length) {
|
|
166
|
-
// But allow matching the closing ` itself
|
|
167
190
|
if (position === match.index + match[0].length - 1) {
|
|
168
191
|
return false;
|
|
169
192
|
}
|
|
@@ -218,84 +241,77 @@
|
|
|
218
241
|
constructor(options = {}) {
|
|
219
242
|
this.hasCustomLinkProcessor = !!options.linkProcessor;
|
|
220
243
|
this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
|
|
244
|
+
// Fixed: Add heading symbol option (default: '▎')
|
|
245
|
+
this.headingSymbol = options.headingSymbol ?? '▎';
|
|
221
246
|
this.options = {
|
|
222
247
|
escapeHtml: options.escapeHtml ?? true,
|
|
223
248
|
autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
|
|
249
|
+
headingSymbol: options.headingSymbol ?? '▎',
|
|
250
|
+
headingBlank: options.headingBlank ?? false,
|
|
224
251
|
linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
|
|
225
252
|
codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
|
|
226
253
|
};
|
|
227
254
|
}
|
|
228
|
-
/**
|
|
229
|
-
* Convert markdown text to Telegram HTML
|
|
230
|
-
*/
|
|
231
255
|
convert(text) {
|
|
232
|
-
// Auto-close code blocks if enabled
|
|
233
256
|
let processedText = this.options.autoCloseCodeBlocks
|
|
234
257
|
? autoCloseCodeBlocks(text)
|
|
235
258
|
: text;
|
|
236
|
-
// First pass: convert blockquotes (they should be at line starts)
|
|
237
259
|
processedText = this.preprocessBlockquotes(processedText);
|
|
238
|
-
// Convert the text recursively
|
|
239
260
|
let result = this.convertRecursive(processedText);
|
|
240
|
-
// Process blockquote markers
|
|
241
261
|
result = this.processBlockquoteMarkers(result);
|
|
242
|
-
// Only trim if there's actual content (not just whitespace)
|
|
243
262
|
if (result.trim() === '') {
|
|
244
|
-
return text;
|
|
263
|
+
return text;
|
|
245
264
|
}
|
|
246
265
|
return result.trim();
|
|
247
266
|
}
|
|
248
|
-
/**
|
|
249
|
-
* Recursively convert markdown, handling nested styles
|
|
250
|
-
*/
|
|
251
267
|
convertRecursive(text, depth = 0) {
|
|
252
268
|
if (depth > 10)
|
|
253
|
-
return text;
|
|
254
|
-
// Tokenize the text
|
|
269
|
+
return text;
|
|
255
270
|
const tokenizer = new MarkdownTokenizer(text);
|
|
256
271
|
const tokens = tokenizer.tokenize();
|
|
257
|
-
// If no tokens found, return the text as-is (with HTML escaping)
|
|
258
272
|
if (tokens.length === 0) {
|
|
259
273
|
return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
|
|
260
274
|
}
|
|
261
275
|
let result = '';
|
|
262
276
|
let lastPos = 0;
|
|
263
277
|
for (const token of tokens) {
|
|
264
|
-
// Add text before token
|
|
265
278
|
if (token.start > lastPos) {
|
|
266
279
|
const textBefore = text.slice(lastPos, token.start);
|
|
267
280
|
result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
|
|
268
281
|
}
|
|
269
|
-
// Handle
|
|
282
|
+
// Fixed: Handle headings
|
|
283
|
+
if (token.type === 'heading_2' || token.type === 'heading_3') {
|
|
284
|
+
token.type === 'heading_2' ? '##' : '###';
|
|
285
|
+
const processedContent = this.convertRecursive(token.content, depth + 1);
|
|
286
|
+
// Fixed: Add symbol and bold styling
|
|
287
|
+
const symbol = this.options.headingBlank ? '' : this.headingSymbol;
|
|
288
|
+
const headingText = symbol ? `${symbol} ${processedContent}` : processedContent;
|
|
289
|
+
result += `<b>${headingText}</b>`;
|
|
290
|
+
lastPos = token.end;
|
|
291
|
+
continue;
|
|
292
|
+
}
|
|
270
293
|
if (token.type === 'code_block') {
|
|
271
294
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
272
295
|
result += this.wrapToken(token.type, codeContent, token.language);
|
|
273
296
|
lastPos = token.end;
|
|
274
297
|
continue;
|
|
275
298
|
}
|
|
276
|
-
// Handle inline code specially (no recursive parsing inside)
|
|
277
299
|
if (token.type === 'inline_code') {
|
|
278
300
|
const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
|
|
279
301
|
result += `<code>${codeContent}</code>`;
|
|
280
302
|
lastPos = token.end;
|
|
281
303
|
continue;
|
|
282
304
|
}
|
|
283
|
-
// Process other token content recursively
|
|
284
305
|
const tokenContent = this.convertRecursive(token.content, depth + 1);
|
|
285
|
-
// Wrap the content in appropriate HTML tags
|
|
286
306
|
result += this.wrapToken(token.type, tokenContent, token.language);
|
|
287
307
|
lastPos = token.end;
|
|
288
308
|
}
|
|
289
|
-
// Add remaining text
|
|
290
309
|
if (lastPos < text.length) {
|
|
291
310
|
const remainingText = text.slice(lastPos);
|
|
292
311
|
result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
|
|
293
312
|
}
|
|
294
313
|
return result;
|
|
295
314
|
}
|
|
296
|
-
/**
|
|
297
|
-
* Wrap token content in HTML tags
|
|
298
|
-
*/
|
|
299
315
|
wrapToken(type, content, language) {
|
|
300
316
|
switch (type) {
|
|
301
317
|
case 'bold':
|
|
@@ -309,16 +325,14 @@
|
|
|
309
325
|
case 'spoiler':
|
|
310
326
|
return `<span class="tg-spoiler">${content}</span>`;
|
|
311
327
|
case 'inline_code':
|
|
312
|
-
// Already handled above
|
|
313
328
|
return `<code>${content}</code>`;
|
|
314
329
|
case 'code_block':
|
|
315
|
-
// Already handled above, but handle custom processor
|
|
316
330
|
if (this.hasCustomCodeBlockProcessor) {
|
|
317
331
|
return this.options.codeBlockProcessor(content, language);
|
|
318
332
|
}
|
|
319
333
|
const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
320
334
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
321
|
-
return
|
|
335
|
+
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
322
336
|
case 'link':
|
|
323
337
|
const url = language || '';
|
|
324
338
|
if (this.hasCustomLinkProcessor) {
|
|
@@ -328,29 +342,23 @@
|
|
|
328
342
|
const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
|
|
329
343
|
return `<a href="${escapedUrl}">${escapedText}</a>`;
|
|
330
344
|
case 'quote':
|
|
331
|
-
return
|
|
345
|
+
return `<blockquote>${content.trim()}</blockquote>`;
|
|
332
346
|
case 'expandable_quote':
|
|
333
|
-
return
|
|
347
|
+
return `<blockquote expandable>${content.trim()}</blockquote>`;
|
|
334
348
|
default:
|
|
335
349
|
return content;
|
|
336
350
|
}
|
|
337
351
|
}
|
|
338
|
-
/**
|
|
339
|
-
* Preprocess blockquotes to mark them before other parsing
|
|
340
|
-
*/
|
|
341
352
|
preprocessBlockquotes(text) {
|
|
342
353
|
const lines = text.split('\n');
|
|
343
354
|
const processedLines = [];
|
|
344
355
|
for (const line of lines) {
|
|
345
356
|
const trimmedLine = line.trim();
|
|
346
|
-
// Only treat lines starting with > at the beginning of line as blockquotes
|
|
347
357
|
if (trimmedLine.startsWith('**>')) {
|
|
348
|
-
// Expandable blockquote
|
|
349
358
|
const content = trimmedLine.substring(3).trim();
|
|
350
359
|
processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
|
|
351
360
|
}
|
|
352
361
|
else if (trimmedLine.startsWith('>')) {
|
|
353
|
-
// Regular blockquote
|
|
354
362
|
const content = trimmedLine.substring(1).trim();
|
|
355
363
|
processedLines.push(`[QUOTE]${content}`);
|
|
356
364
|
}
|
|
@@ -360,22 +368,17 @@
|
|
|
360
368
|
}
|
|
361
369
|
return processedLines.join('\n');
|
|
362
370
|
}
|
|
363
|
-
/**
|
|
364
|
-
* Process blockquote markers
|
|
365
|
-
*/
|
|
366
371
|
processBlockquoteMarkers(text) {
|
|
367
372
|
let result = text;
|
|
368
|
-
// Replace expandable quote markers (process content recursively)
|
|
369
373
|
const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
|
|
370
374
|
result = result.replace(expandableQuoteRegex, (match, content) => {
|
|
371
375
|
const processedContent = this.convertRecursive(content);
|
|
372
|
-
return
|
|
376
|
+
return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
|
|
373
377
|
});
|
|
374
|
-
// Replace regular quote markers (process content recursively)
|
|
375
378
|
const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
|
|
376
379
|
result = result.replace(quoteRegex, (match, content) => {
|
|
377
380
|
const processedContent = this.convertRecursive(content);
|
|
378
|
-
return
|
|
381
|
+
return `<blockquote>${processedContent.trim()}</blockquote>`;
|
|
379
382
|
});
|
|
380
383
|
return result;
|
|
381
384
|
}
|
|
@@ -387,7 +390,7 @@
|
|
|
387
390
|
defaultCodeBlockProcessor(code, language) {
|
|
388
391
|
const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
|
|
389
392
|
const langAttr = language ? ` class="language-${language}"` : '';
|
|
390
|
-
return
|
|
393
|
+
return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
|
|
391
394
|
}
|
|
392
395
|
}
|
|
393
396
|
|
package/dist/tokenizer.d.ts
CHANGED
package/dist/types.d.ts
CHANGED
|
@@ -9,6 +9,15 @@ export interface ConvertOptions {
|
|
|
9
9
|
* @default true
|
|
10
10
|
*/
|
|
11
11
|
autoCloseCodeBlocks?: boolean;
|
|
12
|
+
/**
|
|
13
|
+
* Custom symbol for headings (default: '▎')
|
|
14
|
+
*/
|
|
15
|
+
headingSymbol?: string;
|
|
16
|
+
/**
|
|
17
|
+
* Whether to omit the heading symbol (headings are rendered without a prefix)
|
|
18
|
+
* @default false
|
|
19
|
+
*/
|
|
20
|
+
headingBlank?: boolean;
|
|
12
21
|
/**
|
|
13
22
|
* Custom replacement function for links
|
|
14
23
|
*/
|