telegram-md2html 1.0.1 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -2,20 +2,15 @@ class MarkdownTokenizer {
2
2
  constructor(text) {
3
3
  this.text = text;
4
4
  }
5
- /**
6
- * Tokenize the markdown text
7
- */
8
5
  tokenize() {
9
6
  const tokens = [];
10
7
  let pos = 0;
11
8
  const text = this.text;
12
9
  while (pos < text.length) {
13
- // Skip if inside code block
14
10
  if (this.isInsideCodeBlock(text, pos)) {
15
11
  pos++;
16
12
  continue;
17
13
  }
18
- // Try to match each token type (from outermost to innermost)
19
14
  const token = this.matchToken(pos);
20
15
  if (token) {
21
16
  tokens.push(token);
@@ -30,11 +25,22 @@ class MarkdownTokenizer {
30
25
  matchToken(start) {
31
26
  const text = this.text;
32
27
  const remaining = text.slice(start);
33
- // Skip if we're inside a quote marker
34
28
  if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
35
29
  return null;
36
30
  }
37
- // Match code block (triple backticks) - highest priority
31
+ // Match headings (#, ##, ###)
32
+ const headingMatch = remaining.match(/^(#{1,3})\s+(.+?)(?=\n|$)/);
33
+ if (headingMatch) {
34
+ const level = headingMatch[1].length;
35
+ const content = headingMatch[2];
36
+ return {
37
+ type: `heading_${level}`,
38
+ content: content,
39
+ start: start,
40
+ end: start + headingMatch[0].length
41
+ };
42
+ }
43
+ // Match code block (triple backticks)
38
44
  const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
39
45
  if (codeBlockMatch) {
40
46
  return {
@@ -95,13 +101,22 @@ class MarkdownTokenizer {
95
101
  end: start + underlineMatch[0].length
96
102
  };
97
103
  }
98
- // Match italic with asterisk
99
- const italicAsteriskMatch = remaining.match(/^\*([^*\n][^*]*?)\*/);
104
+ // FIXED: Match italic with asterisk - content must not start or end with whitespace; word-character neighbours are rejected below
105
+ const italicAsteriskMatch = remaining.match(/^\*([^*\s][^*]*?[^*\s])\*/);
100
106
  if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
101
107
  // Don't match if it's part of bold (**)
102
108
  if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
103
109
  return null;
104
110
  }
111
+ // Check if preceded by alphanumeric or underscore (prevents matching words like *italic* inside words)
112
+ if (start > 0 && /[a-zA-Z0-9_]/.test(text[start - 1])) {
113
+ return null;
114
+ }
115
+ // Check if followed by alphanumeric or underscore
116
+ const afterMatch = start + italicAsteriskMatch[0].length;
117
+ if (afterMatch < text.length && /[a-zA-Z0-9_]/.test(text[afterMatch])) {
118
+ return null;
119
+ }
105
120
  return {
106
121
  type: 'italic',
107
122
  content: italicAsteriskMatch[1],
@@ -109,13 +124,25 @@ class MarkdownTokenizer {
109
124
  end: start + italicAsteriskMatch[0].length
110
125
  };
111
126
  }
112
- // Match italic with underscore
113
- const italicUnderscoreMatch = remaining.match(/^_([^_\n]+?)_/);
127
+ // FIXED: Match italic with underscore - require word boundaries and not part of username
128
+ const italicUnderscoreMatch = remaining.match(/^_([^_\s][^_]*?[^_\s])_/);
114
129
  if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
115
130
  // Don't match if it's part of underline (__)
116
131
  if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
117
132
  return null;
118
133
  }
134
+ // FIXED: Don't match if preceded by @ (username) or alphanumeric
135
+ if (start > 0) {
136
+ const prevChar = text[start - 1];
137
+ if (prevChar === '@' || /[a-zA-Z0-9]/.test(prevChar)) {
138
+ return null;
139
+ }
140
+ }
141
+ // Don't match if followed by alphanumeric (part of word)
142
+ const afterMatch = start + italicUnderscoreMatch[0].length;
143
+ if (afterMatch < text.length && /[a-zA-Z0-9]/.test(text[afterMatch])) {
144
+ return null;
145
+ }
119
146
  return {
120
147
  type: 'italic',
121
148
  content: italicUnderscoreMatch[1],
@@ -137,12 +164,10 @@ class MarkdownTokenizer {
137
164
  return null;
138
165
  }
139
166
  isInsideCodeBlock(text, position) {
140
- // Check for code blocks
141
167
  const codeBlockRegex = /```[\s\S]*?```/g;
142
168
  let match;
143
169
  while ((match = codeBlockRegex.exec(text)) !== null) {
144
170
  if (position > match.index && position < match.index + match[0].length) {
145
- // But allow matching the closing ``` itself
146
171
  if (position >= match.index + match[0].length - 3) {
147
172
  return false;
148
173
  }
@@ -152,12 +177,10 @@ class MarkdownTokenizer {
152
177
  return false;
153
178
  }
154
179
  isInsideInlineCode(text, position) {
155
- // Check for inline code
156
180
  const inlineCodeRegex = /`[^`\n]*`/g;
157
181
  let match;
158
182
  while ((match = inlineCodeRegex.exec(text)) !== null) {
159
183
  if (position > match.index && position < match.index + match[0].length) {
160
- // But allow matching the closing ` itself
161
184
  if (position === match.index + match[0].length - 1) {
162
185
  return false;
163
186
  }
@@ -212,84 +235,77 @@ class MarkdownConverter {
212
235
  constructor(options = {}) {
213
236
  this.hasCustomLinkProcessor = !!options.linkProcessor;
214
237
  this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
238
+ // Fixed: Add heading symbol option (default: '▎')
239
+ this.headingSymbol = options.headingSymbol ?? '▎';
215
240
  this.options = {
216
241
  escapeHtml: options.escapeHtml ?? true,
217
242
  autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
243
+ headingSymbol: options.headingSymbol ?? '▎',
244
+ headingBlank: options.headingBlank ?? false,
218
245
  linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
219
246
  codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
220
247
  };
221
248
  }
222
- /**
223
- * Convert markdown text to Telegram HTML
224
- */
225
249
  convert(text) {
226
- // Auto-close code blocks if enabled
227
250
  let processedText = this.options.autoCloseCodeBlocks
228
251
  ? autoCloseCodeBlocks(text)
229
252
  : text;
230
- // First pass: convert blockquotes (they should be at line starts)
231
253
  processedText = this.preprocessBlockquotes(processedText);
232
- // Convert the text recursively
233
254
  let result = this.convertRecursive(processedText);
234
- // Process blockquote markers
235
255
  result = this.processBlockquoteMarkers(result);
236
- // Only trim if there's actual content (not just whitespace)
237
256
  if (result.trim() === '') {
238
- return text; // Return original text (spaces) if result is empty
257
+ return text;
239
258
  }
240
259
  return result.trim();
241
260
  }
242
- /**
243
- * Recursively convert markdown, handling nested styles
244
- */
245
261
  convertRecursive(text, depth = 0) {
246
262
  if (depth > 10)
247
- return text; // Prevent infinite recursion
248
- // Tokenize the text
263
+ return text;
249
264
  const tokenizer = new MarkdownTokenizer(text);
250
265
  const tokens = tokenizer.tokenize();
251
- // If no tokens found, return the text as-is (with HTML escaping)
252
266
  if (tokens.length === 0) {
253
267
  return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
254
268
  }
255
269
  let result = '';
256
270
  let lastPos = 0;
257
271
  for (const token of tokens) {
258
- // Add text before token
259
272
  if (token.start > lastPos) {
260
273
  const textBefore = text.slice(lastPos, token.start);
261
274
  result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
262
275
  }
263
- // Handle code blocks specially (no recursive parsing inside)
276
+ // Fixed: Handle headings
277
+ if (token.type === 'heading_2' || token.type === 'heading_3') {
278
+ token.type === 'heading_2' ? '##' : '###';
279
+ const processedContent = this.convertRecursive(token.content, depth + 1);
280
+ // Fixed: Add symbol and bold styling
281
+ const symbol = this.options.headingBlank ? '' : this.headingSymbol;
282
+ const headingText = symbol ? `${symbol} ${processedContent}` : processedContent;
283
+ result += `<b>${headingText}</b>`;
284
+ lastPos = token.end;
285
+ continue;
286
+ }
264
287
  if (token.type === 'code_block') {
265
288
  const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
266
289
  result += this.wrapToken(token.type, codeContent, token.language);
267
290
  lastPos = token.end;
268
291
  continue;
269
292
  }
270
- // Handle inline code specially (no recursive parsing inside)
271
293
  if (token.type === 'inline_code') {
272
294
  const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
273
295
  result += `<code>${codeContent}</code>`;
274
296
  lastPos = token.end;
275
297
  continue;
276
298
  }
277
- // Process other token content recursively
278
299
  const tokenContent = this.convertRecursive(token.content, depth + 1);
279
- // Wrap the content in appropriate HTML tags
280
300
  result += this.wrapToken(token.type, tokenContent, token.language);
281
301
  lastPos = token.end;
282
302
  }
283
- // Add remaining text
284
303
  if (lastPos < text.length) {
285
304
  const remainingText = text.slice(lastPos);
286
305
  result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
287
306
  }
288
307
  return result;
289
308
  }
290
- /**
291
- * Wrap token content in HTML tags
292
- */
293
309
  wrapToken(type, content, language) {
294
310
  switch (type) {
295
311
  case 'bold':
@@ -303,16 +319,14 @@ class MarkdownConverter {
303
319
  case 'spoiler':
304
320
  return `<span class="tg-spoiler">${content}</span>`;
305
321
  case 'inline_code':
306
- // Already handled above
307
322
  return `<code>${content}</code>`;
308
323
  case 'code_block':
309
- // Already handled above, but handle custom processor
310
324
  if (this.hasCustomCodeBlockProcessor) {
311
325
  return this.options.codeBlockProcessor(content, language);
312
326
  }
313
327
  const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
314
328
  const langAttr = language ? ` class="language-${language}"` : '';
315
- return `\n<pre><code${langAttr}>${escapedCode}</code></pre>\n`;
329
+ return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
316
330
  case 'link':
317
331
  const url = language || '';
318
332
  if (this.hasCustomLinkProcessor) {
@@ -322,29 +336,23 @@ class MarkdownConverter {
322
336
  const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
323
337
  return `<a href="${escapedUrl}">${escapedText}</a>`;
324
338
  case 'quote':
325
- return `\n<blockquote>${content.trim()}</blockquote>\n`;
339
+ return `<blockquote>${content.trim()}</blockquote>`;
326
340
  case 'expandable_quote':
327
- return `\n<blockquote expandable>${content.trim()}</blockquote>\n`;
341
+ return `<blockquote expandable>${content.trim()}</blockquote>`;
328
342
  default:
329
343
  return content;
330
344
  }
331
345
  }
332
- /**
333
- * Preprocess blockquotes to mark them before other parsing
334
- */
335
346
  preprocessBlockquotes(text) {
336
347
  const lines = text.split('\n');
337
348
  const processedLines = [];
338
349
  for (const line of lines) {
339
350
  const trimmedLine = line.trim();
340
- // Only treat lines starting with > at the beginning of line as blockquotes
341
351
  if (trimmedLine.startsWith('**>')) {
342
- // Expandable blockquote
343
352
  const content = trimmedLine.substring(3).trim();
344
353
  processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
345
354
  }
346
355
  else if (trimmedLine.startsWith('>')) {
347
- // Regular blockquote
348
356
  const content = trimmedLine.substring(1).trim();
349
357
  processedLines.push(`[QUOTE]${content}`);
350
358
  }
@@ -354,22 +362,17 @@ class MarkdownConverter {
354
362
  }
355
363
  return processedLines.join('\n');
356
364
  }
357
- /**
358
- * Process blockquote markers
359
- */
360
365
  processBlockquoteMarkers(text) {
361
366
  let result = text;
362
- // Replace expandable quote markers (process content recursively)
363
367
  const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
364
368
  result = result.replace(expandableQuoteRegex, (match, content) => {
365
369
  const processedContent = this.convertRecursive(content);
366
- return `\n<blockquote expandable>${processedContent.trim()}</blockquote>\n`;
370
+ return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
367
371
  });
368
- // Replace regular quote markers (process content recursively)
369
372
  const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
370
373
  result = result.replace(quoteRegex, (match, content) => {
371
374
  const processedContent = this.convertRecursive(content);
372
- return `\n<blockquote>${processedContent.trim()}</blockquote>\n`;
375
+ return `<blockquote>${processedContent.trim()}</blockquote>`;
373
376
  });
374
377
  return result;
375
378
  }
@@ -381,7 +384,7 @@ class MarkdownConverter {
381
384
  defaultCodeBlockProcessor(code, language) {
382
385
  const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
383
386
  const langAttr = language ? ` class="language-${language}"` : '';
384
- return `\n<pre><code${langAttr}>${escapedCode}</code></pre>\n`;
387
+ return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
385
388
  }
386
389
  }
387
390
 
package/dist/index.umd.js CHANGED
@@ -8,20 +8,15 @@
8
8
  constructor(text) {
9
9
  this.text = text;
10
10
  }
11
- /**
12
- * Tokenize the markdown text
13
- */
14
11
  tokenize() {
15
12
  const tokens = [];
16
13
  let pos = 0;
17
14
  const text = this.text;
18
15
  while (pos < text.length) {
19
- // Skip if inside code block
20
16
  if (this.isInsideCodeBlock(text, pos)) {
21
17
  pos++;
22
18
  continue;
23
19
  }
24
- // Try to match each token type (from outermost to innermost)
25
20
  const token = this.matchToken(pos);
26
21
  if (token) {
27
22
  tokens.push(token);
@@ -36,11 +31,22 @@
36
31
  matchToken(start) {
37
32
  const text = this.text;
38
33
  const remaining = text.slice(start);
39
- // Skip if we're inside a quote marker
40
34
  if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
41
35
  return null;
42
36
  }
43
- // Match code block (triple backticks) - highest priority
37
+ // Match headings (#, ##, ###)
38
+ const headingMatch = remaining.match(/^(#{1,3})\s+(.+?)(?=\n|$)/);
39
+ if (headingMatch) {
40
+ const level = headingMatch[1].length;
41
+ const content = headingMatch[2];
42
+ return {
43
+ type: `heading_${level}`,
44
+ content: content,
45
+ start: start,
46
+ end: start + headingMatch[0].length
47
+ };
48
+ }
49
+ // Match code block (triple backticks)
44
50
  const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
45
51
  if (codeBlockMatch) {
46
52
  return {
@@ -101,13 +107,22 @@
101
107
  end: start + underlineMatch[0].length
102
108
  };
103
109
  }
104
- // Match italic with asterisk
105
- const italicAsteriskMatch = remaining.match(/^\*([^*\n][^*]*?)\*/);
110
+ // FIXED: Match italic with asterisk - content must not start or end with whitespace; word-character neighbours are rejected below
111
+ const italicAsteriskMatch = remaining.match(/^\*([^*\s][^*]*?[^*\s])\*/);
106
112
  if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
107
113
  // Don't match if it's part of bold (**)
108
114
  if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
109
115
  return null;
110
116
  }
117
+ // Check if preceded by alphanumeric or underscore (prevents matching words like *italic* inside words)
118
+ if (start > 0 && /[a-zA-Z0-9_]/.test(text[start - 1])) {
119
+ return null;
120
+ }
121
+ // Check if followed by alphanumeric or underscore
122
+ const afterMatch = start + italicAsteriskMatch[0].length;
123
+ if (afterMatch < text.length && /[a-zA-Z0-9_]/.test(text[afterMatch])) {
124
+ return null;
125
+ }
111
126
  return {
112
127
  type: 'italic',
113
128
  content: italicAsteriskMatch[1],
@@ -115,13 +130,25 @@
115
130
  end: start + italicAsteriskMatch[0].length
116
131
  };
117
132
  }
118
- // Match italic with underscore
119
- const italicUnderscoreMatch = remaining.match(/^_([^_\n]+?)_/);
133
+ // FIXED: Match italic with underscore - require word boundaries and not part of username
134
+ const italicUnderscoreMatch = remaining.match(/^_([^_\s][^_]*?[^_\s])_/);
120
135
  if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
121
136
  // Don't match if it's part of underline (__)
122
137
  if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
123
138
  return null;
124
139
  }
140
+ // FIXED: Don't match if preceded by @ (username) or alphanumeric
141
+ if (start > 0) {
142
+ const prevChar = text[start - 1];
143
+ if (prevChar === '@' || /[a-zA-Z0-9]/.test(prevChar)) {
144
+ return null;
145
+ }
146
+ }
147
+ // Don't match if followed by alphanumeric (part of word)
148
+ const afterMatch = start + italicUnderscoreMatch[0].length;
149
+ if (afterMatch < text.length && /[a-zA-Z0-9]/.test(text[afterMatch])) {
150
+ return null;
151
+ }
125
152
  return {
126
153
  type: 'italic',
127
154
  content: italicUnderscoreMatch[1],
@@ -143,12 +170,10 @@
143
170
  return null;
144
171
  }
145
172
  isInsideCodeBlock(text, position) {
146
- // Check for code blocks
147
173
  const codeBlockRegex = /```[\s\S]*?```/g;
148
174
  let match;
149
175
  while ((match = codeBlockRegex.exec(text)) !== null) {
150
176
  if (position > match.index && position < match.index + match[0].length) {
151
- // But allow matching the closing ``` itself
152
177
  if (position >= match.index + match[0].length - 3) {
153
178
  return false;
154
179
  }
@@ -158,12 +183,10 @@
158
183
  return false;
159
184
  }
160
185
  isInsideInlineCode(text, position) {
161
- // Check for inline code
162
186
  const inlineCodeRegex = /`[^`\n]*`/g;
163
187
  let match;
164
188
  while ((match = inlineCodeRegex.exec(text)) !== null) {
165
189
  if (position > match.index && position < match.index + match[0].length) {
166
- // But allow matching the closing ` itself
167
190
  if (position === match.index + match[0].length - 1) {
168
191
  return false;
169
192
  }
@@ -218,84 +241,77 @@
218
241
  constructor(options = {}) {
219
242
  this.hasCustomLinkProcessor = !!options.linkProcessor;
220
243
  this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
244
+ // Fixed: Add heading symbol option (default: '▎')
245
+ this.headingSymbol = options.headingSymbol ?? '▎';
221
246
  this.options = {
222
247
  escapeHtml: options.escapeHtml ?? true,
223
248
  autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
249
+ headingSymbol: options.headingSymbol ?? '▎',
250
+ headingBlank: options.headingBlank ?? false,
224
251
  linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
225
252
  codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
226
253
  };
227
254
  }
228
- /**
229
- * Convert markdown text to Telegram HTML
230
- */
231
255
  convert(text) {
232
- // Auto-close code blocks if enabled
233
256
  let processedText = this.options.autoCloseCodeBlocks
234
257
  ? autoCloseCodeBlocks(text)
235
258
  : text;
236
- // First pass: convert blockquotes (they should be at line starts)
237
259
  processedText = this.preprocessBlockquotes(processedText);
238
- // Convert the text recursively
239
260
  let result = this.convertRecursive(processedText);
240
- // Process blockquote markers
241
261
  result = this.processBlockquoteMarkers(result);
242
- // Only trim if there's actual content (not just whitespace)
243
262
  if (result.trim() === '') {
244
- return text; // Return original text (spaces) if result is empty
263
+ return text;
245
264
  }
246
265
  return result.trim();
247
266
  }
248
- /**
249
- * Recursively convert markdown, handling nested styles
250
- */
251
267
  convertRecursive(text, depth = 0) {
252
268
  if (depth > 10)
253
- return text; // Prevent infinite recursion
254
- // Tokenize the text
269
+ return text;
255
270
  const tokenizer = new MarkdownTokenizer(text);
256
271
  const tokens = tokenizer.tokenize();
257
- // If no tokens found, return the text as-is (with HTML escaping)
258
272
  if (tokens.length === 0) {
259
273
  return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
260
274
  }
261
275
  let result = '';
262
276
  let lastPos = 0;
263
277
  for (const token of tokens) {
264
- // Add text before token
265
278
  if (token.start > lastPos) {
266
279
  const textBefore = text.slice(lastPos, token.start);
267
280
  result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
268
281
  }
269
- // Handle code blocks specially (no recursive parsing inside)
282
+ // Fixed: Handle headings
283
+ if (token.type === 'heading_2' || token.type === 'heading_3') {
284
+ token.type === 'heading_2' ? '##' : '###';
285
+ const processedContent = this.convertRecursive(token.content, depth + 1);
286
+ // Fixed: Add symbol and bold styling
287
+ const symbol = this.options.headingBlank ? '' : this.headingSymbol;
288
+ const headingText = symbol ? `${symbol} ${processedContent}` : processedContent;
289
+ result += `<b>${headingText}</b>`;
290
+ lastPos = token.end;
291
+ continue;
292
+ }
270
293
  if (token.type === 'code_block') {
271
294
  const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
272
295
  result += this.wrapToken(token.type, codeContent, token.language);
273
296
  lastPos = token.end;
274
297
  continue;
275
298
  }
276
- // Handle inline code specially (no recursive parsing inside)
277
299
  if (token.type === 'inline_code') {
278
300
  const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
279
301
  result += `<code>${codeContent}</code>`;
280
302
  lastPos = token.end;
281
303
  continue;
282
304
  }
283
- // Process other token content recursively
284
305
  const tokenContent = this.convertRecursive(token.content, depth + 1);
285
- // Wrap the content in appropriate HTML tags
286
306
  result += this.wrapToken(token.type, tokenContent, token.language);
287
307
  lastPos = token.end;
288
308
  }
289
- // Add remaining text
290
309
  if (lastPos < text.length) {
291
310
  const remainingText = text.slice(lastPos);
292
311
  result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
293
312
  }
294
313
  return result;
295
314
  }
296
- /**
297
- * Wrap token content in HTML tags
298
- */
299
315
  wrapToken(type, content, language) {
300
316
  switch (type) {
301
317
  case 'bold':
@@ -309,16 +325,14 @@
309
325
  case 'spoiler':
310
326
  return `<span class="tg-spoiler">${content}</span>`;
311
327
  case 'inline_code':
312
- // Already handled above
313
328
  return `<code>${content}</code>`;
314
329
  case 'code_block':
315
- // Already handled above, but handle custom processor
316
330
  if (this.hasCustomCodeBlockProcessor) {
317
331
  return this.options.codeBlockProcessor(content, language);
318
332
  }
319
333
  const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
320
334
  const langAttr = language ? ` class="language-${language}"` : '';
321
- return `\n<pre><code${langAttr}>${escapedCode}</code></pre>\n`;
335
+ return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
322
336
  case 'link':
323
337
  const url = language || '';
324
338
  if (this.hasCustomLinkProcessor) {
@@ -328,29 +342,23 @@
328
342
  const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
329
343
  return `<a href="${escapedUrl}">${escapedText}</a>`;
330
344
  case 'quote':
331
- return `\n<blockquote>${content.trim()}</blockquote>\n`;
345
+ return `<blockquote>${content.trim()}</blockquote>`;
332
346
  case 'expandable_quote':
333
- return `\n<blockquote expandable>${content.trim()}</blockquote>\n`;
347
+ return `<blockquote expandable>${content.trim()}</blockquote>`;
334
348
  default:
335
349
  return content;
336
350
  }
337
351
  }
338
- /**
339
- * Preprocess blockquotes to mark them before other parsing
340
- */
341
352
  preprocessBlockquotes(text) {
342
353
  const lines = text.split('\n');
343
354
  const processedLines = [];
344
355
  for (const line of lines) {
345
356
  const trimmedLine = line.trim();
346
- // Only treat lines starting with > at the beginning of line as blockquotes
347
357
  if (trimmedLine.startsWith('**>')) {
348
- // Expandable blockquote
349
358
  const content = trimmedLine.substring(3).trim();
350
359
  processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
351
360
  }
352
361
  else if (trimmedLine.startsWith('>')) {
353
- // Regular blockquote
354
362
  const content = trimmedLine.substring(1).trim();
355
363
  processedLines.push(`[QUOTE]${content}`);
356
364
  }
@@ -360,22 +368,17 @@
360
368
  }
361
369
  return processedLines.join('\n');
362
370
  }
363
- /**
364
- * Process blockquote markers
365
- */
366
371
  processBlockquoteMarkers(text) {
367
372
  let result = text;
368
- // Replace expandable quote markers (process content recursively)
369
373
  const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
370
374
  result = result.replace(expandableQuoteRegex, (match, content) => {
371
375
  const processedContent = this.convertRecursive(content);
372
- return `\n<blockquote expandable>${processedContent.trim()}</blockquote>\n`;
376
+ return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
373
377
  });
374
- // Replace regular quote markers (process content recursively)
375
378
  const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
376
379
  result = result.replace(quoteRegex, (match, content) => {
377
380
  const processedContent = this.convertRecursive(content);
378
- return `\n<blockquote>${processedContent.trim()}</blockquote>\n`;
381
+ return `<blockquote>${processedContent.trim()}</blockquote>`;
379
382
  });
380
383
  return result;
381
384
  }
@@ -387,7 +390,7 @@
387
390
  defaultCodeBlockProcessor(code, language) {
388
391
  const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
389
392
  const langAttr = language ? ` class="language-${language}"` : '';
390
- return `\n<pre><code${langAttr}>${escapedCode}</code></pre>\n`;
393
+ return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
391
394
  }
392
395
  }
393
396
 
@@ -2,9 +2,6 @@ import { Token } from './types';
2
2
  export declare class MarkdownTokenizer {
3
3
  private text;
4
4
  constructor(text: string);
5
- /**
6
- * Tokenize the markdown text
7
- */
8
5
  tokenize(): Token[];
9
6
  private matchToken;
10
7
  private isInsideCodeBlock;
package/dist/types.d.ts CHANGED
@@ -9,6 +9,15 @@ export interface ConvertOptions {
9
9
  * @default true
10
10
  */
11
11
  autoCloseCodeBlocks?: boolean;
12
+ /**
13
+ * Custom symbol for headings (default: '▎')
14
+ */
15
+ headingSymbol?: string;
16
+ /**
17
+ * Whether to omit the heading symbol, so headings render without a prefix
18
+ * @default false
19
+ */
20
+ headingBlank?: boolean;
12
21
  /**
13
22
  * Custom replacement function for links
14
23
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "telegram-md2html",
3
- "version": "1.0.1",
3
+ "version": "1.0.3",
4
4
  "description": "A smart converter for Telegram-style Markdown to Telegram-compatible HTML",
5
5
  "main": "./dist/index.cjs",
6
6
  "module": "./dist/index.mjs",