telegram-md2html 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -2,20 +2,15 @@ class MarkdownTokenizer {
2
2
  constructor(text) {
3
3
  this.text = text;
4
4
  }
5
- /**
6
- * Tokenize the markdown text
7
- */
8
5
  tokenize() {
9
6
  const tokens = [];
10
7
  let pos = 0;
11
8
  const text = this.text;
12
9
  while (pos < text.length) {
13
- // Skip if inside code block
14
10
  if (this.isInsideCodeBlock(text, pos)) {
15
11
  pos++;
16
12
  continue;
17
13
  }
18
- // Try to match each token type (from outermost to innermost)
19
14
  const token = this.matchToken(pos);
20
15
  if (token) {
21
16
  tokens.push(token);
@@ -30,11 +25,22 @@ class MarkdownTokenizer {
30
25
  matchToken(start) {
31
26
  const text = this.text;
32
27
  const remaining = text.slice(start);
33
- // Skip if we're inside a quote marker
34
28
  if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
35
29
  return null;
36
30
  }
37
- // Match code block (triple backticks) - highest priority
31
+ // Match headings (###, ##)
32
+ const headingMatch = remaining.match(/^(#{1,3})\s+(.+?)(?=\n|$)/);
33
+ if (headingMatch) {
34
+ const level = headingMatch[1].length;
35
+ const content = headingMatch[2];
36
+ return {
37
+ type: `heading_${level}`,
38
+ content: content,
39
+ start: start,
40
+ end: start + headingMatch[0].length
41
+ };
42
+ }
43
+ // Match code block (triple backticks)
38
44
  const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
39
45
  if (codeBlockMatch) {
40
46
  return {
@@ -95,13 +101,22 @@ class MarkdownTokenizer {
95
101
  end: start + underlineMatch[0].length
96
102
  };
97
103
  }
98
- // Match italic with asterisk
99
- const italicAsteriskMatch = remaining.match(/^\*([^*\n][^*]*?)\*/);
104
+ // FIXED: Match italic with asterisk - require space before and after
105
+ const italicAsteriskMatch = remaining.match(/^\*([^*\s][^*]*?[^*\s])\*/);
100
106
  if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
101
107
  // Don't match if it's part of bold (**)
102
108
  if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
103
109
  return null;
104
110
  }
111
+ // Check if preceded by alphanumeric or underscore (prevents matching words like *italic* inside words)
112
+ if (start > 0 && /[a-zA-Z0-9_]/.test(text[start - 1])) {
113
+ return null;
114
+ }
115
+ // Check if followed by alphanumeric or underscore
116
+ const afterMatch = start + italicAsteriskMatch[0].length;
117
+ if (afterMatch < text.length && /[a-zA-Z0-9_]/.test(text[afterMatch])) {
118
+ return null;
119
+ }
105
120
  return {
106
121
  type: 'italic',
107
122
  content: italicAsteriskMatch[1],
@@ -109,13 +124,25 @@ class MarkdownTokenizer {
109
124
  end: start + italicAsteriskMatch[0].length
110
125
  };
111
126
  }
112
- // Match italic with underscore
113
- const italicUnderscoreMatch = remaining.match(/^_([^_\n]+?)_/);
127
+ // FIXED: Match italic with underscore - require word boundaries and not part of username
128
+ const italicUnderscoreMatch = remaining.match(/^_([^_\s][^_]*?[^_\s])_/);
114
129
  if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
115
130
  // Don't match if it's part of underline (__)
116
131
  if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
117
132
  return null;
118
133
  }
134
+ // FIXED: Don't match if preceded by @ (username) or alphanumeric
135
+ if (start > 0) {
136
+ const prevChar = text[start - 1];
137
+ if (prevChar === '@' || /[a-zA-Z0-9]/.test(prevChar)) {
138
+ return null;
139
+ }
140
+ }
141
+ // Don't match if followed by alphanumeric (part of word)
142
+ const afterMatch = start + italicUnderscoreMatch[0].length;
143
+ if (afterMatch < text.length && /[a-zA-Z0-9]/.test(text[afterMatch])) {
144
+ return null;
145
+ }
119
146
  return {
120
147
  type: 'italic',
121
148
  content: italicUnderscoreMatch[1],
@@ -137,12 +164,10 @@ class MarkdownTokenizer {
137
164
  return null;
138
165
  }
139
166
  isInsideCodeBlock(text, position) {
140
- // Check for code blocks
141
167
  const codeBlockRegex = /```[\s\S]*?```/g;
142
168
  let match;
143
169
  while ((match = codeBlockRegex.exec(text)) !== null) {
144
170
  if (position > match.index && position < match.index + match[0].length) {
145
- // But allow matching the closing ``` itself
146
171
  if (position >= match.index + match[0].length - 3) {
147
172
  return false;
148
173
  }
@@ -152,12 +177,10 @@ class MarkdownTokenizer {
152
177
  return false;
153
178
  }
154
179
  isInsideInlineCode(text, position) {
155
- // Check for inline code
156
180
  const inlineCodeRegex = /`[^`\n]*`/g;
157
181
  let match;
158
182
  while ((match = inlineCodeRegex.exec(text)) !== null) {
159
183
  if (position > match.index && position < match.index + match[0].length) {
160
- // But allow matching the closing ` itself
161
184
  if (position === match.index + match[0].length - 1) {
162
185
  return false;
163
186
  }
@@ -212,86 +235,77 @@ class MarkdownConverter {
212
235
  constructor(options = {}) {
213
236
  this.hasCustomLinkProcessor = !!options.linkProcessor;
214
237
  this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
238
+ // Fixed: Add heading symbol option (default: '▎')
239
+ this.headingSymbol = options.headingSymbol ?? '▎';
215
240
  this.options = {
216
241
  escapeHtml: options.escapeHtml ?? true,
217
242
  autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
243
+ headingSymbol: options.headingSymbol ?? '▎',
244
+ headingBlank: options.headingBlank ?? false,
218
245
  linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
219
246
  codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
220
247
  };
221
248
  }
222
- /**
223
- * Convert markdown text to Telegram HTML
224
- */
225
249
  convert(text) {
226
- // Auto-close code blocks if enabled
227
250
  let processedText = this.options.autoCloseCodeBlocks
228
251
  ? autoCloseCodeBlocks(text)
229
252
  : text;
230
- // First pass: convert blockquotes (they should be at line starts)
231
253
  processedText = this.preprocessBlockquotes(processedText);
232
- // Convert the text recursively
233
254
  let result = this.convertRecursive(processedText);
234
- // Process blockquote markers
235
255
  result = this.processBlockquoteMarkers(result);
236
- // Only trim if there's actual content (not just whitespace)
237
256
  if (result.trim() === '') {
238
- return text; // Return original text (spaces) if result is empty
257
+ return text;
239
258
  }
240
259
  return result.trim();
241
260
  }
242
- /**
243
- * Recursively convert markdown, handling nested styles
244
- */
245
261
  convertRecursive(text, depth = 0) {
246
262
  if (depth > 10)
247
- return text; // Prevent infinite recursion
248
- // Tokenize the text
263
+ return text;
249
264
  const tokenizer = new MarkdownTokenizer(text);
250
265
  const tokens = tokenizer.tokenize();
251
- // If no tokens found, return the text as-is (with HTML escaping)
252
266
  if (tokens.length === 0) {
253
267
  return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
254
268
  }
255
269
  let result = '';
256
270
  let lastPos = 0;
257
271
  for (const token of tokens) {
258
- // Add text before token
259
272
  if (token.start > lastPos) {
260
273
  const textBefore = text.slice(lastPos, token.start);
261
274
  result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
262
275
  }
263
- // Handle code blocks specially (no recursive parsing inside)
276
+ // Fixed: Handle headings
277
+ if (token.type === 'heading_2' || token.type === 'heading_3') {
278
+ token.type === 'heading_2' ? '##' : '###';
279
+ const processedContent = this.convertRecursive(token.content, depth + 1);
280
+ // Fixed: Add symbol and bold styling
281
+ const symbol = this.options.headingBlank ? '' : this.headingSymbol;
282
+ const headingText = symbol ? `${symbol} ${processedContent}` : processedContent;
283
+ result += `<b>${headingText}</b>`;
284
+ lastPos = token.end;
285
+ continue;
286
+ }
264
287
  if (token.type === 'code_block') {
265
288
  const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
266
289
  result += this.wrapToken(token.type, codeContent, token.language);
267
290
  lastPos = token.end;
268
291
  continue;
269
292
  }
270
- // Handle inline code specially (no recursive parsing inside)
271
293
  if (token.type === 'inline_code') {
272
294
  const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
273
295
  result += `<code>${codeContent}</code>`;
274
296
  lastPos = token.end;
275
297
  continue;
276
298
  }
277
- // Process other token content recursively
278
299
  const tokenContent = this.convertRecursive(token.content, depth + 1);
279
- // Wrap the content in appropriate HTML tags
280
300
  result += this.wrapToken(token.type, tokenContent, token.language);
281
301
  lastPos = token.end;
282
302
  }
283
- // Add remaining text
284
303
  if (lastPos < text.length) {
285
304
  const remainingText = text.slice(lastPos);
286
305
  result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
287
306
  }
288
307
  return result;
289
308
  }
290
- /**
291
- * Wrap token content in HTML tags
292
- * FIXED: Removed extra newlines that were being added around code blocks and quotes
293
- * Previously added \n before and after, now returns clean tags without extra whitespace
294
- */
295
309
  wrapToken(type, content, language) {
296
310
  switch (type) {
297
311
  case 'bold':
@@ -305,16 +319,13 @@ class MarkdownConverter {
305
319
  case 'spoiler':
306
320
  return `<span class="tg-spoiler">${content}</span>`;
307
321
  case 'inline_code':
308
- // Already handled above
309
322
  return `<code>${content}</code>`;
310
323
  case 'code_block':
311
- // Already handled above, but handle custom processor
312
324
  if (this.hasCustomCodeBlockProcessor) {
313
325
  return this.options.codeBlockProcessor(content, language);
314
326
  }
315
327
  const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
316
328
  const langAttr = language ? ` class="language-${language}"` : '';
317
- // FIXED: Removed \n before and after - now returns just the tag
318
329
  return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
319
330
  case 'link':
320
331
  const url = language || '';
@@ -325,31 +336,23 @@ class MarkdownConverter {
325
336
  const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
326
337
  return `<a href="${escapedUrl}">${escapedText}</a>`;
327
338
  case 'quote':
328
- // FIXED: Removed \n before and after - now returns just the tag
329
339
  return `<blockquote>${content.trim()}</blockquote>`;
330
340
  case 'expandable_quote':
331
- // FIXED: Removed \n before and after - now returns just the tag
332
341
  return `<blockquote expandable>${content.trim()}</blockquote>`;
333
342
  default:
334
343
  return content;
335
344
  }
336
345
  }
337
- /**
338
- * Preprocess blockquotes to mark them before other parsing
339
- */
340
346
  preprocessBlockquotes(text) {
341
347
  const lines = text.split('\n');
342
348
  const processedLines = [];
343
349
  for (const line of lines) {
344
350
  const trimmedLine = line.trim();
345
- // Only treat lines starting with > at the beginning of line as blockquotes
346
351
  if (trimmedLine.startsWith('**>')) {
347
- // Expandable blockquote
348
352
  const content = trimmedLine.substring(3).trim();
349
353
  processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
350
354
  }
351
355
  else if (trimmedLine.startsWith('>')) {
352
- // Regular blockquote
353
356
  const content = trimmedLine.substring(1).trim();
354
357
  processedLines.push(`[QUOTE]${content}`);
355
358
  }
@@ -359,24 +362,16 @@ class MarkdownConverter {
359
362
  }
360
363
  return processedLines.join('\n');
361
364
  }
362
- /**
363
- * Process blockquote markers
364
- * FIXED: Removed extra newlines from the replacement strings
365
- */
366
365
  processBlockquoteMarkers(text) {
367
366
  let result = text;
368
- // Replace expandable quote markers (process content recursively)
369
367
  const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
370
368
  result = result.replace(expandableQuoteRegex, (match, content) => {
371
369
  const processedContent = this.convertRecursive(content);
372
- // FIXED: Removed \n before and after
373
370
  return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
374
371
  });
375
- // Replace regular quote markers (process content recursively)
376
372
  const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
377
373
  result = result.replace(quoteRegex, (match, content) => {
378
374
  const processedContent = this.convertRecursive(content);
379
- // FIXED: Removed \n before and after
380
375
  return `<blockquote>${processedContent.trim()}</blockquote>`;
381
376
  });
382
377
  return result;
@@ -389,7 +384,6 @@ class MarkdownConverter {
389
384
  defaultCodeBlockProcessor(code, language) {
390
385
  const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
391
386
  const langAttr = language ? ` class="language-${language}"` : '';
392
- // FIXED: Removed \n before and after in default processor too
393
387
  return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
394
388
  }
395
389
  }
package/dist/index.umd.js CHANGED
@@ -8,20 +8,15 @@
8
8
  constructor(text) {
9
9
  this.text = text;
10
10
  }
11
- /**
12
- * Tokenize the markdown text
13
- */
14
11
  tokenize() {
15
12
  const tokens = [];
16
13
  let pos = 0;
17
14
  const text = this.text;
18
15
  while (pos < text.length) {
19
- // Skip if inside code block
20
16
  if (this.isInsideCodeBlock(text, pos)) {
21
17
  pos++;
22
18
  continue;
23
19
  }
24
- // Try to match each token type (from outermost to innermost)
25
20
  const token = this.matchToken(pos);
26
21
  if (token) {
27
22
  tokens.push(token);
@@ -36,11 +31,22 @@
36
31
  matchToken(start) {
37
32
  const text = this.text;
38
33
  const remaining = text.slice(start);
39
- // Skip if we're inside a quote marker
40
34
  if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
41
35
  return null;
42
36
  }
43
- // Match code block (triple backticks) - highest priority
37
+ // Match headings (###, ##)
38
+ const headingMatch = remaining.match(/^(#{1,3})\s+(.+?)(?=\n|$)/);
39
+ if (headingMatch) {
40
+ const level = headingMatch[1].length;
41
+ const content = headingMatch[2];
42
+ return {
43
+ type: `heading_${level}`,
44
+ content: content,
45
+ start: start,
46
+ end: start + headingMatch[0].length
47
+ };
48
+ }
49
+ // Match code block (triple backticks)
44
50
  const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
45
51
  if (codeBlockMatch) {
46
52
  return {
@@ -101,13 +107,22 @@
101
107
  end: start + underlineMatch[0].length
102
108
  };
103
109
  }
104
- // Match italic with asterisk
105
- const italicAsteriskMatch = remaining.match(/^\*([^*\n][^*]*?)\*/);
110
+ // FIXED: Match italic with asterisk - require space before and after
111
+ const italicAsteriskMatch = remaining.match(/^\*([^*\s][^*]*?[^*\s])\*/);
106
112
  if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
107
113
  // Don't match if it's part of bold (**)
108
114
  if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
109
115
  return null;
110
116
  }
117
+ // Check if preceded by alphanumeric or underscore (prevents matching words like *italic* inside words)
118
+ if (start > 0 && /[a-zA-Z0-9_]/.test(text[start - 1])) {
119
+ return null;
120
+ }
121
+ // Check if followed by alphanumeric or underscore
122
+ const afterMatch = start + italicAsteriskMatch[0].length;
123
+ if (afterMatch < text.length && /[a-zA-Z0-9_]/.test(text[afterMatch])) {
124
+ return null;
125
+ }
111
126
  return {
112
127
  type: 'italic',
113
128
  content: italicAsteriskMatch[1],
@@ -115,13 +130,25 @@
115
130
  end: start + italicAsteriskMatch[0].length
116
131
  };
117
132
  }
118
- // Match italic with underscore
119
- const italicUnderscoreMatch = remaining.match(/^_([^_\n]+?)_/);
133
+ // FIXED: Match italic with underscore - require word boundaries and not part of username
134
+ const italicUnderscoreMatch = remaining.match(/^_([^_\s][^_]*?[^_\s])_/);
120
135
  if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
121
136
  // Don't match if it's part of underline (__)
122
137
  if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
123
138
  return null;
124
139
  }
140
+ // FIXED: Don't match if preceded by @ (username) or alphanumeric
141
+ if (start > 0) {
142
+ const prevChar = text[start - 1];
143
+ if (prevChar === '@' || /[a-zA-Z0-9]/.test(prevChar)) {
144
+ return null;
145
+ }
146
+ }
147
+ // Don't match if followed by alphanumeric (part of word)
148
+ const afterMatch = start + italicUnderscoreMatch[0].length;
149
+ if (afterMatch < text.length && /[a-zA-Z0-9]/.test(text[afterMatch])) {
150
+ return null;
151
+ }
125
152
  return {
126
153
  type: 'italic',
127
154
  content: italicUnderscoreMatch[1],
@@ -143,12 +170,10 @@
143
170
  return null;
144
171
  }
145
172
  isInsideCodeBlock(text, position) {
146
- // Check for code blocks
147
173
  const codeBlockRegex = /```[\s\S]*?```/g;
148
174
  let match;
149
175
  while ((match = codeBlockRegex.exec(text)) !== null) {
150
176
  if (position > match.index && position < match.index + match[0].length) {
151
- // But allow matching the closing ``` itself
152
177
  if (position >= match.index + match[0].length - 3) {
153
178
  return false;
154
179
  }
@@ -158,12 +183,10 @@
158
183
  return false;
159
184
  }
160
185
  isInsideInlineCode(text, position) {
161
- // Check for inline code
162
186
  const inlineCodeRegex = /`[^`\n]*`/g;
163
187
  let match;
164
188
  while ((match = inlineCodeRegex.exec(text)) !== null) {
165
189
  if (position > match.index && position < match.index + match[0].length) {
166
- // But allow matching the closing ` itself
167
190
  if (position === match.index + match[0].length - 1) {
168
191
  return false;
169
192
  }
@@ -218,86 +241,77 @@
218
241
  constructor(options = {}) {
219
242
  this.hasCustomLinkProcessor = !!options.linkProcessor;
220
243
  this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
244
+ // Fixed: Add heading symbol option (default: '▎')
245
+ this.headingSymbol = options.headingSymbol ?? '▎';
221
246
  this.options = {
222
247
  escapeHtml: options.escapeHtml ?? true,
223
248
  autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
249
+ headingSymbol: options.headingSymbol ?? '▎',
250
+ headingBlank: options.headingBlank ?? false,
224
251
  linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
225
252
  codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
226
253
  };
227
254
  }
228
- /**
229
- * Convert markdown text to Telegram HTML
230
- */
231
255
  convert(text) {
232
- // Auto-close code blocks if enabled
233
256
  let processedText = this.options.autoCloseCodeBlocks
234
257
  ? autoCloseCodeBlocks(text)
235
258
  : text;
236
- // First pass: convert blockquotes (they should be at line starts)
237
259
  processedText = this.preprocessBlockquotes(processedText);
238
- // Convert the text recursively
239
260
  let result = this.convertRecursive(processedText);
240
- // Process blockquote markers
241
261
  result = this.processBlockquoteMarkers(result);
242
- // Only trim if there's actual content (not just whitespace)
243
262
  if (result.trim() === '') {
244
- return text; // Return original text (spaces) if result is empty
263
+ return text;
245
264
  }
246
265
  return result.trim();
247
266
  }
248
- /**
249
- * Recursively convert markdown, handling nested styles
250
- */
251
267
  convertRecursive(text, depth = 0) {
252
268
  if (depth > 10)
253
- return text; // Prevent infinite recursion
254
- // Tokenize the text
269
+ return text;
255
270
  const tokenizer = new MarkdownTokenizer(text);
256
271
  const tokens = tokenizer.tokenize();
257
- // If no tokens found, return the text as-is (with HTML escaping)
258
272
  if (tokens.length === 0) {
259
273
  return this.options.escapeHtml ? escapeTelegramHtml(text) : text;
260
274
  }
261
275
  let result = '';
262
276
  let lastPos = 0;
263
277
  for (const token of tokens) {
264
- // Add text before token
265
278
  if (token.start > lastPos) {
266
279
  const textBefore = text.slice(lastPos, token.start);
267
280
  result += this.options.escapeHtml ? escapeTelegramHtml(textBefore) : textBefore;
268
281
  }
269
- // Handle code blocks specially (no recursive parsing inside)
282
+ // Fixed: Handle headings
283
+ if (token.type === 'heading_2' || token.type === 'heading_3') {
284
+ token.type === 'heading_2' ? '##' : '###';
285
+ const processedContent = this.convertRecursive(token.content, depth + 1);
286
+ // Fixed: Add symbol and bold styling
287
+ const symbol = this.options.headingBlank ? '' : this.headingSymbol;
288
+ const headingText = symbol ? `${symbol} ${processedContent}` : processedContent;
289
+ result += `<b>${headingText}</b>`;
290
+ lastPos = token.end;
291
+ continue;
292
+ }
270
293
  if (token.type === 'code_block') {
271
294
  const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
272
295
  result += this.wrapToken(token.type, codeContent, token.language);
273
296
  lastPos = token.end;
274
297
  continue;
275
298
  }
276
- // Handle inline code specially (no recursive parsing inside)
277
299
  if (token.type === 'inline_code') {
278
300
  const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
279
301
  result += `<code>${codeContent}</code>`;
280
302
  lastPos = token.end;
281
303
  continue;
282
304
  }
283
- // Process other token content recursively
284
305
  const tokenContent = this.convertRecursive(token.content, depth + 1);
285
- // Wrap the content in appropriate HTML tags
286
306
  result += this.wrapToken(token.type, tokenContent, token.language);
287
307
  lastPos = token.end;
288
308
  }
289
- // Add remaining text
290
309
  if (lastPos < text.length) {
291
310
  const remainingText = text.slice(lastPos);
292
311
  result += this.options.escapeHtml ? escapeTelegramHtml(remainingText) : remainingText;
293
312
  }
294
313
  return result;
295
314
  }
296
- /**
297
- * Wrap token content in HTML tags
298
- * FIXED: Removed extra newlines that were being added around code blocks and quotes
299
- * Previously added \n before and after, now returns clean tags without extra whitespace
300
- */
301
315
  wrapToken(type, content, language) {
302
316
  switch (type) {
303
317
  case 'bold':
@@ -311,16 +325,13 @@
311
325
  case 'spoiler':
312
326
  return `<span class="tg-spoiler">${content}</span>`;
313
327
  case 'inline_code':
314
- // Already handled above
315
328
  return `<code>${content}</code>`;
316
329
  case 'code_block':
317
- // Already handled above, but handle custom processor
318
330
  if (this.hasCustomCodeBlockProcessor) {
319
331
  return this.options.codeBlockProcessor(content, language);
320
332
  }
321
333
  const escapedCode = this.options.escapeHtml ? escapeHtml(content) : content;
322
334
  const langAttr = language ? ` class="language-${language}"` : '';
323
- // FIXED: Removed \n before and after - now returns just the tag
324
335
  return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
325
336
  case 'link':
326
337
  const url = language || '';
@@ -331,31 +342,23 @@
331
342
  const escapedText = this.options.escapeHtml ? escapeHtml(content) : content;
332
343
  return `<a href="${escapedUrl}">${escapedText}</a>`;
333
344
  case 'quote':
334
- // FIXED: Removed \n before and after - now returns just the tag
335
345
  return `<blockquote>${content.trim()}</blockquote>`;
336
346
  case 'expandable_quote':
337
- // FIXED: Removed \n before and after - now returns just the tag
338
347
  return `<blockquote expandable>${content.trim()}</blockquote>`;
339
348
  default:
340
349
  return content;
341
350
  }
342
351
  }
343
- /**
344
- * Preprocess blockquotes to mark them before other parsing
345
- */
346
352
  preprocessBlockquotes(text) {
347
353
  const lines = text.split('\n');
348
354
  const processedLines = [];
349
355
  for (const line of lines) {
350
356
  const trimmedLine = line.trim();
351
- // Only treat lines starting with > at the beginning of line as blockquotes
352
357
  if (trimmedLine.startsWith('**>')) {
353
- // Expandable blockquote
354
358
  const content = trimmedLine.substring(3).trim();
355
359
  processedLines.push(`[EXPANDABLE_QUOTE]${content}`);
356
360
  }
357
361
  else if (trimmedLine.startsWith('>')) {
358
- // Regular blockquote
359
362
  const content = trimmedLine.substring(1).trim();
360
363
  processedLines.push(`[QUOTE]${content}`);
361
364
  }
@@ -365,24 +368,16 @@
365
368
  }
366
369
  return processedLines.join('\n');
367
370
  }
368
- /**
369
- * Process blockquote markers
370
- * FIXED: Removed extra newlines from the replacement strings
371
- */
372
371
  processBlockquoteMarkers(text) {
373
372
  let result = text;
374
- // Replace expandable quote markers (process content recursively)
375
373
  const expandableQuoteRegex = /\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g;
376
374
  result = result.replace(expandableQuoteRegex, (match, content) => {
377
375
  const processedContent = this.convertRecursive(content);
378
- // FIXED: Removed \n before and after
379
376
  return `<blockquote expandable>${processedContent.trim()}</blockquote>`;
380
377
  });
381
- // Replace regular quote markers (process content recursively)
382
378
  const quoteRegex = /\[QUOTE\](.*?)(?=\n|$)/g;
383
379
  result = result.replace(quoteRegex, (match, content) => {
384
380
  const processedContent = this.convertRecursive(content);
385
- // FIXED: Removed \n before and after
386
381
  return `<blockquote>${processedContent.trim()}</blockquote>`;
387
382
  });
388
383
  return result;
@@ -395,7 +390,6 @@
395
390
  defaultCodeBlockProcessor(code, language) {
396
391
  const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
397
392
  const langAttr = language ? ` class="language-${language}"` : '';
398
- // FIXED: Removed \n before and after in default processor too
399
393
  return `<pre><code${langAttr}>${escapedCode}</code></pre>`;
400
394
  }
401
395
  }
@@ -2,9 +2,6 @@ import { Token } from './types';
2
2
  export declare class MarkdownTokenizer {
3
3
  private text;
4
4
  constructor(text: string);
5
- /**
6
- * Tokenize the markdown text
7
- */
8
5
  tokenize(): Token[];
9
6
  private matchToken;
10
7
  private isInsideCodeBlock;
package/dist/types.d.ts CHANGED
@@ -9,6 +9,15 @@ export interface ConvertOptions {
9
9
  * @default true
10
10
  */
11
11
  autoCloseCodeBlocks?: boolean;
12
+ /**
13
+ * Custom symbol for headings (default: '▎')
14
+ */
15
+ headingSymbol?: string;
16
+ /**
17
+ * Whether to show blank (no symbol) for headings
18
+ * @default false
19
+ */
20
+ headingBlank?: boolean;
12
21
  /**
13
22
  * Custom replacement function for links
14
23
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "telegram-md2html",
3
- "version": "1.0.2",
3
+ "version": "1.0.4",
4
4
  "description": "A smart converter for Telegram-style Markdown to Telegram-compatible HTML",
5
5
  "main": "./dist/index.cjs",
6
6
  "module": "./dist/index.mjs",