telegram-md2html 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +306 -0
- package/dist/converter.d.ts +29 -0
- package/dist/index.cjs +417 -0
- package/dist/index.d.mts +21 -0
- package/dist/index.d.ts +21 -0
- package/dist/index.mjs +410 -0
- package/dist/tokenizer.d.ts +12 -0
- package/dist/types.d.ts +28 -0
- package/dist/utils.d.ts +24 -0
- package/package.json +61 -0
package/README.md
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
1
|
+
# Telegram Markdown to HTML Converter
|
|
2
|
+
|
|
3
|
+
[npm package](https://www.npmjs.com/package/telegram-md2html)
|
|
4
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
|
+
[Built with TypeScript](https://www.typescriptlang.org/)
|
|
6
|
+
|
|
7
|
+
A smart, efficient, and reliable library for converting Telegram-style Markdown to Telegram-compatible HTML. Perfect for Telegram bots, messaging applications, and content processing pipelines.
|
|
8
|
+
|
|
9
|
+
## ✨ Features
|
|
10
|
+
|
|
11
|
+
- ✅ **Complete Telegram Markdown Support** - All Telegram-specific formatting
|
|
12
|
+
- ✅ **Smart Parsing** - Context-aware (ignores formatting inside code blocks)
|
|
13
|
+
- ✅ **Nested Formatting** - Proper handling of nested styles
|
|
14
|
+
- ✅ **HTML Safety** - Automatic escaping of HTML special characters
|
|
15
|
+
- ✅ **Auto-recovery** - Automatically closes unclosed code blocks
|
|
16
|
+
- ✅ **Dual Module Support** - Works with both CommonJS (`require`) and ES Modules (`import`)
|
|
17
|
+
- ✅ **TypeScript Ready** - Full type definitions included
|
|
18
|
+
- ✅ **Highly Customizable** - Extensible with custom processors
|
|
19
|
+
- ✅ **Production Ready** - Minified builds, comprehensive tests
|
|
20
|
+
- ✅ **Zero Dependencies** - Lightweight and fast
|
|
21
|
+
|
|
22
|
+
## 📦 Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
npm install telegram-md2html
|
|
26
|
+
# or
|
|
27
|
+
yarn add telegram-md2html
|
|
28
|
+
# or
|
|
29
|
+
pnpm add telegram-md2html
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
## 🚀 Quick Start
|
|
33
|
+
|
|
34
|
+
### Basic Usage
|
|
35
|
+
|
|
36
|
+
```javascript
|
|
37
|
+
// CommonJS
|
|
38
|
+
const { markdownToHtml } = require('telegram-md2html');
|
|
39
|
+
|
|
40
|
+
// ES Modules
|
|
41
|
+
import { markdownToHtml } from 'telegram-md2html';
|
|
42
|
+
|
|
43
|
+
const markdown = '**Bold text** and *italic text* with a [link](https://example.com)';
|
|
44
|
+
const html = markdownToHtml(markdown);
|
|
45
|
+
|
|
46
|
+
console.log(html);
|
|
47
|
+
// Output: <b>Bold text</b> and <i>italic text</i> with a <a href="https://example.com">link</a>
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### Complex Example
|
|
51
|
+
|
|
52
|
+
```javascript
|
|
53
|
+
import { markdownToHtml } from 'telegram-md2html';
|
|
54
|
+
|
|
55
|
+
const markdown = `
|
|
56
|
+
# Welcome to Telegram Bot!
|
|
57
|
+
|
|
58
|
+
**Important Features:**
|
|
59
|
+
• *Italic* and __underline__ formatting
|
|
60
|
+
• ~~Strikethrough~~ and ||spoiler|| text
|
|
61
|
+
• \`Inline code\` and code blocks:
|
|
62
|
+
\`\`\`javascript
|
|
63
|
+
function greet() {
|
|
64
|
+
console.log("Hello, Telegram!");
|
|
65
|
+
}
|
|
66
|
+
\`\`\`
|
|
67
|
+
|
|
68
|
+
> This is a regular blockquote
|
|
69
|
+
**> This is an expandable blockquote
|
|
70
|
+
|
|
71
|
+
[Learn more](https://core.telegram.org/bots/api#html-style)
|
|
72
|
+
`;
|
|
73
|
+
|
|
74
|
+
const html = markdownToHtml(markdown);
|
|
75
|
+
// Send to Telegram bot...
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## 📚 Supported Syntax
|
|
79
|
+
|
|
80
|
+
| Markdown Syntax | HTML Output | Description |
|
|
81
|
+
|----------------|-------------|-------------|
|
|
82
|
+
| `**bold**` | `<b>bold</b>` | Bold text |
|
|
83
|
+
| `*italic*` | `<i>italic</i>` | Italic text (asterisk) |
|
|
84
|
+
| `_italic_` | `<i>italic</i>` | Italic text (underscore) |
|
|
85
|
+
| `__underline__` | `<u>underline</u>` | Underlined text |
|
|
86
|
+
| `~~strikethrough~~` | `<s>strikethrough</s>` | Strikethrough text |
|
|
87
|
+
| `\|\|spoiler\|\|` | `<span class="tg-spoiler">spoiler</span>` | Spoiler text |
|
|
88
|
+
| `` `code` `` | `<code>code</code>` | Inline code |
|
|
89
|
+
| ```` ```language\ncode\n``` ```` | `<pre><code class="language-xxx">code</code></pre>` | Code block with syntax highlighting |
|
|
90
|
+
| `[text](url)` | `<a href="url">text</a>` | Hyperlink |
|
|
91
|
+
| `> quote` | `<blockquote>quote</blockquote>` | Regular blockquote |
|
|
92
|
+
| `**> quote` | `<blockquote expandable>quote</blockquote>` | Expandable blockquote |
|
|
93
|
+
|
|
94
|
+
## ⚙️ Advanced Usage
|
|
95
|
+
|
|
96
|
+
### Custom Converter with Options
|
|
97
|
+
|
|
98
|
+
```javascript
|
|
99
|
+
import { createConverter } from 'telegram-md2html';
|
|
100
|
+
|
|
101
|
+
// Create a custom converter with advanced options
|
|
102
|
+
const converter = createConverter({
|
|
103
|
+
escapeHtml: true, // Escape HTML special characters (default: true)
|
|
104
|
+
autoCloseCodeBlocks: true, // Auto-close unclosed code blocks (default: true)
|
|
105
|
+
|
|
106
|
+
// Custom link processor
|
|
107
|
+
linkProcessor: (url, text) =>
|
|
108
|
+
`<a href="${url}" target="_blank" rel="noopener noreferrer">🔗 ${text}</a>`,
|
|
109
|
+
|
|
110
|
+
// Custom code block processor
|
|
111
|
+
codeBlockProcessor: (code, language) =>
|
|
112
|
+
`<div class="code-container">
|
|
113
|
+
<div class="code-header">${language || 'code'}</div>
|
|
114
|
+
<pre><code>${code}</code></pre>
|
|
115
|
+
</div>`
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
const customHtml = converter.convert('Check [this](https://example.com) out!');
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
### Disable HTML Escaping (Use with Caution)
|
|
122
|
+
|
|
123
|
+
```javascript
|
|
124
|
+
import { markdownToHtml } from 'telegram-md2html';
|
|
125
|
+
|
|
126
|
+
// For trusted content where you want to preserve existing HTML
|
|
127
|
+
const html = markdownToHtml('Mix <b>HTML</b> with **Markdown**', {
|
|
128
|
+
escapeHtml: false
|
|
129
|
+
});
|
|
130
|
+
// Output: Mix <b>HTML</b> with <b>Markdown</b>
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
## 🔧 API Reference
|
|
134
|
+
|
|
135
|
+
### `markdownToHtml(text: string, options?: ConvertOptions): string`
|
|
136
|
+
|
|
137
|
+
Main conversion function that converts Markdown to Telegram HTML.
|
|
138
|
+
|
|
139
|
+
**Parameters:**
|
|
140
|
+
- `text` - The Markdown text to convert
|
|
141
|
+
- `options` - Optional conversion settings (see below)
|
|
142
|
+
|
|
143
|
+
**Returns:** Telegram-compatible HTML string
|
|
144
|
+
|
|
145
|
+
### `createConverter(options?: ConvertOptions): MarkdownConverter`
|
|
146
|
+
|
|
147
|
+
Creates a reusable converter instance with custom options.
|
|
148
|
+
|
|
149
|
+
### `ConvertOptions` Interface
|
|
150
|
+
|
|
151
|
+
```typescript
|
|
152
|
+
interface ConvertOptions {
|
|
153
|
+
/**
|
|
154
|
+
* Whether to escape HTML special characters (&, <, >, ", ')
|
|
155
|
+
* @default true
|
|
156
|
+
*/
|
|
157
|
+
escapeHtml?: boolean;
|
|
158
|
+
|
|
159
|
+
/**
|
|
160
|
+
* Whether to automatically append missing ``` to close code blocks
|
|
161
|
+
* @default true
|
|
162
|
+
*/
|
|
163
|
+
autoCloseCodeBlocks?: boolean;
|
|
164
|
+
|
|
165
|
+
/**
|
|
166
|
+
* Custom processor for links
|
|
167
|
+
* @param url - The URL
|
|
168
|
+
* @param text - The link text
|
|
169
|
+
* @returns HTML string for the link
|
|
170
|
+
*/
|
|
171
|
+
linkProcessor?: (url: string, text: string) => string;
|
|
172
|
+
|
|
173
|
+
/**
|
|
174
|
+
* Custom processor for code blocks
|
|
175
|
+
* @param code - The code content
|
|
176
|
+
* @param language - Optional language specified after ```
|
|
177
|
+
* @returns HTML string for the code block
|
|
178
|
+
*/
|
|
179
|
+
codeBlockProcessor?: (code: string, language?: string) => string;
|
|
180
|
+
}
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
## 💡 Real-World Examples
|
|
184
|
+
|
|
185
|
+
### Telegram Bot Integration
|
|
186
|
+
|
|
187
|
+
```javascript
|
|
188
|
+
const { Telegraf } = require('telegraf');
|
|
189
|
+
const { markdownToHtml } = require('telegram-md2html');
|
|
190
|
+
|
|
191
|
+
const bot = new Telegraf(process.env.BOT_TOKEN);
|
|
192
|
+
|
|
193
|
+
bot.command('format', (ctx) => {
|
|
194
|
+
const markdown = `
|
|
195
|
+
**Formatting Examples:**
|
|
196
|
+
|
|
197
|
+
*Bold*: **bold text**
|
|
198
|
+
*Italic*: *italic text* or _italic text_
|
|
199
|
+
*Code*: \`inline code\`
|
|
200
|
+
*Link*: [Telegram](https://telegram.org)
|
|
201
|
+
*Quote*:
|
|
202
|
+
> To be or not to be
|
|
203
|
+
`;
|
|
204
|
+
|
|
205
|
+
// Convert to Telegram HTML
|
|
206
|
+
const html = markdownToHtml(markdown);
|
|
207
|
+
|
|
208
|
+
// Send as HTML message
|
|
209
|
+
ctx.replyWithHTML(html);
|
|
210
|
+
});
|
|
211
|
+
|
|
212
|
+
bot.launch();
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Content Processing Pipeline
|
|
216
|
+
|
|
217
|
+
```javascript
|
|
218
|
+
import { createConverter } from 'telegram-md2html';
|
|
219
|
+
import { readFileSync, writeFileSync } from 'fs';
|
|
220
|
+
|
|
221
|
+
// Process multiple files
|
|
222
|
+
const converter = createConverter({
|
|
223
|
+
codeBlockProcessor: (code, language) => `
|
|
224
|
+
<details>
|
|
225
|
+
<summary>${language ? `📁 ${language.toUpperCase()}` : '📄 CODE'}</summary>
|
|
226
|
+
<pre><code>${code}</code></pre>
|
|
227
|
+
</details>
|
|
228
|
+
`
|
|
229
|
+
});
|
|
230
|
+
|
|
231
|
+
const input = readFileSync('document.md', 'utf-8');
|
|
232
|
+
const output = converter.convert(input);
|
|
233
|
+
writeFileSync('document.html', output);
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
## 🧪 Testing
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
# Run tests
|
|
240
|
+
npm test
|
|
241
|
+
|
|
242
|
+
# Run tests with coverage
|
|
243
|
+
npm test -- --coverage
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
## 🔍 How It Works
|
|
247
|
+
|
|
248
|
+
The library uses a sophisticated tokenizer that:
|
|
249
|
+
1. Scans the text for Markdown patterns
|
|
250
|
+
2. Intelligently ignores formatting inside code blocks and inline code
|
|
251
|
+
3. Processes nested formatting correctly
|
|
252
|
+
4. Applies custom processors if provided
|
|
253
|
+
5. Escapes HTML characters for security
|
|
254
|
+
6. Auto-closes unclosed code blocks
|
|
255
|
+
|
|
256
|
+
## 📖 Common Use Cases
|
|
257
|
+
|
|
258
|
+
1. **Telegram Bots** - Format bot responses with rich text
|
|
259
|
+
2. **Content Management Systems** - Convert user input to safe HTML
|
|
260
|
+
3. **Documentation Tools** - Generate Telegram-compatible documentation
|
|
261
|
+
4. **Chat Applications** - Format messages for display
|
|
262
|
+
5. **Export Tools** - Convert Markdown to Telegram HTML for export
|
|
263
|
+
|
|
264
|
+
### Development Setup
|
|
265
|
+
|
|
266
|
+
```bash
|
|
267
|
+
# Clone the repository
|
|
268
|
+
git clone https://github.com/soumyadeep765/telegram-md2html.git
|
|
269
|
+
cd telegram-md2html
|
|
270
|
+
|
|
271
|
+
# Install dependencies
|
|
272
|
+
npm install
|
|
273
|
+
|
|
274
|
+
# Build the library
|
|
275
|
+
npm run build
|
|
276
|
+
|
|
277
|
+
# Run tests
|
|
278
|
+
npm test
|
|
279
|
+
|
|
280
|
+
# Development mode (watch for changes)
|
|
281
|
+
npm run dev
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
## License
|
|
285
|
+
|
|
286
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
287
|
+
|
|
288
|
+
## Issue Reporting
|
|
289
|
+
|
|
290
|
+
Found a bug or have a feature request? Please open an issue on the [GitHub repository](https://github.com/soumyadeep765/telegram-md2html/issues).
|
|
291
|
+
|
|
292
|
+
## Acknowledgments
|
|
293
|
+
|
|
294
|
+
- Telegram for their awesome Bot API and HTML formatting support
|
|
295
|
+
- All contributors who help improve this library
|
|
296
|
+
|
|
297
|
+
## Support
|
|
298
|
+
|
|
299
|
+
For support, questions, or discussions:
|
|
300
|
+
- Open an issue on GitHub
|
|
301
|
+
- Check the [examples](examples/) directory
|
|
302
|
+
- Refer to the [Telegram Bot API documentation](https://core.telegram.org/bots/api#html-style)
|
|
303
|
+
|
|
304
|
+
---
|
|
305
|
+
|
|
306
|
+
**Happy coding!** If you find this library useful, please consider giving it a ⭐ on GitHub!
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { ConvertOptions } from './types';
|
|
2
|
+
/**
 * Converts Telegram-style Markdown into Telegram-compatible HTML.
 *
 * An instance captures its options once and can be reused for any number of
 * `convert` calls.
 */
export declare class MarkdownConverter {
    /** Effective options, with defaults filled in by the constructor. */
    private options;
    /** True when the caller supplied a `linkProcessor` option. */
    private hasCustomLinkProcessor;
    /** True when the caller supplied a `codeBlockProcessor` option. */
    private hasCustomCodeBlockProcessor;
    /**
     * @param options - Optional conversion settings; omitted fields fall back to defaults
     */
    constructor(options?: ConvertOptions);
    /**
     * Convert a Markdown string to Telegram HTML.
     *
     * @param text - Markdown text to convert
     * @returns The resulting HTML string
     */
    convert(text: string): string;
    /**
     * Tokenize a piece of text and render it, descending into the content of
     * formatting tokens so that nested styles are handled.
     */
    private convertRecursive;
    /**
     * Render one token's (already processed) content as its HTML element.
     */
    private wrapToken;
    /**
     * Tag blockquote lines with internal markers before the main parsing pass.
     */
    private preprocessBlockquotes;
    /**
     * Replace the internal blockquote markers with `<blockquote>` elements.
     */
    private processBlockquoteMarkers;
    /** Built-in renderer for links. */
    private defaultLinkProcessor;
    /** Built-in renderer for fenced code blocks. */
    private defaultCodeBlockProcessor;
}
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
Object.defineProperty(exports, '__esModule', { value: true });
|
|
4
|
+
|
|
5
|
+
/**
 * Splits Telegram-style Markdown into a flat list of top-level tokens.
 *
 * Each token is `{ type, content, start, end }` where `start`/`end` are
 * offsets into the source text (`end` exclusive). Code-block tokens also carry
 * `language` (possibly undefined); link tokens carry the URL in `language`.
 * Nested formatting is not expanded here: the caller re-tokenizes `content`.
 */
class MarkdownTokenizer {
  /**
   * @param text - Markdown text to scan
   */
  constructor(text) {
    this.text = text;
  }

  /**
   * Tokenize the markdown text.
   *
   * @returns Tokens ordered by their start offset
   */
  tokenize() {
    const text = this.text;
    const tokens = [];
    // Fenced regions are located once up front. Re-running the fence regex
    // for every character made the scan quadratic on large inputs.
    const fences = this.findFencedSpans(text);
    let fenceIndex = 0;
    let pos = 0;
    while (pos < text.length) {
      // `pos` only moves forward, so fences whose protected interior is
      // already behind the cursor can be dropped for good.
      while (fenceIndex < fences.length && pos >= fences[fenceIndex].interiorEnd) {
        fenceIndex++;
      }
      const fence = fences[fenceIndex];
      if (fence !== undefined && pos > fence.start && pos < fence.interiorEnd) {
        // Inside a fenced region: nothing in here is formatting.
        pos++;
        continue;
      }
      const token = this.matchToken(pos);
      if (token) {
        tokens.push(token);
        pos = token.end;
      }
      else {
        pos++;
      }
    }
    return tokens.sort((a, b) => a.start - b.start);
  }

  /**
   * Try to match a token that begins exactly at `start`.
   *
   * Rules are tried in priority order: code block, inline code, spoiler,
   * strikethrough, bold, underline, italic (`*` then `_`), link.
   *
   * @param start - Offset into the text at which a token must begin
   * @returns The matched token, or null when nothing starts here
   */
  matchToken(start) {
    const text = this.text;
    const remaining = text.slice(start);
    // Never start a token on a blockquote marker; otherwise "[QUOTE](x)"
    // would be read as a link.
    if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
      return null;
    }
    // Code block (triple backticks) - highest priority
    const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
    if (codeBlockMatch) {
      return {
        type: 'code_block',
        content: codeBlockMatch[2],
        language: codeBlockMatch[1],
        start: start,
        end: start + codeBlockMatch[0].length
      };
    }
    // Inline code
    const inlineCodeMatch = remaining.match(/^`([^`\n]+)`/);
    if (inlineCodeMatch && !this.isInsideInlineCode(text, start)) {
      return {
        type: 'inline_code',
        content: inlineCodeMatch[1],
        start: start,
        end: start + inlineCodeMatch[0].length
      };
    }
    // Double-delimiter styles share one shape: the body is on a single line
    // and may not contain the delimiter character.
    const pairedRules = [
      { type: 'spoiler', pattern: /^\|\|([^|\n]+?)\|\|/ },
      { type: 'strikethrough', pattern: /^~~([^~\n]+?)~~/ },
      { type: 'bold', pattern: /^\*\*([^*\n]+?)\*\*/ },
      { type: 'underline', pattern: /^__([^_\n]+?)__/ }
    ];
    for (const rule of pairedRules) {
      const pairedMatch = remaining.match(rule.pattern);
      if (pairedMatch) {
        return {
          type: rule.type,
          content: pairedMatch[1],
          start: start,
          end: start + pairedMatch[0].length
        };
      }
    }
    // Italic with asterisk. The body must stay on one line, like every other
    // inline style; otherwise a "* item" bullet list is paired up across
    // lines and rendered as italics.
    const italicAsteriskMatch = remaining.match(/^\*([^*\n]+?)\*/);
    if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
      // Don't match if it's part of bold (**)
      if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
        return null;
      }
      return {
        type: 'italic',
        content: italicAsteriskMatch[1],
        start: start,
        end: start + italicAsteriskMatch[0].length
      };
    }
    // Italic with underscore
    const italicUnderscoreMatch = remaining.match(/^_([^_\n]+?)_/);
    if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
      // Don't match if it's part of underline (__)
      if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
        return null;
      }
      return {
        type: 'italic',
        content: italicUnderscoreMatch[1],
        start: start,
        end: start + italicUnderscoreMatch[0].length
      };
    }
    // Link: the URL travels in the `language` field of the token.
    const linkMatch = remaining.match(/^\[([^\]]+?)\]\(([^)]+?)\)/);
    if (linkMatch) {
      return {
        type: 'link',
        content: linkMatch[1],
        start: start,
        end: start + linkMatch[0].length,
        language: linkMatch[2]
      };
    }
    return null;
  }

  /**
   * Locate every ```...``` region in `text`.
   *
   * @param text - Text to scan
   * @returns Spans `{ start, interiorEnd }`; `interiorEnd` is the offset of
   *   the closing fence, so positions strictly between the two are "inside"
   */
  findFencedSpans(text) {
    const spans = [];
    const fenceRegex = /```[\s\S]*?```/g;
    let match;
    while ((match = fenceRegex.exec(text)) !== null) {
      spans.push({
        start: match.index,
        interiorEnd: match.index + match[0].length - 3
      });
    }
    return spans;
  }

  /**
   * Report whether `position` lies inside a fenced code region of `text`.
   * The first backtick of the opening fence and all three backticks of the
   * closing fence count as outside.
   *
   * @param text - Text to inspect
   * @param position - Offset to test
   * @returns true when the offset is within a fenced region
   */
  isInsideCodeBlock(text, position) {
    return this.findFencedSpans(text).some((span) => position > span.start && position < span.interiorEnd);
  }

  /**
   * Report whether `position` lies inside an inline-code run of `text`.
   * The opening and closing backticks themselves count as outside.
   *
   * @param text - Text to inspect
   * @param position - Offset to test
   * @returns true when the offset is within an inline-code run
   */
  isInsideInlineCode(text, position) {
    const inlineCodeRegex = /`[^`\n]*`/g;
    let match;
    while ((match = inlineCodeRegex.exec(text)) !== null) {
      const closingTick = match.index + match[0].length - 1;
      if (position > match.index && position < closingTick) {
        return true;
      }
    }
    return false;
  }
}
|
|
174
|
+
|
|
175
|
+
/**
 * Escape the HTML-special characters `&`, `<`, `>`, `"` and `'`.
 *
 * An ampersand that already starts an entity (for example `&amp;` or `&#39;`)
 * is left alone, so running the function on its own output does not
 * double-escape.
 *
 * @param text - Text to escape; falsy values are returned unchanged
 * @returns The escaped text
 */
function escapeHtml(text) {
  if (!text)
    return text;
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };
  // The alternation only admits "&" when it is not already part of an entity.
  return text.replace(/&(?!#?\w+;)|[<>"']/g, (ch) => entities[ch]);
}
|
|
189
|
+
/**
 * Escape the characters that are special in Telegram HTML: `&`, `<`, `>`
 * and `"`. Apostrophes are left as they are.
 *
 * An ampersand that already starts an entity (for example `&amp;`) is kept,
 * so the function does not double-escape.
 *
 * @param text - Text to escape; falsy values are returned unchanged
 * @returns The escaped text
 */
function escapeTelegramHtml(text) {
  if (!text)
    return text;
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;'
  };
  // The alternation only admits "&" when it is not already part of an entity.
  return text.replace(/&(?!#?\w+;)|[<>"]/g, (ch) => entities[ch]);
}
|
|
202
|
+
/**
 * Close a dangling fenced code block.
 *
 * When the text contains an odd number of triple-backtick fences, a newline
 * and a closing fence are appended; otherwise the text is returned untouched.
 *
 * @param text - Markdown text to inspect
 * @returns The text, with a closing fence added if one was missing
 */
function autoCloseCodeBlocks(text) {
  // Splitting on the fence yields one more piece than there are fences.
  const fenceCount = text.split('```').length - 1;
  const hasDanglingFence = fenceCount % 2 === 1;
  return hasDanglingFence ? text + '\n```' : text;
}
|
|
214
|
+
|
|
215
|
+
/**
 * Converts Telegram-style Markdown into Telegram-compatible HTML.
 *
 * The pipeline is: optionally close a dangling code fence, tag blockquote
 * lines with internal markers, tokenize and render the text (recursing into
 * formatting tokens), then turn the markers into `<blockquote>` elements.
 */
class MarkdownConverter {
  /**
   * @param options - Optional settings. `escapeHtml` and
   *   `autoCloseCodeBlocks` default to true. `linkProcessor(url, text)` and
   *   `codeBlockProcessor(code, language)` replace the built-in renderers.
   */
  constructor(options = {}) {
    // A default parameter only covers `undefined`; tolerate an explicit null.
    const settings = options ?? {};
    this.hasCustomLinkProcessor = !!settings.linkProcessor;
    this.hasCustomCodeBlockProcessor = !!settings.codeBlockProcessor;
    this.options = {
      escapeHtml: settings.escapeHtml ?? true,
      autoCloseCodeBlocks: settings.autoCloseCodeBlocks ?? true,
      linkProcessor: settings.linkProcessor || this.defaultLinkProcessor.bind(this),
      codeBlockProcessor: settings.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
    };
  }

  /**
   * Convert markdown text to Telegram HTML.
   *
   * @param text - Markdown text to convert
   * @returns The trimmed HTML, or the original text when the result would be
   *   empty or whitespace only
   */
  convert(text) {
    // Auto-close code blocks if enabled
    let processedText = this.options.autoCloseCodeBlocks
      ? autoCloseCodeBlocks(text)
      : text;
    // Tag blockquote lines first so their leading ">" is not escaped as text
    processedText = this.preprocessBlockquotes(processedText);
    // Render everything, including the content that follows a quote marker
    let result = this.convertRecursive(processedText);
    // Turn the markers into <blockquote> elements
    result = this.processBlockquoteMarkers(result);
    if (result.trim() === '') {
      return text; // Return original text (spaces) if result is empty
    }
    return result.trim();
  }

  /**
   * Tokenize `text` and render it, recursing into the content of formatting
   * tokens so nested styles are handled. Code blocks and inline code are
   * escaped but never parsed for formatting.
   *
   * @param text - Markdown fragment to render
   * @param depth - Current nesting depth; rendering stops recursing past 10
   * @returns The rendered HTML fragment
   */
  convertRecursive(text, depth = 0) {
    const escapePlain = (value) => (this.options.escapeHtml ? escapeTelegramHtml(value) : value);
    const escapeCode = (value) => (this.options.escapeHtml ? escapeHtml(value) : value);
    if (depth > 10) {
      // Recursion guard. The fragment is still escaped so that bailing out
      // can never leak raw markup into the output.
      return escapePlain(text);
    }
    const tokens = new MarkdownTokenizer(text).tokenize();
    if (tokens.length === 0) {
      return escapePlain(text);
    }
    let result = '';
    let lastPos = 0;
    for (const token of tokens) {
      // Plain text between the previous token and this one
      if (token.start > lastPos) {
        result += escapePlain(text.slice(lastPos, token.start));
      }
      if (token.type === 'code_block') {
        result += this.wrapToken(token.type, escapeCode(token.content), token.language);
      }
      else if (token.type === 'inline_code') {
        result += `<code>${escapeCode(token.content)}</code>`;
      }
      else {
        const inner = this.convertRecursive(token.content, depth + 1);
        result += this.wrapToken(token.type, inner, token.language);
      }
      lastPos = token.end;
    }
    // Plain text after the last token
    if (lastPos < text.length) {
      result += escapePlain(text.slice(lastPos));
    }
    return result;
  }

  /**
   * Wrap token content in HTML tags.
   *
   * @param type - Token type
   * @param content - Content that has already been rendered/escaped
   * @param language - Code-block language, or the URL for link tokens
   * @returns The HTML for the token; unknown types return `content` as is
   */
  wrapToken(type, content, language) {
    switch (type) {
      case 'bold':
        return `<b>${content}</b>`;
      case 'italic':
        return `<i>${content}</i>`;
      case 'underline':
        return `<u>${content}</u>`;
      case 'strikethrough':
        return `<s>${content}</s>`;
      case 'spoiler':
        return `<span class="tg-spoiler">${content}</span>`;
      case 'inline_code':
        return `<code>${content}</code>`;
      case 'code_block':
        // Custom processor when supplied, built-in renderer otherwise.
        return this.options.codeBlockProcessor(content, language);
      case 'link':
        // `content` is rendered HTML at this point (nested styles are already
        // tags), so it must not be escaped a second time.
        return this.options.linkProcessor(language || '', content);
      case 'quote':
        return `\n<blockquote>${content.trim()}</blockquote>\n`;
      case 'expandable_quote':
        return `\n<blockquote expandable>${content.trim()}</blockquote>\n`;
      default:
        return content;
    }
  }

  /**
   * Tag blockquote lines with internal markers before the main parsing pass.
   *
   * A line starting with `**>` becomes `[EXPANDABLE_QUOTE]...`, a line
   * starting with `>` becomes `[QUOTE]...`. Lines inside a fenced code block
   * are left untouched so that code such as shell prompts or diffs survives.
   * Note that input which itself contains these marker strings is
   * indistinguishable from a tagged line.
   *
   * @param text - Markdown text
   * @returns The text with blockquote lines tagged
   */
  preprocessBlockquotes(text) {
    const processedLines = [];
    let insideFence = false;
    for (const line of text.split('\n')) {
      const trimmedLine = line.trim();
      if (!insideFence && trimmedLine.startsWith('**>')) {
        // Expandable blockquote
        processedLines.push(`[EXPANDABLE_QUOTE]${trimmedLine.substring(3).trim()}`);
      }
      else if (!insideFence && trimmedLine.startsWith('>')) {
        // Regular blockquote
        processedLines.push(`[QUOTE]${trimmedLine.substring(1).trim()}`);
      }
      else {
        processedLines.push(line);
      }
      // An odd number of fences on a line opens or closes a code block.
      const fencesOnLine = line.split('```').length - 1;
      if (fencesOnLine % 2 === 1) {
        insideFence = !insideFence;
      }
    }
    return processedLines.join('\n');
  }

  /**
   * Replace the internal blockquote markers with `<blockquote>` elements.
   *
   * The text after a marker was already rendered by `convertRecursive`, so it
   * is wrapped as is. Converting it again would escape the tags produced for
   * nested formatting.
   *
   * @param text - Rendered HTML that may still contain quote markers
   * @returns The HTML with markers replaced
   */
  processBlockquoteMarkers(text) {
    return text
      .replace(/\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g, (_marker, content) => `\n<blockquote expandable>${content.trim()}</blockquote>\n`)
      .replace(/\[QUOTE\](.*?)(?=\n|$)/g, (_marker, content) => `\n<blockquote>${content.trim()}</blockquote>\n`);
  }

  /**
   * Built-in link renderer.
   *
   * @param url - Raw link target; escaped here when `escapeHtml` is enabled
   * @param text - Link text as rendered HTML; inserted unchanged
   * @returns An `<a>` element
   */
  defaultLinkProcessor(url, text) {
    const escapedUrl = this.options.escapeHtml ? escapeHtml(url) : url;
    return `<a href="${escapedUrl}">${text}</a>`;
  }

  /**
   * Built-in code block renderer.
   *
   * @param code - Code content; escaped here when `escapeHtml` is enabled
   *   (escaping is idempotent, so pre-escaped input is safe)
   * @param language - Optional language used for the `language-*` class
   * @returns A `<pre><code>` element surrounded by newlines
   */
  defaultCodeBlockProcessor(code, language) {
    const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
    const langAttr = language ? ` class="language-${language}"` : '';
    return `\n<pre><code${langAttr}>${escapedCode}</code></pre>\n`;
  }
}
|
|
391
|
+
|
|
392
|
+
/**
 * One-shot conversion of Telegram-style Markdown to HTML.
 *
 * Builds a throwaway converter for the given options and runs it once.
 *
 * @param text - Markdown text to convert
 * @param options - Conversion options
 * @returns Telegram-compatible HTML
 */
function markdownToHtml(text, options) {
  return new MarkdownConverter(options).convert(text);
}
|
|
402
|
+
/**
 * Build a reusable converter.
 *
 * @param options - Conversion options applied to every `convert` call
 * @returns A new `MarkdownConverter` instance
 */
function createConverter(options) {
  const converter = new MarkdownConverter(options);
  return converter;
}
|
|
408
|
+
// Aggregate object exposed as the `default` export.
const index = {
  markdownToHtml,
  createConverter,
  MarkdownConverter
};

// Named exports, assigned one property at a time.
exports.MarkdownConverter = MarkdownConverter;
exports.createConverter = createConverter;
exports.default = index;
exports.markdownToHtml = markdownToHtml;
|
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { MarkdownConverter } from './converter.js';
|
|
2
|
+
import { ConvertOptions } from './types.js';
|
|
3
|
+
/**
 * One-shot conversion of Telegram-style Markdown to HTML.
 *
 * @param text - Markdown text to convert
 * @param options - Optional conversion settings
 * @returns Telegram-compatible HTML
 */
export declare function markdownToHtml(text: string, options?: ConvertOptions): string;
/**
 * Build a reusable converter instance.
 *
 * @param options - Optional conversion settings applied to every conversion
 * @returns A new converter
 */
export declare function createConverter(options?: ConvertOptions): MarkdownConverter;
export { MarkdownConverter } from './converter.js';
export type { ConvertOptions, Token } from './types.js';
/** Shape of the default export: the public API gathered in one object. */
declare const _default: {
    markdownToHtml: typeof markdownToHtml;
    createConverter: typeof createConverter;
    MarkdownConverter: typeof MarkdownConverter;
};
export default _default;
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { MarkdownConverter } from './converter.js';
|
|
2
|
+
import { ConvertOptions } from './types.js';
|
|
3
|
+
/**
 * One-shot conversion of Telegram-style Markdown to HTML.
 *
 * @param text - Markdown text to convert
 * @param options - Optional conversion settings
 * @returns Telegram-compatible HTML
 */
export declare function markdownToHtml(text: string, options?: ConvertOptions): string;
/**
 * Build a reusable converter instance.
 *
 * @param options - Optional conversion settings applied to every conversion
 * @returns A new converter
 */
export declare function createConverter(options?: ConvertOptions): MarkdownConverter;
export { MarkdownConverter } from './converter.js';
export type { ConvertOptions, Token } from './types.js';
/** Shape of the default export: the public API gathered in one object. */
declare const _default: {
    markdownToHtml: typeof markdownToHtml;
    createConverter: typeof createConverter;
    MarkdownConverter: typeof MarkdownConverter;
};
export default _default;
|
package/dist/index.mjs
ADDED
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
class MarkdownTokenizer {
|
|
2
|
+
constructor(text) {
|
|
3
|
+
this.text = text;
|
|
4
|
+
}
|
|
5
|
+
/**
|
|
6
|
+
* Tokenize the markdown text
|
|
7
|
+
*/
|
|
8
|
+
tokenize() {
|
|
9
|
+
const tokens = [];
|
|
10
|
+
let pos = 0;
|
|
11
|
+
const text = this.text;
|
|
12
|
+
while (pos < text.length) {
|
|
13
|
+
// Skip if inside code block
|
|
14
|
+
if (this.isInsideCodeBlock(text, pos)) {
|
|
15
|
+
pos++;
|
|
16
|
+
continue;
|
|
17
|
+
}
|
|
18
|
+
// Try to match each token type (from outermost to innermost)
|
|
19
|
+
const token = this.matchToken(pos);
|
|
20
|
+
if (token) {
|
|
21
|
+
tokens.push(token);
|
|
22
|
+
pos = token.end;
|
|
23
|
+
}
|
|
24
|
+
else {
|
|
25
|
+
pos++;
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
return tokens.sort((a, b) => a.start - b.start);
|
|
29
|
+
}
|
|
30
|
+
matchToken(start) {
|
|
31
|
+
const text = this.text;
|
|
32
|
+
const remaining = text.slice(start);
|
|
33
|
+
// Skip if we're inside a quote marker
|
|
34
|
+
if (remaining.startsWith('[QUOTE]') || remaining.startsWith('[EXPANDABLE_QUOTE]')) {
|
|
35
|
+
return null;
|
|
36
|
+
}
|
|
37
|
+
// Match code block (triple backticks) - highest priority
|
|
38
|
+
const codeBlockMatch = remaining.match(/^```(\w+)?\n([\s\S]*?)```/);
|
|
39
|
+
if (codeBlockMatch) {
|
|
40
|
+
return {
|
|
41
|
+
type: 'code_block',
|
|
42
|
+
content: codeBlockMatch[2],
|
|
43
|
+
language: codeBlockMatch[1],
|
|
44
|
+
start: start,
|
|
45
|
+
end: start + codeBlockMatch[0].length
|
|
46
|
+
};
|
|
47
|
+
}
|
|
48
|
+
// Match inline code
|
|
49
|
+
const inlineCodeMatch = remaining.match(/^`([^`\n]+)`/);
|
|
50
|
+
if (inlineCodeMatch && !this.isInsideInlineCode(text, start)) {
|
|
51
|
+
return {
|
|
52
|
+
type: 'inline_code',
|
|
53
|
+
content: inlineCodeMatch[1],
|
|
54
|
+
start: start,
|
|
55
|
+
end: start + inlineCodeMatch[0].length
|
|
56
|
+
};
|
|
57
|
+
}
|
|
58
|
+
// Match spoiler
|
|
59
|
+
const spoilerMatch = remaining.match(/^\|\|([^|\n]+?)\|\|/);
|
|
60
|
+
if (spoilerMatch) {
|
|
61
|
+
return {
|
|
62
|
+
type: 'spoiler',
|
|
63
|
+
content: spoilerMatch[1],
|
|
64
|
+
start: start,
|
|
65
|
+
end: start + spoilerMatch[0].length
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
// Match strikethrough
|
|
69
|
+
const strikethroughMatch = remaining.match(/^~~([^~\n]+?)~~/);
|
|
70
|
+
if (strikethroughMatch) {
|
|
71
|
+
return {
|
|
72
|
+
type: 'strikethrough',
|
|
73
|
+
content: strikethroughMatch[1],
|
|
74
|
+
start: start,
|
|
75
|
+
end: start + strikethroughMatch[0].length
|
|
76
|
+
};
|
|
77
|
+
}
|
|
78
|
+
// Match bold
|
|
79
|
+
const boldMatch = remaining.match(/^\*\*([^*\n]+?)\*\*/);
|
|
80
|
+
if (boldMatch) {
|
|
81
|
+
return {
|
|
82
|
+
type: 'bold',
|
|
83
|
+
content: boldMatch[1],
|
|
84
|
+
start: start,
|
|
85
|
+
end: start + boldMatch[0].length
|
|
86
|
+
};
|
|
87
|
+
}
|
|
88
|
+
// Match underline
|
|
89
|
+
const underlineMatch = remaining.match(/^__([^_\n]+?)__/);
|
|
90
|
+
if (underlineMatch) {
|
|
91
|
+
return {
|
|
92
|
+
type: 'underline',
|
|
93
|
+
content: underlineMatch[1],
|
|
94
|
+
start: start,
|
|
95
|
+
end: start + underlineMatch[0].length
|
|
96
|
+
};
|
|
97
|
+
}
|
|
98
|
+
// Match italic with asterisk
|
|
99
|
+
const italicAsteriskMatch = remaining.match(/^\*([^*\n][^*]*?)\*/);
|
|
100
|
+
if (italicAsteriskMatch && italicAsteriskMatch[1].trim().length > 0) {
|
|
101
|
+
// Don't match if it's part of bold (**)
|
|
102
|
+
if (start > 0 && text[start - 1] === '*' && start < text.length - 1 && text[start + 1] === '*') {
|
|
103
|
+
return null;
|
|
104
|
+
}
|
|
105
|
+
return {
|
|
106
|
+
type: 'italic',
|
|
107
|
+
content: italicAsteriskMatch[1],
|
|
108
|
+
start: start,
|
|
109
|
+
end: start + italicAsteriskMatch[0].length
|
|
110
|
+
};
|
|
111
|
+
}
|
|
112
|
+
// Match italic with underscore
|
|
113
|
+
const italicUnderscoreMatch = remaining.match(/^_([^_\n]+?)_/);
|
|
114
|
+
if (italicUnderscoreMatch && italicUnderscoreMatch[1].trim().length > 0) {
|
|
115
|
+
// Don't match if it's part of underline (__)
|
|
116
|
+
if (start > 0 && text[start - 1] === '_' && start < text.length - 1 && text[start + 1] === '_') {
|
|
117
|
+
return null;
|
|
118
|
+
}
|
|
119
|
+
return {
|
|
120
|
+
type: 'italic',
|
|
121
|
+
content: italicUnderscoreMatch[1],
|
|
122
|
+
start: start,
|
|
123
|
+
end: start + italicUnderscoreMatch[0].length
|
|
124
|
+
};
|
|
125
|
+
}
|
|
126
|
+
// Match link
|
|
127
|
+
const linkMatch = remaining.match(/^\[([^\]]+?)\]\(([^)]+?)\)/);
|
|
128
|
+
if (linkMatch) {
|
|
129
|
+
return {
|
|
130
|
+
type: 'link',
|
|
131
|
+
content: linkMatch[1],
|
|
132
|
+
start: start,
|
|
133
|
+
end: start + linkMatch[0].length,
|
|
134
|
+
language: linkMatch[2]
|
|
135
|
+
};
|
|
136
|
+
}
|
|
137
|
+
return null;
|
|
138
|
+
}
|
|
139
|
+
isInsideCodeBlock(text, position) {
|
|
140
|
+
// Check for code blocks
|
|
141
|
+
const codeBlockRegex = /```[\s\S]*?```/g;
|
|
142
|
+
let match;
|
|
143
|
+
while ((match = codeBlockRegex.exec(text)) !== null) {
|
|
144
|
+
if (position > match.index && position < match.index + match[0].length) {
|
|
145
|
+
// But allow matching the closing ``` itself
|
|
146
|
+
if (position >= match.index + match[0].length - 3) {
|
|
147
|
+
return false;
|
|
148
|
+
}
|
|
149
|
+
return true;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
return false;
|
|
153
|
+
}
|
|
154
|
+
isInsideInlineCode(text, position) {
|
|
155
|
+
// Check for inline code
|
|
156
|
+
const inlineCodeRegex = /`[^`\n]*`/g;
|
|
157
|
+
let match;
|
|
158
|
+
while ((match = inlineCodeRegex.exec(text)) !== null) {
|
|
159
|
+
if (position > match.index && position < match.index + match[0].length) {
|
|
160
|
+
// But allow matching the closing ` itself
|
|
161
|
+
if (position === match.index + match[0].length - 1) {
|
|
162
|
+
return false;
|
|
163
|
+
}
|
|
164
|
+
return true;
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
return false;
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
|
|
171
|
+
/**
 * Escapes HTML special characters (but not double-escape)
 *
 * Replaces `&`, `<`, `>`, `"` and `'` with entities. An ampersand that
 * already starts something shaped like an entity (`&name;` or `&#digits;`)
 * is left alone, so running the function over its own output changes nothing.
 *
 * @param text - Raw text; falsy values (such as the empty string) are returned unchanged
 * @returns The escaped text
 */
function escapeHtml(text) {
    if (!text)
        return text;
    // Replace & first (but not if it's already an entity) so the entities
    // inserted by the following steps are not escaped a second time.
    let result = text.replace(/&(?!#?\w+;)/g, '&amp;');
    result = result.replace(/</g, '&lt;');
    result = result.replace(/>/g, '&gt;');
    result = result.replace(/"/g, '&quot;');
    result = result.replace(/'/g, '&#39;');
    return result;
}
|
|
185
|
+
/**
 * Escapes Telegram HTML special characters
 *
 * Replaces `&`, `<`, `>` and `"` with entities; the single quote is left as
 * is. An ampersand that already starts something shaped like an entity
 * (`&name;` or `&#digits;`) is not escaped again.
 *
 * @param text - Raw text; falsy values (such as the empty string) are returned unchanged
 * @returns The escaped text
 */
function escapeTelegramHtml(text) {
    if (!text)
        return text;
    // Ampersands go first so the entities inserted below are not re-escaped.
    let result = text.replace(/&(?!#?\w+;)/g, '&amp;');
    result = result.replace(/</g, '&lt;');
    result = result.replace(/>/g, '&gt;');
    result = result.replace(/"/g, '&quot;');
    return result;
}
|
|
198
|
+
/**
 * Appends missing code block delimiters
 *
 * When the text contains an odd number of ``` fences, the last one was never
 * closed; a closing fence on its own line is added. Balanced text is
 * returned untouched.
 *
 * @param text - Markdown source
 * @returns The source, with a trailing "\n```" appended if a fence was left open
 */
function autoCloseCodeBlocks(text) {
    // Splitting on the fence yields one more piece than there are fences.
    const fenceCount = text.split('```').length - 1;
    const hasOpenFence = fenceCount % 2 === 1;
    return hasOpenFence ? text + '\n```' : text;
}
|
|
210
|
+
|
|
211
|
+
/**
 * Converts Telegram-style Markdown into Telegram-compatible HTML.
 *
 * Conversion runs in three steps: blockquote lines are replaced by inline
 * markers, the whole text is tokenized and converted recursively, and finally
 * the markers are turned into <blockquote> elements.
 */
class MarkdownConverter {
    /**
     * @param options - Conversion options. `escapeHtml` and
     *   `autoCloseCodeBlocks` default to true; `linkProcessor` and
     *   `codeBlockProcessor` default to the built-in renderers.
     */
    constructor(options = {}) {
        this.hasCustomLinkProcessor = !!options.linkProcessor;
        this.hasCustomCodeBlockProcessor = !!options.codeBlockProcessor;
        this.options = {
            escapeHtml: options.escapeHtml ?? true,
            autoCloseCodeBlocks: options.autoCloseCodeBlocks ?? true,
            linkProcessor: options.linkProcessor || this.defaultLinkProcessor.bind(this),
            codeBlockProcessor: options.codeBlockProcessor || this.defaultCodeBlockProcessor.bind(this)
        };
    }
    /**
     * Convert markdown text to Telegram HTML
     *
     * @param text - Markdown source
     * @returns The trimmed HTML, or the original text when the result is
     *   empty or whitespace only
     */
    convert(text) {
        // Auto-close code blocks if enabled
        const source = this.options.autoCloseCodeBlocks
            ? autoCloseCodeBlocks(text)
            : text;
        // First pass: mark blockquote lines so they survive tokenizing
        const marked = this.preprocessBlockquotes(source);
        // Convert everything, then turn the markers into <blockquote> elements
        const html = this.processBlockquoteMarkers(this.convertRecursive(marked));
        const trimmed = html.trim();
        // Return original text (spaces) if result is empty
        return trimmed === '' ? text : trimmed;
    }
    /**
     * Recursively convert markdown, handling nested styles
     *
     * @param text - Markdown fragment
     * @param depth - Current nesting level; conversion stops beyond level 10
     * @returns The fragment as HTML
     */
    convertRecursive(text, depth = 0) {
        const escapeText = (raw) => (this.options.escapeHtml ? escapeTelegramHtml(raw) : raw);
        // Prevent runaway recursion. The text is still escaped so the guard
        // never lets raw markup through when escaping is enabled.
        if (depth > 10)
            return escapeText(text);
        const tokens = new MarkdownTokenizer(text).tokenize();
        // If no tokens found, return the text as-is (with HTML escaping)
        if (tokens.length === 0) {
            return escapeText(text);
        }
        let result = '';
        let lastPos = 0;
        for (const token of tokens) {
            // Add text before token
            if (token.start > lastPos) {
                result += escapeText(text.slice(lastPos, token.start));
            }
            if (token.type === 'code_block') {
                // Code is never parsed for nested formatting
                const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
                result += this.wrapToken(token.type, codeContent, token.language);
            }
            else if (token.type === 'inline_code') {
                // Inline code is never parsed for nested formatting either
                const codeContent = this.options.escapeHtml ? escapeHtml(token.content) : token.content;
                result += `<code>${codeContent}</code>`;
            }
            else {
                // Everything else may contain nested styles
                const tokenContent = this.convertRecursive(token.content, depth + 1);
                result += this.wrapToken(token.type, tokenContent, token.language);
            }
            lastPos = token.end;
        }
        // Add remaining text
        if (lastPos < text.length) {
            result += escapeText(text.slice(lastPos));
        }
        return result;
    }
    /**
     * Wrap token content in HTML tags
     *
     * @param type - Token type as produced by the tokenizer
     * @param content - Content that convertRecursive has already converted
     *   (and escaped, when escaping is enabled)
     * @param language - Code block language, or the URL for link tokens
     * @returns The HTML for the token; unknown types yield `content` unchanged
     */
    wrapToken(type, content, language) {
        switch (type) {
            case 'bold':
                return `<b>${content}</b>`;
            case 'italic':
                return `<i>${content}</i>`;
            case 'underline':
                return `<u>${content}</u>`;
            case 'strikethrough':
                return `<s>${content}</s>`;
            case 'spoiler':
                return `<span class="tg-spoiler">${content}</span>`;
            case 'inline_code':
                return `<code>${content}</code>`;
            case 'code_block':
                // options.codeBlockProcessor is the custom processor when one
                // was supplied and the bound default renderer otherwise.
                return this.options.codeBlockProcessor(content, language);
            case 'link': {
                const url = language || '';
                if (this.hasCustomLinkProcessor) {
                    return this.options.linkProcessor(url, content);
                }
                const escapedUrl = this.options.escapeHtml ? escapeHtml(url) : url;
                // `content` is already converted HTML; escaping it again would
                // turn nested tags such as <b> into visible text.
                return `<a href="${escapedUrl}">${content}</a>`;
            }
            case 'quote':
                return `\n<blockquote>${content.trim()}</blockquote>\n`;
            case 'expandable_quote':
                return `\n<blockquote expandable>${content.trim()}</blockquote>\n`;
            default:
                return content;
        }
    }
    /**
     * Preprocess blockquotes to mark them before other parsing
     *
     * A line whose trimmed form starts with `**>` becomes
     * `[EXPANDABLE_QUOTE]content`; one starting with `>` becomes
     * `[QUOTE]content`. All other lines are kept verbatim.
     *
     * NOTE(review): lines are not checked for being inside a fenced code
     * block, and literal "[QUOTE]" text in the input is indistinguishable
     * from a marker; confirm whether either case needs handling.
     *
     * @param text - Markdown source
     * @returns The source with blockquote lines replaced by markers
     */
    preprocessBlockquotes(text) {
        const markLine = (line) => {
            const trimmedLine = line.trim();
            if (trimmedLine.startsWith('**>')) {
                // Expandable blockquote
                return `[EXPANDABLE_QUOTE]${trimmedLine.substring(3).trim()}`;
            }
            if (trimmedLine.startsWith('>')) {
                // Regular blockquote
                return `[QUOTE]${trimmedLine.substring(1).trim()}`;
            }
            return line;
        };
        return text.split('\n').map(markLine).join('\n');
    }
    /**
     * Process blockquote markers
     *
     * Replaces every marker and the rest of its line with a <blockquote>
     * element. The text handed in has already been through convertRecursive,
     * so the captured content is wrapped as is; converting it a second time
     * would escape the tags produced by the first pass.
     *
     * @param text - Converted HTML that still contains markers
     * @returns The HTML with markers replaced by blockquote elements
     */
    processBlockquoteMarkers(text) {
        const wrapWith = (openTag) => (match, content) => `\n${openTag}${content.trim()}</blockquote>\n`;
        return text
            .replace(/\[EXPANDABLE_QUOTE\](.*?)(?=\n|$)/g, wrapWith('<blockquote expandable>'))
            .replace(/\[QUOTE\](.*?)(?=\n|$)/g, wrapWith('<blockquote>'));
    }
    /**
     * Built-in link renderer stored as the default `linkProcessor` option.
     *
     * Escapes both arguments when escaping is enabled, so it expects raw
     * link text. wrapToken only calls `options.linkProcessor` when a custom
     * processor was supplied and renders the default anchor itself.
     *
     * @param url - Link target
     * @param text - Link text
     */
    defaultLinkProcessor(url, text) {
        const escapedUrl = this.options.escapeHtml ? escapeHtml(url) : url;
        const escapedText = this.options.escapeHtml ? escapeHtml(text) : text;
        return `<a href="${escapedUrl}">${escapedText}</a>`;
    }
    /**
     * Built-in code block renderer.
     *
     * @param code - Code block content
     * @param language - Optional language name, emitted as a `language-*` class
     * @returns A <pre><code> element surrounded by newlines
     */
    defaultCodeBlockProcessor(code, language) {
        const escapedCode = this.options.escapeHtml ? escapeHtml(code) : code;
        const langAttr = language ? ` class="language-${language}"` : '';
        return `\n<pre><code${langAttr}>${escapedCode}</code></pre>\n`;
    }
}
|
|
387
|
+
|
|
388
|
+
/**
 * One-shot conversion of Telegram-style Markdown to HTML.
 *
 * Builds a fresh converter for the given options and runs it once.
 *
 * @param text - Markdown text to convert
 * @param options - Conversion options
 * @returns Telegram-compatible HTML
 */
function markdownToHtml(text, options) {
    return new MarkdownConverter(options).convert(text);
}
|
|
398
|
+
/**
 * Build a reusable converter configured with the given options.
 *
 * @param options - Conversion options
 * @returns A new MarkdownConverter instance
 */
function createConverter(options) {
    const converter = new MarkdownConverter(options);
    return converter;
}
|
|
404
|
+
var index = {
|
|
405
|
+
markdownToHtml,
|
|
406
|
+
createConverter,
|
|
407
|
+
MarkdownConverter
|
|
408
|
+
};
|
|
409
|
+
|
|
410
|
+
export { MarkdownConverter, createConverter, index as default, markdownToHtml };
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { Token } from './types';
|
|
2
|
+
export declare class MarkdownTokenizer {
|
|
3
|
+
private text;
|
|
4
|
+
constructor(text: string);
|
|
5
|
+
/**
|
|
6
|
+
* Tokenize the markdown text
|
|
7
|
+
*/
|
|
8
|
+
tokenize(): Token[];
|
|
9
|
+
private matchToken;
|
|
10
|
+
private isInsideCodeBlock;
|
|
11
|
+
private isInsideInlineCode;
|
|
12
|
+
}
|
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
export interface ConvertOptions {
|
|
2
|
+
/**
|
|
3
|
+
* Whether to escape HTML special characters
|
|
4
|
+
* @default true
|
|
5
|
+
*/
|
|
6
|
+
escapeHtml?: boolean;
|
|
7
|
+
/**
|
|
8
|
+
* Whether to append missing code block delimiters
|
|
9
|
+
* @default true
|
|
10
|
+
*/
|
|
11
|
+
autoCloseCodeBlocks?: boolean;
|
|
12
|
+
/**
|
|
13
|
+
* Custom replacement function for links
|
|
14
|
+
*/
|
|
15
|
+
linkProcessor?: (url: string, text: string) => string;
|
|
16
|
+
/**
|
|
17
|
+
* Custom replacement function for code blocks
|
|
18
|
+
*/
|
|
19
|
+
codeBlockProcessor?: (code: string, language?: string) => string;
|
|
20
|
+
}
|
|
21
|
+
/**
 * A formatting span found in the Markdown source.
 */
export interface Token {
    /** Kind of construct, e.g. 'bold', 'italic', 'code_block', 'link'. */
    type: string;
    /** Text between the construct's delimiters. */
    content: string;
    /** Offset of the first character of the construct in the scanned text. */
    start: number;
    /** Offset just past the last character of the construct. */
    end: number;
    /** Language of a code block; for link tokens the tokenizer stores the URL here. */
    language?: string;
    /** Nested tokens, when present. */
    children?: Token[];
}
|
package/dist/utils.d.ts
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Escapes HTML special characters (but not double-escape)
|
|
3
|
+
*/
|
|
4
|
+
export declare function escapeHtml(text: string): string;
|
|
5
|
+
/**
|
|
6
|
+
* Escapes Telegram HTML special characters
|
|
7
|
+
*/
|
|
8
|
+
export declare function escapeTelegramHtml(text: string): string;
|
|
9
|
+
/**
|
|
10
|
+
* Checks if a position is inside a code block
|
|
11
|
+
*/
|
|
12
|
+
export declare function isInsideCodeBlock(text: string, position: number): boolean;
|
|
13
|
+
/**
|
|
14
|
+
* Checks if a position is inside inline code
|
|
15
|
+
*/
|
|
16
|
+
export declare function isInsideInlineCode(text: string, position: number): boolean;
|
|
17
|
+
/**
|
|
18
|
+
* Checks if a position is inside any code
|
|
19
|
+
*/
|
|
20
|
+
export declare function isInsideCode(text: string, position: number): boolean;
|
|
21
|
+
/**
|
|
22
|
+
* Appends missing code block delimiters
|
|
23
|
+
*/
|
|
24
|
+
export declare function autoCloseCodeBlocks(text: string): string;
|
package/package.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "telegram-md2html",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "A smart converter for Telegram-style Markdown to Telegram-compatible HTML",
|
|
5
|
+
|
|
6
|
+
"main": "./dist/index.cjs",
|
|
7
|
+
"module": "./dist/index.mjs",
|
|
8
|
+
"types": "./dist/index.d.ts",
|
|
9
|
+
|
|
10
|
+
"exports": {
|
|
11
|
+
".": {
|
|
12
|
+
"types": "./dist/index.d.ts",
|
|
13
|
+
"import": "./dist/index.mjs",
|
|
14
|
+
"require": "./dist/index.cjs"
|
|
15
|
+
}
|
|
16
|
+
},
|
|
17
|
+
|
|
18
|
+
"scripts": {
|
|
19
|
+
"build": "rollup -c && npm run build:types",
|
|
20
|
+
"build:types": "cp dist/index.d.ts dist/index.d.mts",
|
|
21
|
+
"dev": "rollup -c -w",
|
|
22
|
+
"test": "node --no-warnings --experimental-vm-modules node_modules/jest/bin/jest.js",
|
|
23
|
+
"test:ci": "npm test -- --coverage",
|
|
24
|
+
"prepublishOnly": "npm run build && npm run test"
|
|
25
|
+
},
|
|
26
|
+
|
|
27
|
+
"keywords": [
|
|
28
|
+
"telegram",
|
|
29
|
+
"markdown",
|
|
30
|
+
"html",
|
|
31
|
+
"converter",
|
|
32
|
+
"bot",
|
|
33
|
+
"messaging"
|
|
34
|
+
],
|
|
35
|
+
|
|
36
|
+
"author": "Soumyadeep Das <soumyadeepdas765@gmail.com>",
|
|
37
|
+
"license": "MIT",
|
|
38
|
+
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@rollup/plugin-commonjs": "^25.0.7",
|
|
41
|
+
"@rollup/plugin-node-resolve": "^15.2.3",
|
|
42
|
+
"@rollup/plugin-terser": "^0.4.4",
|
|
43
|
+
"@rollup/plugin-typescript": "^11.1.5",
|
|
44
|
+
"@types/jest": "^29.5.8",
|
|
45
|
+
"jest": "^29.7.0",
|
|
46
|
+
"rollup": "^4.9.0",
|
|
47
|
+
"ts-jest": "^29.1.1",
|
|
48
|
+
"tslib": "^2.6.2",
|
|
49
|
+
"typescript": "^5.3.3"
|
|
50
|
+
},
|
|
51
|
+
|
|
52
|
+
"files": [
|
|
53
|
+
"dist",
|
|
54
|
+
"README.md",
|
|
55
|
+
"LICENSE"
|
|
56
|
+
],
|
|
57
|
+
|
|
58
|
+
"engines": {
|
|
59
|
+
"node": ">=14.0.0"
|
|
60
|
+
}
|
|
61
|
+
}
|