tellegram 1.0.0 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -8
- package/index.mjs +5 -0
- package/lib/convert.mjs +25 -0
- package/lib/{definitions.js → definitions.mjs} +7 -9
- package/lib/paginate.mjs +117 -0
- package/lib/tellegram.mjs +158 -0
- package/lib/utils.mjs +74 -0
- package/package.json +71 -70
- package/types/index.d.ts +19 -13
- package/index.js +0 -2
- package/lib/convert.js +0 -25
- package/lib/telegramify.js +0 -158
- package/lib/utils.js +0 -76
package/README.md
CHANGED
|
@@ -1,11 +1,23 @@
|
|
|
1
1
|
# teLLegraM
|
|
2
2
|
|
|
3
|
-
[](https://github.com/leask/tellegram/actions)
|
|
4
|
+
[](https://codecov.io/gh/leask/tellegram)
|
|
5
|
+

|
|
6
6
|
|
|
7
7
|
teLLegraM is a library designed to format LLM (Large Language Model) generated text into [Telegram-specific-markdown (MarkdownV2)](https://core.telegram.org/bots/api#formatting-options), based on [Unified](https://github.com/unifiedjs/unified) and [Remark](https://github.com/remarkjs/remark/). It ensures that complex markdown from AI responses is perfectly interpreted by Telegram clients.
|
|
8
8
|
|
|
9
|
+
## Acknowledgements
|
|
10
|
+
|
|
11
|
+
This project is based on [telegramify-markdown](https://github.com/skoropadas/telegramify-markdown) but has evolved to specifically address the needs of LLM-generated content.
|
|
12
|
+
|
|
13
|
+
## Why teLLegraM?
|
|
14
|
+
|
|
15
|
+
While the original library provided a solid foundation, teLLegraM introduces several key optimizations for the "LLM to Telegram" workflow:
|
|
16
|
+
|
|
17
|
+
1. **LLM-Specific Optimizations**: Tailored handling of common artifacts found in AI responses, ensuring cleaner output.
|
|
18
|
+
2. **Lossless Pagination**: Telegram has strict message length limits. teLLegraM includes a smart pagination feature that splits long text into multiple messages *without* breaking MarkdownV2 syntax. It ensures bold, italic, or code blocks are correctly closed in one message and reopened in the next, preventing "unclosed entity" errors.
|
|
19
|
+
3. **Strict MarkdownV2 Compliance**: Enhanced escaping rules to handle edge cases often produced by generative models.
|
|
20
|
+
|
|
9
21
|
## Install
|
|
10
22
|
|
|
11
23
|
```bash
|
|
@@ -14,8 +26,13 @@ npm install tellegram
|
|
|
14
26
|
|
|
15
27
|
## Usage
|
|
16
28
|
|
|
29
|
+
### Basic Conversion
|
|
30
|
+
|
|
17
31
|
```js
|
|
18
|
-
|
|
32
|
+
import tellegram from 'tellegram';
|
|
33
|
+
// OR
|
|
34
|
+
// import { convert } from 'tellegram';
|
|
35
|
+
|
|
19
36
|
const markdown = `
|
|
20
37
|
# Header
|
|
21
38
|
## Subheader
|
|
@@ -29,11 +46,12 @@ const markdown = `
|
|
|
29
46
|
And simple text with + some - symbols.
|
|
30
47
|
`;
|
|
31
48
|
|
|
32
|
-
|
|
49
|
+
const result = tellegram(markdown);
|
|
50
|
+
console.log(result);
|
|
33
51
|
/*
|
|
34
52
|
*Header*
|
|
35
53
|
*Subheader*
|
|
36
|
-
|
|
54
|
+
|
|
37
55
|
[1\.0\.0](http://version.com)
|
|
38
56
|
|
|
39
57
|
• item 1
|
|
@@ -44,13 +62,31 @@ And simple text with \+ some \- symbols\.
|
|
|
44
62
|
*/
|
|
45
63
|
```
|
|
46
64
|
|
|
65
|
+
### Pagination (Handling Long Messages)
|
|
66
|
+
|
|
67
|
+
When dealing with verbose LLM outputs, use the `paginate` function to safely split text into chunks that respect Telegram's limits (4096 characters) while preserving formatting context.
|
|
68
|
+
|
|
69
|
+
```js
|
|
70
|
+
import { paginate } from 'tellegram';
|
|
71
|
+
|
|
72
|
+
const longLlmOutput = `... extremely long text with **markdown** ...`;
|
|
73
|
+
|
|
74
|
+
// Split into an array of strings, each safe to send
|
|
75
|
+
const messages = paginate(longLlmOutput);
|
|
76
|
+
|
|
77
|
+
for (const msg of messages) {
|
|
78
|
+
// Send each part sequentially
|
|
79
|
+
await bot.sendMessage(chatId, msg, { parse_mode: 'MarkdownV2' });
|
|
80
|
+
}
|
|
81
|
+
```
|
|
82
|
+
|
|
47
83
|
## Possible options
|
|
48
84
|
|
|
49
85
|
You can also pass an unsupported-tags strategy as the second argument. It can be one of the following:
|
|
50
86
|
|
|
51
|
-
- `escape` - escape unsupported symbols for unsupported tags
|
|
87
|
+
- `escape` - escape unsupported symbols for unsupported tags (default)
|
|
52
88
|
- `remove` - remove unsupported tags
|
|
53
|
-
- `keep` - ignore unsupported tags
|
|
89
|
+
- `keep` - ignore unsupported tags
|
|
54
90
|
|
|
55
91
|
```js
|
|
56
92
|
import teLLegraM from 'tellegram';
|
package/index.mjs
ADDED
package/lib/convert.mjs
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import gfm from 'remark-gfm';
|
|
2
|
+
import parse from 'remark-parse';
|
|
3
|
+
import stringify from 'remark-stringify';
|
|
4
|
+
import removeComments from 'remark-remove-comments';
|
|
5
|
+
import unified from 'unified';
|
|
6
|
+
|
|
7
|
+
import { collectDefinitions, removeDefinitions } from './definitions.mjs';
|
|
8
|
+
import createTellegramOptions from './tellegram.mjs';
|
|
9
|
+
|
|
10
|
+
export default (markdown, unsupportedTagsStrategy = 'escape') => {
|
|
11
|
+
const definitions = {};
|
|
12
|
+
|
|
13
|
+
const tellegramOptions = createTellegramOptions(definitions, unsupportedTagsStrategy);
|
|
14
|
+
|
|
15
|
+
return unified()
|
|
16
|
+
.use(parse)
|
|
17
|
+
.use(gfm)
|
|
18
|
+
.use(removeComments)
|
|
19
|
+
.use(collectDefinitions, definitions)
|
|
20
|
+
.use(removeDefinitions)
|
|
21
|
+
.use(stringify, tellegramOptions)
|
|
22
|
+
.processSync(markdown)
|
|
23
|
+
.toString()
|
|
24
|
+
.replace(/<!---->\n/gi, '').trim();
|
|
25
|
+
};
|
|
@@ -8,12 +8,12 @@ import visit from 'unist-util-visit';
|
|
|
8
8
|
* @param {Record<string, { title: null | string, url: string }>} definitions
|
|
9
9
|
*/
|
|
10
10
|
export const collectDefinitions = definitions => tree => {
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
11
|
+
visit(tree, 'definition', node => {
|
|
12
|
+
definitions[node.identifier] = {
|
|
13
|
+
title: node.title,
|
|
14
|
+
url: node.url,
|
|
15
|
+
};
|
|
16
|
+
});
|
|
17
17
|
};
|
|
18
18
|
|
|
19
19
|
/**
|
|
@@ -21,7 +21,5 @@ export const collectDefinitions = definitions => tree => {
|
|
|
21
21
|
* This avoids unwanted negative space in stringified output.
|
|
22
22
|
*/
|
|
23
23
|
export const removeDefinitions = () => tree => {
|
|
24
|
-
|
|
24
|
+
remove(tree, { cascade: true }, 'definition');
|
|
25
25
|
};
|
|
26
|
-
|
|
27
|
-
|
package/lib/paginate.mjs
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
import assert from 'assert';
|
|
2
|
+
import convert from './convert.mjs';
|
|
3
|
+
|
|
4
|
+
// Default page size: 93% of 4096 characters, truncated to an integer (= 3809).
const MESSAGE_LENGTH_LIMIT = Math.trunc(4096 * 0.93);
|
|
5
|
+
// Lower-cases `str` and returns its non-empty runs of ASCII letters/digits as an array of words.
const clarify = str => {
    const pieces = str.toLowerCase().split(/[^a-zA-Z0-9]+/);
    return pieces.filter(Boolean);
};
|
|
6
|
+
// Joins the entries of `arr` into a single string, separated by `sep` (a newline by default).
const lines = (arr, sep = '\n') => {
    return arr.join(sep);
};
|
|
7
|
+
// Decorates `err` in place with a `status` property plus every own enumerable
// property of `opt` (which may override `status`), then returns the same object.
const extError = (err, status, opt = {}) => {
    const sources = [{ status }, opt];
    return Object.assign(err, ...sources);
};
|
|
8
|
+
// Builds a fresh Error carrying `msg`, then decorates it with `status` and `opt` via extError.
const newError = (msg, status, opt) => {
    const err = new Error(msg);
    return extError(err, status, opt);
};
|
|
9
|
+
// Creates an error with newError(msg, status, opt) and throws it; never returns.
const throwError = (msg, status, opt) => {
    const err = newError(msg, status, opt);
    throw err;
};
|
|
10
|
+
// Stringifies `str` through ensureString with trimming enabled by default;
// any key in `opts` (including `trim`) overrides that default.
const trim = (str, opts) => {
    const merged = { trim: true, ...(opts || {}) };
    return ensureString(str, merged);
};
|
|
11
|
+
|
|
12
|
+
// Is
|
|
13
|
+
// True when `value`'s `constructor` property is exactly `type`.
// null/undefined have no constructor, so they compare as `undefined`.
const _is = (type, value) => {
    const ctor = value == null ? undefined : value.constructor;
    return ctor === type;
};
|
|
14
|
+
// Returns the built-in type tag of `any`: the `Xxx` part of the `[object Xxx]`
// string produced by Object.prototype.toString, or 'Undefined' for undefined.
const _type = (any) => typeof any === 'undefined' ? 'Undefined'
|
|
15
|
+
: Object.prototype.toString.call(any).replace(/^\[[^\ ]*\ (.*)\]$/, '$1');
|
|
16
|
+
[
|
|
17
|
+
ArrayBuffer, BigInt, Boolean, Error, Number, Object, Set, String, Uint8Array
|
|
18
|
+
].map(type => {
|
|
19
|
+
const name = `is${type.name}`;
|
|
20
|
+
type[name] = type[name] || (value => _is(type, value));
|
|
21
|
+
});
|
|
22
|
+
Date.isDate = Date.isDate || ((value, strict) => _is(Date, value) ? (
|
|
23
|
+
strict ? value.toTimeString().toLowerCase() !== 'invalid date' : true
|
|
24
|
+
) : false);
|
|
25
|
+
Function.isFunction = Function.isFunction
|
|
26
|
+
|| (value => ['Function', 'AsyncFunction'].includes(_type(value)));
|
|
27
|
+
|
|
28
|
+
// Converts an arbitrary value to a string.
//   any     - the value to convert.
//   options - optional; only `options.trace` is read here (Error branch).
// Relies on the Object.isObject / Date.isDate / Error.isError helpers that this
// file installs on the built-ins when they are not already defined.
const toString = (any, options) => {
|
|
29
|
+
if (Object.isObject(any)) { return JSON.stringify(any); } // objects are serialised as JSON
|
|
30
|
+
else if (Date.isDate(any)) { return any.toISOString(); } // dates become ISO-8601 strings
|
|
31
|
+
else if (Error.isError(any)) { return options?.trace ? any.stack : any.message; } // stack only when options.trace is truthy
|
|
32
|
+
return String(any ?? ''); // null/undefined become '', everything else goes through String()
|
|
33
|
+
};
|
|
34
|
+
|
|
35
|
+
const ensureString = (str, options) => {
|
|
36
|
+
str = toString(str, options);
|
|
37
|
+
if (options?.case) {
|
|
38
|
+
switch (toString(options?.case).trim().toUpperCase()) {
|
|
39
|
+
case 'UP':
|
|
40
|
+
str = str.toUpperCase();
|
|
41
|
+
break;
|
|
42
|
+
case 'LOW':
|
|
43
|
+
str = str.toLowerCase();
|
|
44
|
+
break;
|
|
45
|
+
case 'CAP': // capitalize
|
|
46
|
+
str = `${str.charAt(0).toUpperCase()}${str.slice(1)}`;
|
|
47
|
+
break;
|
|
48
|
+
case 'CAMEL':
|
|
49
|
+
str = clarify(str).map((x, i) => i ? `${x.charAt(0).toUpperCase()}${x.slice(1)}` : x).join('');
|
|
50
|
+
break;
|
|
51
|
+
case 'SNAKE':
|
|
52
|
+
str = clarify(str).join('_');
|
|
53
|
+
assert(str, 'String can not convert to snake case.', 500);
|
|
54
|
+
break;
|
|
55
|
+
default:
|
|
56
|
+
throwError(`Invalid case option: '${options?.case}'.`, 500);
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
options?.trim && (str = str.trim());
|
|
60
|
+
options?.compact && (str = str.replace(/\s+/g, ' ').trim());
|
|
61
|
+
options?.limit && (str = str.trim()) && str.length > options.limit
|
|
62
|
+
&& (str = `${str.slice(0, options.limit).trim()}...`);
|
|
63
|
+
return str;
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
export default (message, options) => {
|
|
67
|
+
let [pages, page, size, codeMark, concat, prefix] =
|
|
68
|
+
[[], [], ~~options?.size || MESSAGE_LENGTH_LIMIT, '', '', ''];
|
|
69
|
+
const countLength = pos => {
|
|
70
|
+
const str = prefix + lines(page) + message.substring(0, pos + 1);
|
|
71
|
+
return Math.max(str.length, convert(str).length);
|
|
72
|
+
};
|
|
73
|
+
const submit = () => {
|
|
74
|
+
const content = trim(lines(page));
|
|
75
|
+
content && pages.push(prefix + content + concat + (codeMark ? '\n```' : ''));
|
|
76
|
+
page.length = 0;
|
|
77
|
+
prefix = '';
|
|
78
|
+
if (codeMark) {
|
|
79
|
+
prefix += `${codeMark}\n`;
|
|
80
|
+
}
|
|
81
|
+
if (concat) {
|
|
82
|
+
prefix += concat;
|
|
83
|
+
concat = '';
|
|
84
|
+
}
|
|
85
|
+
};
|
|
86
|
+
while ((message || '').length) {
|
|
87
|
+
let nextN = message.indexOf('\n'); // find the next line break
|
|
88
|
+
nextN === -1 && (nextN = message.length); // no line break left: the remainder is a single line
|
|
89
|
+
let cut = nextN; // initialise the tentative cut position
|
|
90
|
+
if (countLength(cut) > size && page.length) { submit(); continue; }
|
|
91
|
+
let high = Math.min(cut, size);
|
|
92
|
+
cut = 0;
|
|
93
|
+
while (cut < high) {
|
|
94
|
+
let mid = Math.ceil((cut + high) / 2);
|
|
95
|
+
if (countLength(mid) <= size) {
|
|
96
|
+
cut = mid;
|
|
97
|
+
} else {
|
|
98
|
+
high = mid - 1;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
concat = cut < nextN ? '...' : '';
|
|
102
|
+
const line = message.substring(0, cut + 1).trimEnd();
|
|
103
|
+
page.push(line);
|
|
104
|
+
/^```.{0,20}$/.test(line) && (codeMark = codeMark ? '' : line);
|
|
105
|
+
if (concat) {
|
|
106
|
+
submit();
|
|
107
|
+
}
|
|
108
|
+
message = message.substring(cut + 1);
|
|
109
|
+
}
|
|
110
|
+
submit();
|
|
111
|
+
return pages.map((p, i) => convert((
|
|
112
|
+
pages.length > 1 && !options?.noPageNum
|
|
113
|
+
? `📃 PAGE ${i + 1} / ${pages.length}:\n\n` : ''
|
|
114
|
+
) + p));
|
|
115
|
+
};
|
|
116
|
+
|
|
117
|
+
export { MESSAGE_LENGTH_LIMIT };
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
import defaultHandlers from 'mdast-util-to-markdown/lib/handle/index.js';
|
|
2
|
+
import phrasing from 'mdast-util-to-markdown/lib/util/container-phrasing.js';
|
|
3
|
+
import { toMarkdown as gfmTableToMarkdown } from 'mdast-util-gfm-table';
|
|
4
|
+
|
|
5
|
+
import { wrap, isURL, escapeSymbols, processUnsupportedTags } from './utils.mjs';
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Creates custom `mdast-util-to-markdown` handlers that tailor the output for
|
|
9
|
+
* Telegram Markdown.
|
|
10
|
+
*
|
|
11
|
+
* @param {Readonly<Record<string, { title: null | string, url: string }>>} definitions
|
|
12
|
+
* Record of `Definition`s in the Markdown document, keyed by identifier.
|
|
13
|
+
*
|
|
14
|
+
* @returns {import('mdast-util-to-markdown').Handlers}
|
|
15
|
+
*/
|
|
16
|
+
const createHandlers = (definitions, unsupportedTagsStrategy) => ({
|
|
17
|
+
heading: (node, _parent, context) => {
|
|
18
|
+
// make headers to be just *strong*
|
|
19
|
+
const marker = '*';
|
|
20
|
+
|
|
21
|
+
const exit = context.enter('heading');
|
|
22
|
+
const value = phrasing(node, context, { before: marker, after: marker });
|
|
23
|
+
exit();
|
|
24
|
+
|
|
25
|
+
return wrap(value, marker);
|
|
26
|
+
},
|
|
27
|
+
|
|
28
|
+
strong: (node, _parent, context) => {
|
|
29
|
+
const marker = '*';
|
|
30
|
+
|
|
31
|
+
const exit = context.enter('strong');
|
|
32
|
+
const value = phrasing(node, context, { before: marker, after: marker });
|
|
33
|
+
exit();
|
|
34
|
+
|
|
35
|
+
return wrap(value, marker);
|
|
36
|
+
},
|
|
37
|
+
|
|
38
|
+
delete(node, _parent, context) {
|
|
39
|
+
const marker = '~';
|
|
40
|
+
|
|
41
|
+
const exit = context.enter('delete');
|
|
42
|
+
const value = phrasing(node, context, { before: marker, after: marker });
|
|
43
|
+
exit();
|
|
44
|
+
|
|
45
|
+
return wrap(value, marker);
|
|
46
|
+
},
|
|
47
|
+
|
|
48
|
+
emphasis: (node, _parent, context) => {
|
|
49
|
+
const marker = '_';
|
|
50
|
+
|
|
51
|
+
const exit = context.enter('emphasis');
|
|
52
|
+
const value = phrasing(node, context, { before: marker, after: marker });
|
|
53
|
+
exit();
|
|
54
|
+
|
|
55
|
+
return wrap(value, marker);
|
|
56
|
+
},
|
|
57
|
+
|
|
58
|
+
list: (...args) => defaultHandlers.list(...args).replace(/^(\d+)./gm, '$1\\.'),
|
|
59
|
+
|
|
60
|
+
listItem: (...args) => defaultHandlers.listItem(...args).replace(/^\*/, '•'),
|
|
61
|
+
|
|
62
|
+
code(node, _parent, context) {
|
|
63
|
+
const exit = context.enter('code');
|
|
64
|
+
// delete language prefix for deprecated markdown formatters (old Bitbucket Editor)
|
|
65
|
+
const content = node.value.replace(/^#![a-z]+\n/, ''); // ```\n#!javascript\ncode block\n```
|
|
66
|
+
exit();
|
|
67
|
+
|
|
68
|
+
const language = node.lang || '';
|
|
69
|
+
return `\`\`\`${language}\n${escapeSymbols(content, 'code')}\n\`\`\``;
|
|
70
|
+
},
|
|
71
|
+
|
|
72
|
+
link: (node, _parent, context) => {
|
|
73
|
+
const exit = context.enter('link');
|
|
74
|
+
const text = phrasing(node, context, { before: '|', after: '>' }) || escapeSymbols(node.title);
|
|
75
|
+
const isUrlEncoded = decodeURI(node.url) !== node.url;
|
|
76
|
+
const url = isUrlEncoded ? node.url : encodeURI(node.url);
|
|
77
|
+
exit();
|
|
78
|
+
|
|
79
|
+
if (!isURL(url)) return escapeSymbols(text) || escapeSymbols(url);
|
|
80
|
+
|
|
81
|
+
return text
|
|
82
|
+
? `[${text}](${escapeSymbols(url, 'link')})`
|
|
83
|
+
: `[${escapeSymbols(url)}](${escapeSymbols(url, 'link')})`;
|
|
84
|
+
},
|
|
85
|
+
|
|
86
|
+
linkReference: (node, _parent, context) => {
|
|
87
|
+
const exit = context.enter('linkReference');
|
|
88
|
+
const definition = definitions[node.identifier];
|
|
89
|
+
const text = phrasing(node, context, { before: '|', after: '>' }) || (definition ? definition.title : null);
|
|
90
|
+
exit();
|
|
91
|
+
|
|
92
|
+
if (!definition || !isURL(definition.url)) return escapeSymbols(text);
|
|
93
|
+
|
|
94
|
+
return text
|
|
95
|
+
? `[${text}](${escapeSymbols(definition.url, 'link')})`
|
|
96
|
+
: `[${escapeSymbols(definition.url)}](${escapeSymbols(definition.url, 'link')})`;
|
|
97
|
+
},
|
|
98
|
+
|
|
99
|
+
image: (node, _parent, context) => {
|
|
100
|
+
const exit = context.enter('image');
|
|
101
|
+
const text = node.alt || node.title;
|
|
102
|
+
const url = node.url
|
|
103
|
+
exit();
|
|
104
|
+
|
|
105
|
+
if (!isURL(url)) return escapeSymbols(text) || escapeSymbols(url);
|
|
106
|
+
|
|
107
|
+
return text
|
|
108
|
+
? `[${escapeSymbols(text)}](${escapeSymbols(url, 'link')})`
|
|
109
|
+
: `[${escapeSymbols(url)}](${escapeSymbols(url, 'link')})`;
|
|
110
|
+
},
|
|
111
|
+
|
|
112
|
+
imageReference: (node, _parent, context) => {
|
|
113
|
+
const exit = context.enter('imageReference');
|
|
114
|
+
const definition = definitions[node.identifier];
|
|
115
|
+
const text = node.alt || (definition ? definition.title : null);
|
|
116
|
+
exit();
|
|
117
|
+
|
|
118
|
+
if (!definition || !isURL(definition.url)) return escapeSymbols(text);
|
|
119
|
+
|
|
120
|
+
return text
|
|
121
|
+
? `[${escapeSymbols(text)}](${escapeSymbols(definition.url, 'link')})`
|
|
122
|
+
: `[${escapeSymbols(definition.url)}](${escapeSymbols(definition.url, 'link')})`;
|
|
123
|
+
},
|
|
124
|
+
|
|
125
|
+
text: (node, _parent, context) => {
|
|
126
|
+
const exit = context.enter('text');
|
|
127
|
+
const text = node.value;
|
|
128
|
+
exit();
|
|
129
|
+
|
|
130
|
+
return escapeSymbols(text);
|
|
131
|
+
},
|
|
132
|
+
|
|
133
|
+
blockquote: (node, _parent, context) =>
|
|
134
|
+
processUnsupportedTags(defaultHandlers.blockquote(node, _parent, context), unsupportedTagsStrategy),
|
|
135
|
+
html: (node, _parent, context) =>
|
|
136
|
+
processUnsupportedTags(defaultHandlers.html(node, _parent, context), unsupportedTagsStrategy),
|
|
137
|
+
table: (node, _parent, context) =>
|
|
138
|
+
processUnsupportedTags(gfmTableToMarkdown().handlers.table(node, _parent, context), unsupportedTagsStrategy),
|
|
139
|
+
thematicBreak: (_node, _parent, _context) =>
|
|
140
|
+
processUnsupportedTags('---', unsupportedTagsStrategy),
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* Creates options to be passed into a `remark-stringify` processor that tailor
|
|
145
|
+
* the output for Telegram Markdown.
|
|
146
|
+
*
|
|
147
|
+
* @param {Readonly<Record<string, { title: null | string, url: string }>>} definitions
|
|
148
|
+
* Record of `Definition`s in the Markdown document, keyed by identifier.
|
|
149
|
+
*
|
|
150
|
+
* @returns {import('remark-stringify').RemarkStringifyOptions}
|
|
151
|
+
*/
|
|
152
|
+
const createOptions = (definitions, unsupportedTagsStrategy) => ({
|
|
153
|
+
bullet: '*',
|
|
154
|
+
tightDefinitions: true,
|
|
155
|
+
handlers: createHandlers(definitions, unsupportedTagsStrategy),
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
export default createOptions;
|
package/lib/utils.mjs
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { URL } from 'url';
|
|
2
|
+
|
|
3
|
+
/**
 * Surrounds `string` with the given wrappers: in the given order before it and
 * in reverse order after it, e.g. wrap('x', '*', '_') -> '*_x_*'.
 *
 * @param {string} string Text to wrap.
 * @param {...string} wrappers Markers to place around the text.
 * @returns {string} The wrapped text.
 */
export function wrap(string, ...wrappers) {
|
|
4
|
+
return [
|
|
5
|
+
...wrappers,
|
|
6
|
+
string,
|
|
7
|
+
...wrappers.reverse(), // reverse() mutates the rest array in place; harmless because the leading spread was already evaluated
|
|
8
|
+
].join('');
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
/**
 * Tells whether `string` can be parsed by the `URL` constructor.
 *
 * @param {string} string Candidate URL.
 * @returns {boolean} true when `new URL(string)` succeeds, false when it throws.
 */
export function isURL(string) {
|
|
12
|
+
try {
|
|
13
|
+
return Boolean(new URL(string)); // a constructed URL object is always truthy; the false path is the catch below
|
|
14
|
+
} catch (error) {
|
|
15
|
+
return false;
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export function escapeSymbols(text, textType = 'text') {
|
|
20
|
+
if (!text) {
|
|
21
|
+
return text;
|
|
22
|
+
}
|
|
23
|
+
switch (textType) {
|
|
24
|
+
case 'code':
|
|
25
|
+
return text
|
|
26
|
+
.replace(/\\/g, '\\\\')
|
|
27
|
+
.replace(/`/g, '\\`')
|
|
28
|
+
case 'link':
|
|
29
|
+
return text
|
|
30
|
+
.replace(/\\/g, '\\\\')
|
|
31
|
+
.replace(/\(/g, '\\(')
|
|
32
|
+
.replace(/\)/g, '\\)')
|
|
33
|
+
case 'ignore_escaped':
|
|
34
|
+
return text.replace(/(\\.)|([_*\[\]()~`>#+\-=|{}.!])/g, (match, escaped, char) => {
|
|
35
|
+
if (escaped) {
|
|
36
|
+
return escaped;
|
|
37
|
+
}
|
|
38
|
+
return '\\' + char;
|
|
39
|
+
});
|
|
40
|
+
default:
|
|
41
|
+
return text
|
|
42
|
+
.replace(/_/g, '\\_')
|
|
43
|
+
.replace(/\*/g, '\\*')
|
|
44
|
+
.replace(/\[/g, '\\[')
|
|
45
|
+
.replace(/]/g, '\\]')
|
|
46
|
+
.replace(/\(/g, '\\(')
|
|
47
|
+
.replace(/\)/g, '\\)')
|
|
48
|
+
.replace(/~/g, '\\~')
|
|
49
|
+
.replace(/`/g, '\\`')
|
|
50
|
+
.replace(/>/g, '\\>')
|
|
51
|
+
.replace(/#/g, '\\#')
|
|
52
|
+
.replace(/\+/g, '\\+')
|
|
53
|
+
.replace(/-/g, '\\-')
|
|
54
|
+
.replace(/=/g, '\\=')
|
|
55
|
+
.replace(/\|/g, '\\|')
|
|
56
|
+
.replace(/{/g, '\\{')
|
|
57
|
+
.replace(/}/g, '\\}')
|
|
58
|
+
.replace(/\./g, '\\.')
|
|
59
|
+
.replace(/!/g, '\\!');
|
|
60
|
+
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/**
 * Applies the chosen strategy to already-stringified content.
 *
 * @param {string} content Stringified markdown.
 * @param {string} strategy 'escape', 'remove' or 'keep'; any other value behaves like 'keep'.
 * @returns {string} The escaped content, an empty string, or the content unchanged.
 */
export function processUnsupportedTags(content, strategy) {
|
|
65
|
+
switch (strategy) {
|
|
66
|
+
case 'escape':
|
|
67
|
+
return escapeSymbols(content, 'ignore_escaped'); // 'ignore_escaped' leaves existing backslash escapes untouched
|
|
68
|
+
case 'remove':
|
|
69
|
+
return '';
|
|
70
|
+
case 'keep':
|
|
71
|
+
default:
|
|
72
|
+
return content;
|
|
73
|
+
}
|
|
74
|
+
}
|
package/package.json
CHANGED
|
@@ -1,70 +1,71 @@
|
|
|
1
|
-
{
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "tellegram",
|
|
3
|
+
"version": "1.1.3",
|
|
4
|
+
"description": "Convert LLM-generated markdown into Telegram-specific markdown (MarkdownV2)",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "index.mjs",
|
|
7
|
+
"scripts": {
|
|
8
|
+
"test": "NODE_OPTIONS='--experimental-vm-modules --no-deprecation' npx jest --coverage --verbose",
|
|
9
|
+
"lint": "eslint",
|
|
10
|
+
"semantic-release": "semantic-release",
|
|
11
|
+
"prepare": "husky install",
|
|
12
|
+
"codecov": "codecov"
|
|
13
|
+
},
|
|
14
|
+
"files": [
|
|
15
|
+
"README.md",
|
|
16
|
+
"LICENSE",
|
|
17
|
+
"index.mjs",
|
|
18
|
+
"lib",
|
|
19
|
+
"types"
|
|
20
|
+
],
|
|
21
|
+
"repository": {
|
|
22
|
+
"type": "git",
|
|
23
|
+
"url": "git+https://github.com/leask/tellegram.git"
|
|
24
|
+
},
|
|
25
|
+
"keywords": [
|
|
26
|
+
"tellegram",
|
|
27
|
+
"telegram",
|
|
28
|
+
"markdown",
|
|
29
|
+
"telegramify",
|
|
30
|
+
"parser",
|
|
31
|
+
"remark",
|
|
32
|
+
"unified"
|
|
33
|
+
],
|
|
34
|
+
"types": "types/index.d.ts",
|
|
35
|
+
"author": "Leask Wong",
|
|
36
|
+
"license": "MIT",
|
|
37
|
+
"bugs": {
|
|
38
|
+
"url": "https://github.com/leask/tellegram/issues"
|
|
39
|
+
},
|
|
40
|
+
"homepage": "https://github.com/leask/tellegram#readme",
|
|
41
|
+
"dependencies": {
|
|
42
|
+
"mdast-util-gfm-table": "^0.1.6",
|
|
43
|
+
"mdast-util-to-markdown": "^0.6.2",
|
|
44
|
+
"remark-gfm": "^1.0.0",
|
|
45
|
+
"remark-parse": "^9.0.0",
|
|
46
|
+
"remark-remove-comments": "^0.2.0",
|
|
47
|
+
"remark-stringify": "^9.0.1",
|
|
48
|
+
"unified": "^9.0.0",
|
|
49
|
+
"unist-util-remove": "^2.0.1",
|
|
50
|
+
"unist-util-visit": "^2.0.3"
|
|
51
|
+
},
|
|
52
|
+
"devDependencies": {
|
|
53
|
+
"@commitlint/cli": "^12.1.1",
|
|
54
|
+
"@commitlint/config-conventional": "^12.1.1",
|
|
55
|
+
"codecov": "^3.8.3",
|
|
56
|
+
"eslint": "^7.24.0",
|
|
57
|
+
"husky": "^6.0.0",
|
|
58
|
+
"jest": "^30.2.0",
|
|
59
|
+
"lint-staged": "^10.5.4",
|
|
60
|
+
"prettier": "2.2.1",
|
|
61
|
+
"semantic-release": "^17.4.2"
|
|
62
|
+
},
|
|
63
|
+
"lint-staged": {
|
|
64
|
+
"*.{js,json,md}": [
|
|
65
|
+
"prettier --write",
|
|
66
|
+
"git add"
|
|
67
|
+
],
|
|
68
|
+
"*.{css,scss,less}": "stylelint --fix",
|
|
69
|
+
"*.js": "eslint --cache --fix"
|
|
70
|
+
}
|
|
71
|
+
}
|
package/types/index.d.ts
CHANGED
|
@@ -1,13 +1,19 @@
|
|
|
1
|
-
type UnsupportedTagsStrategy = 'escape' | 'remove' | 'keep'
|
|
2
|
-
|
|
3
|
-
declare module 'tellegram' {
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
*
|
|
7
|
-
* @param
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
1
|
+
type UnsupportedTagsStrategy = 'escape' | 'remove' | 'keep'
|
|
2
|
+
|
|
3
|
+
declare module 'tellegram' {
|
|
4
|
+
/**
|
|
5
|
+
* Converts markdown to Telegram's format.
|
|
6
|
+
* @param markdown The markdown to convert.
|
|
7
|
+
* @param unsupportedTagsStrategy The strategy to use for unsupported tags.
|
|
8
|
+
*/
|
|
9
|
+
export function convert(markdown: string, unsupportedTagsStrategy?: UnsupportedTagsStrategy): string;
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Splits long markdown into Telegram-sized MarkdownV2 messages.
|
|
13
|
+
* @param text The text to paginate.
|
|
14
|
+
*/
|
|
15
|
+
// Returns one converted string per page. `size` overrides the default page
// length; `noPageNum` suppresses the "PAGE i / n" header on multi-page output.
export function paginate(text: string, options?: { size?: number; noPageNum?: boolean }): string[];
|
|
16
|
+
|
|
17
|
+
const defaultExport: typeof convert;
|
|
18
|
+
export default defaultExport;
|
|
19
|
+
}
|
package/index.js
DELETED
package/lib/convert.js
DELETED
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
import gfm from 'remark-gfm';
|
|
2
|
-
import parse from 'remark-parse';
|
|
3
|
-
import stringify from 'remark-stringify';
|
|
4
|
-
import removeComments from 'remark-remove-comments';
|
|
5
|
-
import unified from 'unified';
|
|
6
|
-
|
|
7
|
-
import { collectDefinitions, removeDefinitions } from './definitions.js';
|
|
8
|
-
import createTelegramifyOptions from './telegramify.js';
|
|
9
|
-
|
|
10
|
-
export default (markdown, unsupportedTagsStrategy) => {
|
|
11
|
-
const definitions = {};
|
|
12
|
-
|
|
13
|
-
const telegramifyOptions = createTelegramifyOptions(definitions, unsupportedTagsStrategy);
|
|
14
|
-
|
|
15
|
-
return unified()
|
|
16
|
-
.use(parse)
|
|
17
|
-
.use(gfm)
|
|
18
|
-
.use(removeComments)
|
|
19
|
-
.use(collectDefinitions, definitions)
|
|
20
|
-
.use(removeDefinitions)
|
|
21
|
-
.use(stringify, telegramifyOptions)
|
|
22
|
-
.processSync(markdown)
|
|
23
|
-
.toString()
|
|
24
|
-
.replace(/<!---->\n/gi, '');
|
|
25
|
-
};
|
package/lib/telegramify.js
DELETED
|
@@ -1,158 +0,0 @@
|
|
|
1
|
-
import defaultHandlers from 'mdast-util-to-markdown/lib/handle/index.js';
|
|
2
|
-
import phrasing from 'mdast-util-to-markdown/lib/util/container-phrasing.js';
|
|
3
|
-
import {toMarkdown as gfmTableToMarkdown} from 'mdast-util-gfm-table';
|
|
4
|
-
|
|
5
|
-
import {wrap, isURL, escapeSymbols, processUnsupportedTags} from './utils.js';
|
|
6
|
-
|
|
7
|
-
/**
|
|
8
|
-
* Creates custom `mdast-util-to-markdown` handlers that tailor the output for
|
|
9
|
-
* Telegram Markdown.
|
|
10
|
-
*
|
|
11
|
-
* @param {Readonly<Record<string, { title: null | string, url: string }>>} definitions
|
|
12
|
-
* Record of `Definition`s in the Markdown document, keyed by identifier.
|
|
13
|
-
*
|
|
14
|
-
* @returns {import('mdast-util-to-markdown').Handlers}
|
|
15
|
-
*/
|
|
16
|
-
const createHandlers = (definitions, unsupportedTagsStrategy) => ({
|
|
17
|
-
heading: (node, _parent, context) => {
|
|
18
|
-
// make headers to be just *strong*
|
|
19
|
-
const marker = '*';
|
|
20
|
-
|
|
21
|
-
const exit = context.enter('heading');
|
|
22
|
-
const value = phrasing(node, context, {before: marker, after: marker});
|
|
23
|
-
exit();
|
|
24
|
-
|
|
25
|
-
return wrap(value, marker);
|
|
26
|
-
},
|
|
27
|
-
|
|
28
|
-
strong: (node, _parent, context) => {
|
|
29
|
-
const marker = '*';
|
|
30
|
-
|
|
31
|
-
const exit = context.enter('strong');
|
|
32
|
-
const value = phrasing(node, context, {before: marker, after: marker});
|
|
33
|
-
exit();
|
|
34
|
-
|
|
35
|
-
return wrap(value, marker);
|
|
36
|
-
},
|
|
37
|
-
|
|
38
|
-
delete(node, _parent, context) {
|
|
39
|
-
const marker = '~';
|
|
40
|
-
|
|
41
|
-
const exit = context.enter('delete');
|
|
42
|
-
const value = phrasing(node, context, {before: marker, after: marker});
|
|
43
|
-
exit();
|
|
44
|
-
|
|
45
|
-
return wrap(value, marker);
|
|
46
|
-
},
|
|
47
|
-
|
|
48
|
-
emphasis: (node, _parent, context) => {
|
|
49
|
-
const marker = '_';
|
|
50
|
-
|
|
51
|
-
const exit = context.enter('emphasis');
|
|
52
|
-
const value = phrasing(node, context, {before: marker, after: marker});
|
|
53
|
-
exit();
|
|
54
|
-
|
|
55
|
-
return wrap(value, marker);
|
|
56
|
-
},
|
|
57
|
-
|
|
58
|
-
list: (...args) => defaultHandlers.list(...args).replace(/^(\d+)./gm, '$1\\.'),
|
|
59
|
-
|
|
60
|
-
listItem: (...args) => defaultHandlers.listItem(...args).replace(/^\*/, '•'),
|
|
61
|
-
|
|
62
|
-
code(node, _parent, context) {
|
|
63
|
-
const exit = context.enter('code');
|
|
64
|
-
// delete language prefix for deprecated markdown formatters (old Bitbucket Editor)
|
|
65
|
-
const content = node.value.replace(/^#![a-z]+\n/, ''); // ```\n#!javascript\ncode block\n```
|
|
66
|
-
exit();
|
|
67
|
-
|
|
68
|
-
const language = node.lang || '';
|
|
69
|
-
return `\`\`\`${language}\n${escapeSymbols(content, 'code')}\n\`\`\``;
|
|
70
|
-
},
|
|
71
|
-
|
|
72
|
-
link: (node, _parent, context) => {
|
|
73
|
-
const exit = context.enter('link');
|
|
74
|
-
const text = phrasing(node, context, {before: '|', after: '>'}) || escapeSymbols(node.title);
|
|
75
|
-
const isUrlEncoded = decodeURI(node.url) !== node.url;
|
|
76
|
-
const url = isUrlEncoded ? node.url : encodeURI(node.url);
|
|
77
|
-
exit();
|
|
78
|
-
|
|
79
|
-
if (!isURL(url)) return escapeSymbols(text) || escapeSymbols(url);
|
|
80
|
-
|
|
81
|
-
return text
|
|
82
|
-
? `[${text}](${escapeSymbols(url, 'link')})`
|
|
83
|
-
: `[${escapeSymbols(url)}](${escapeSymbols(url, 'link')})`;
|
|
84
|
-
},
|
|
85
|
-
|
|
86
|
-
linkReference: (node, _parent, context) => {
|
|
87
|
-
const exit = context.enter('linkReference');
|
|
88
|
-
const definition = definitions[node.identifier];
|
|
89
|
-
const text = phrasing(node, context, {before: '|', after: '>'}) || (definition ? definition.title : null);
|
|
90
|
-
exit();
|
|
91
|
-
|
|
92
|
-
if (!definition || !isURL(definition.url)) return escapeSymbols(text);
|
|
93
|
-
|
|
94
|
-
return text
|
|
95
|
-
? `[${text}](${escapeSymbols(definition.url, 'link')})`
|
|
96
|
-
: `[${escapeSymbols(definition.url)}](${escapeSymbols(definition.url, 'link')})`;
|
|
97
|
-
},
|
|
98
|
-
|
|
99
|
-
image: (node, _parent, context) => {
|
|
100
|
-
const exit = context.enter('image');
|
|
101
|
-
const text = node.alt || node.title;
|
|
102
|
-
const url = node.url
|
|
103
|
-
exit();
|
|
104
|
-
|
|
105
|
-
if (!isURL(url)) return escapeSymbols(text) || escapeSymbols(url);
|
|
106
|
-
|
|
107
|
-
return text
|
|
108
|
-
? `[${escapeSymbols(text)}](${escapeSymbols(url, 'link')})`
|
|
109
|
-
: `[${escapeSymbols(url)}](${escapeSymbols(url, 'link')})`;
|
|
110
|
-
},
|
|
111
|
-
|
|
112
|
-
imageReference: (node, _parent, context) => {
|
|
113
|
-
const exit = context.enter('imageReference');
|
|
114
|
-
const definition = definitions[node.identifier];
|
|
115
|
-
const text = node.alt || (definition ? definition.title : null);
|
|
116
|
-
exit();
|
|
117
|
-
|
|
118
|
-
if (!definition || !isURL(definition.url)) return escapeSymbols(text);
|
|
119
|
-
|
|
120
|
-
return text
|
|
121
|
-
? `[${escapeSymbols(text)}](${escapeSymbols(definition.url, 'link')})`
|
|
122
|
-
: `[${escapeSymbols(definition.url)}](${escapeSymbols(definition.url, 'link')})`;
|
|
123
|
-
},
|
|
124
|
-
|
|
125
|
-
text: (node, _parent, context) => {
|
|
126
|
-
const exit = context.enter('text');
|
|
127
|
-
const text = node.value;
|
|
128
|
-
exit();
|
|
129
|
-
|
|
130
|
-
return escapeSymbols(text);
|
|
131
|
-
},
|
|
132
|
-
|
|
133
|
-
blockquote: (node, _parent, context) =>
|
|
134
|
-
processUnsupportedTags(defaultHandlers.blockquote(node, _parent, context), unsupportedTagsStrategy),
|
|
135
|
-
html: (node, _parent, context) =>
|
|
136
|
-
processUnsupportedTags(defaultHandlers.html(node, _parent, context), unsupportedTagsStrategy),
|
|
137
|
-
table: (node, _parent, context) =>
|
|
138
|
-
processUnsupportedTags(gfmTableToMarkdown().handlers.table(node, _parent, context), unsupportedTagsStrategy),
|
|
139
|
-
thematicBreak: (_node, _parent, _context) =>
|
|
140
|
-
processUnsupportedTags('---', unsupportedTagsStrategy),
|
|
141
|
-
});
|
|
142
|
-
|
|
143
|
-
/**
|
|
144
|
-
* Creates options to be passed into a `remark-stringify` processor that tailor
|
|
145
|
-
* the output for Telegram Markdown.
|
|
146
|
-
*
|
|
147
|
-
* @param {Readonly<Record<string, { title: null | string, url: string }>>} definitions
|
|
148
|
-
* Record of `Definition`s in the Markdown document, keyed by identifier.
|
|
149
|
-
*
|
|
150
|
-
* @returns {import('remark-stringify').RemarkStringifyOptions}
|
|
151
|
-
*/
|
|
152
|
-
const createOptions = (definitions, unsupportedTagsStrategy) => ({
|
|
153
|
-
bullet: '*',
|
|
154
|
-
tightDefinitions: true,
|
|
155
|
-
handlers: createHandlers(definitions, unsupportedTagsStrategy),
|
|
156
|
-
});
|
|
157
|
-
|
|
158
|
-
export default createOptions;
|
package/lib/utils.js
DELETED
|
@@ -1,76 +0,0 @@
|
|
|
1
|
-
import { URL } from 'url';
|
|
2
|
-
|
|
3
|
-
export function wrap(string, ...wrappers) {
|
|
4
|
-
return [
|
|
5
|
-
...wrappers,
|
|
6
|
-
string,
|
|
7
|
-
...wrappers.reverse(),
|
|
8
|
-
].join('');
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
export function isURL(string) {
|
|
12
|
-
try {
|
|
13
|
-
return Boolean(new URL(string));
|
|
14
|
-
} catch (error) {
|
|
15
|
-
return false;
|
|
16
|
-
}
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
export function escapeSymbols(text, textType = 'text') {
|
|
20
|
-
if (!text) {
|
|
21
|
-
return text;
|
|
22
|
-
}
|
|
23
|
-
switch (textType) {
|
|
24
|
-
case 'code':
|
|
25
|
-
return text
|
|
26
|
-
.replace(/\\/g, '\\\\')
|
|
27
|
-
.replace(/`/g, '\\`')
|
|
28
|
-
case 'link':
|
|
29
|
-
return text
|
|
30
|
-
.replace(/\\/g, '\\\\')
|
|
31
|
-
.replace(/\(/g, '\\(')
|
|
32
|
-
.replace(/\)/g, '\\)')
|
|
33
|
-
case 'ignore_escaped':
|
|
34
|
-
return text.replace(/(\\.)|([_*\[\]()~`>#+\-=|{}.!])/g, (match, escaped, char) => {
|
|
35
|
-
if (escaped) {
|
|
36
|
-
return escaped;
|
|
37
|
-
}
|
|
38
|
-
return '\\' + char;
|
|
39
|
-
});
|
|
40
|
-
default:
|
|
41
|
-
return text
|
|
42
|
-
.replace(/_/g, '\\_')
|
|
43
|
-
.replace(/\*/g, '\\*')
|
|
44
|
-
.replace(/\[/g, '\\[')
|
|
45
|
-
.replace(/]/g, '\\]')
|
|
46
|
-
.replace(/\(/g, '\\(')
|
|
47
|
-
.replace(/\)/g, '\\)')
|
|
48
|
-
.replace(/~/g, '\\~')
|
|
49
|
-
.replace(/`/g, '\\`')
|
|
50
|
-
.replace(/>/g, '\\>')
|
|
51
|
-
.replace(/#/g, '\\#')
|
|
52
|
-
.replace(/\+/g, '\\+')
|
|
53
|
-
.replace(/-/g, '\\-')
|
|
54
|
-
.replace(/=/g, '\\=')
|
|
55
|
-
.replace(/\|/g, '\\|')
|
|
56
|
-
.replace(/{/g, '\\{')
|
|
57
|
-
.replace(/}/g, '\\}')
|
|
58
|
-
.replace(/\./g, '\\.')
|
|
59
|
-
.replace(/!/g, '\\!');
|
|
60
|
-
|
|
61
|
-
}
|
|
62
|
-
}
|
|
63
|
-
|
|
64
|
-
export function processUnsupportedTags(content, strategy) {
|
|
65
|
-
switch (strategy) {
|
|
66
|
-
case 'escape':
|
|
67
|
-
return escapeSymbols(content, 'ignore_escaped');
|
|
68
|
-
case 'remove':
|
|
69
|
-
return '';
|
|
70
|
-
case 'keep':
|
|
71
|
-
default:
|
|
72
|
-
return content;
|
|
73
|
-
}
|
|
74
|
-
}
|
|
75
|
-
|
|
76
|
-
|