tellegram 1.0.0 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,11 +1,23 @@
1
1
  # teLLegraM
2
2
 
3
- [![Build](https://img.shields.io/github/actions/workflow/status/skoropadas/telegramify-markdown/release.yml?branch=master)](https://github.com/skoropadas/telegramify-markdown/actions)
4
- [![codecov](https://codecov.io/gh/skoropadas/telegramify-markdown/branch/master/graph/badge.svg?token=LxCmgGNUHl)](https://codecov.io/gh/skoropadas/telegramify-markdown)
5
- ![License](https://img.shields.io/github/license/skoropadas/telegramify-markdown)
3
+ [![Build](https://img.shields.io/github/actions/workflow/status/leask/tellegram/release.yml?branch=master)](https://github.com/leask/tellegram/actions)
4
+ [![codecov](https://codecov.io/gh/leask/tellegram/branch/master/graph/badge.svg?token=LxCmgGNUHl)](https://codecov.io/gh/leask/tellegram)
5
+ ![License](https://img.shields.io/github/license/leask/tellegram)
6
6
 
7
7
  teLLegraM is a library that formats text generated by LLMs (Large Language Models) into [Telegram-specific markdown (MarkdownV2)](https://core.telegram.org/bots/api#formatting-options), built on [Unified](https://github.com/unifiedjs/unified) and [Remark](https://github.com/remarkjs/remark/). It ensures that complex markdown from AI responses is interpreted correctly by Telegram clients.
8
8
 
9
+ ## Acknowledgements
10
+
11
+ This project is based on [telegramify-markdown](https://github.com/skoropadas/telegramify-markdown) but has since evolved to specifically address the needs of LLM-generated content.
12
+
13
+ ## Why teLLegraM?
14
+
15
+ While the original library provided a solid foundation, teLLegraM introduces several key optimizations for the "LLM to Telegram" workflow:
16
+
17
+ 1. **LLM-Specific Optimizations**: Tailored handling of common artifacts found in AI responses, ensuring cleaner output.
18
+ 2. **Lossless Pagination**: Telegram has strict message length limits. teLLegraM includes a smart pagination feature that splits long text into multiple messages *without* breaking MarkdownV2 syntax. It ensures bold, italic, or code blocks are correctly closed in one message and reopened in the next, preventing "unclosed entity" errors.
19
+ 3. **Strict MarkdownV2 Compliance**: Enhanced escaping rules to handle edge cases often produced by generative models.
20
+
9
21
  ## Install
10
22
 
11
23
  ```bash
@@ -14,8 +26,13 @@ npm install tellegram
14
26
 
15
27
  ## Usage
16
28
 
29
+ ### Basic Conversion
30
+
17
31
  ```js
18
- const teLLegraM = require('tellegram');
32
+ import tellegram from 'tellegram';
33
+ // OR
34
+ // import { convert } from 'tellegram';
35
+
19
36
  const markdown = `
20
37
  # Header
21
38
  ## Subheader
@@ -29,11 +46,12 @@ const markdown = `
29
46
  And simple text with + some - symbols.
30
47
  `;
31
48
 
32
- teLLegraM(markdown);
49
+ const result = tellegram(markdown);
50
+ console.log(result);
33
51
  /*
34
52
  *Header*
35
53
  *Subheader*
36
-
54
+
37
55
  [1\.0\.0](http://version.com)
38
56
 
39
57
  • item 1
@@ -44,13 +62,31 @@ And simple text with \+ some \- symbols\.
44
62
  */
45
63
  ```
46
64
 
65
+ ### Pagination (Handling Long Messages)
66
+
67
+ When dealing with verbose LLM outputs, use the `paginate` function to safely split text into chunks that respect Telegram's limits (4096 characters) while preserving formatting context.
68
+
69
+ ```js
70
+ import { paginate } from 'tellegram';
71
+
72
+ const longLlmOutput = `... extremely long text with **markdown** ...`;
73
+
74
+ // Split into an array of strings, each safe to send
75
+ const messages = paginate(longLlmOutput);
76
+
77
+ for (const msg of messages) {
78
+ // Send each part sequentially
79
+ await bot.sendMessage(chatId, msg, { parse_mode: 'MarkdownV2' });
80
+ }
81
+ ```
82
+
47
83
  ## Possible options
48
84
 
49
85
  You can also pass an unsupported-tags strategy as the second argument. It can be one of the following:
50
86
 
51
- - `escape` - escape unsupported symbols for unsupported tags
87
+ - `escape` - escape unsupported symbols for unsupported tags (default)
52
88
  - `remove` - remove unsupported tags
53
- - `keep` - ignore unsupported tags (default)
89
+ - `keep` - ignore unsupported tags
54
90
 
55
91
  ```js
56
92
  import teLLegraM from 'tellegram';
package/index.js CHANGED
@@ -1,2 +1,5 @@
1
1
  import convert from './lib/convert.js';
2
+ import paginate from './lib/paginate.js';
3
+
4
+ export { convert, paginate };
2
5
  export default convert;
package/lib/convert.js CHANGED
@@ -5,21 +5,21 @@ import removeComments from 'remark-remove-comments';
5
5
  import unified from 'unified';
6
6
 
7
7
  import { collectDefinitions, removeDefinitions } from './definitions.js';
8
- import createTelegramifyOptions from './telegramify.js';
8
+ import createTellegramOptions from './tellegram.js';
9
9
 
10
- export default (markdown, unsupportedTagsStrategy) => {
11
- const definitions = {};
10
+ export default (markdown, unsupportedTagsStrategy = 'escape') => {
11
+ const definitions = {};
12
12
 
13
- const telegramifyOptions = createTelegramifyOptions(definitions, unsupportedTagsStrategy);
13
+ const tellegramOptions = createTellegramOptions(definitions, unsupportedTagsStrategy);
14
14
 
15
- return unified()
16
- .use(parse)
17
- .use(gfm)
18
- .use(removeComments)
19
- .use(collectDefinitions, definitions)
20
- .use(removeDefinitions)
21
- .use(stringify, telegramifyOptions)
22
- .processSync(markdown)
23
- .toString()
24
- .replace(/<!---->\n/gi, '');
15
+ return unified()
16
+ .use(parse)
17
+ .use(gfm)
18
+ .use(removeComments)
19
+ .use(collectDefinitions, definitions)
20
+ .use(removeDefinitions)
21
+ .use(stringify, tellegramOptions)
22
+ .processSync(markdown)
23
+ .toString()
24
+ .replace(/<!---->\n/gi, '').trim();
25
25
  };
@@ -8,12 +8,12 @@ import visit from 'unist-util-visit';
8
8
  * @param {Record<string, { title: null | string, url: string }>} definitions
9
9
  */
10
10
  export const collectDefinitions = definitions => tree => {
11
- visit(tree, 'definition', node => {
12
- definitions[node.identifier] = {
13
- title: node.title,
14
- url: node.url,
15
- };
16
- });
11
+ visit(tree, 'definition', node => {
12
+ definitions[node.identifier] = {
13
+ title: node.title,
14
+ url: node.url,
15
+ };
16
+ });
17
17
  };
18
18
 
19
19
  /**
@@ -21,7 +21,5 @@ export const collectDefinitions = definitions => tree => {
21
21
  * This avoids unwanted negative space in stringified output.
22
22
  */
23
23
  export const removeDefinitions = () => tree => {
24
- remove(tree, { cascade: true }, 'definition');
24
+ remove(tree, { cascade: true }, 'definition');
25
25
  };
26
-
27
-
@@ -0,0 +1,112 @@
1
+ import assert from 'assert';
2
+ import convert from './convert.js';
3
+
4
+ const MESSAGE_LENGTH_LIMIT = parseInt(4096 * 0.93); // ~= 3800
5
+ const clarify = str => str.toLowerCase().split(/[^a-zA-Z0-9]+/).filter(x => x);
6
+ const lines = (arr, sep = '\n') => arr.join(sep);
7
+ const extError = (err, status, opt = {}) => Object.assign(err, { status }, opt);
8
+ const newError = (msg, status, opt) => extError(new Error(msg), status, opt);
9
+ const throwError = (msg, status, opt) => { throw newError(msg, status, opt); };
10
+ const trim = (str, opts) => ensureString(str, { trim: true, ...opts || {} });
11
+
12
+ // Type predicates: helpers that test a value's constructor or built-in type tag
13
+ const _is = (type, value) => value?.constructor === type;
14
+ const _type = (any) => typeof any === 'undefined' ? 'Undefined'
15
+ : Object.prototype.toString.call(any).replace(/^\[[^\ ]*\ (.*)\]$/, '$1');
16
+ [
17
+ ArrayBuffer, BigInt, Boolean, Error, Number, Object, Set, String, Uint8Array
18
+ ].map(type => {
19
+ const name = `is${type.name}`;
20
+ type[name] = type[name] || (value => _is(type, value));
21
+ });
22
+ Date.isDate = Date.isDate || ((value, strict) => _is(Date, value) ? (
23
+ strict ? value.toTimeString().toLowerCase() !== 'invalid date' : true
24
+ ) : false);
25
+ Function.isFunction = Function.isFunction
26
+ || (value => ['Function', 'AsyncFunction'].includes(_type(value)));
27
+
28
+ const toString = (any, options) => {
29
+ if (Object.isObject(any)) { return JSON.stringify(any); }
30
+ else if (Date.isDate(any)) { return any.toISOString(); }
31
+ else if (Error.isError(any)) { return options?.trace ? any.stack : any.message; }
32
+ return String(any ?? '');
33
+ };
34
+
35
+ const ensureString = (str, options) => {
36
+ str = toString(str, options);
37
+ if (options?.case) {
38
+ switch (toString(options?.case).trim().toUpperCase()) {
39
+ case 'UP':
40
+ str = str.toUpperCase();
41
+ break;
42
+ case 'LOW':
43
+ str = str.toLowerCase();
44
+ break;
45
+ case 'CAP': // capitalize
46
+ str = `${str.charAt(0).toUpperCase()}${str.slice(1)}`;
47
+ break;
48
+ case 'CAMEL':
49
+ str = clarify(str).map((x, i) => i ? `${x.charAt(0).toUpperCase()}${x.slice(1)}` : x).join('');
50
+ break;
51
+ case 'SNAKE':
52
+ str = clarify(str).join('_');
53
+ assert(str, 'String can not convert to snake case.', 500);
54
+ break;
55
+ default:
56
+ throwError(`Invalid case option: '${options?.case}'.`, 500);
57
+ }
58
+ }
59
+ options?.trim && (str = str.trim());
60
+ options?.compact && (str = str.replace(/\s+/g, ' ').trim());
61
+ options?.limit && (str = str.trim()) && str.length > options.limit
62
+ && (str = `${str.slice(0, options.limit).trim()}...`);
63
+ return str;
64
+ };
65
+
66
+ export default (message, options) => { // Splits markdown into pages whose converted length fits `size`; returns the converted pages
67
+ let [pages, page, size, codeMark, concat, prefix] =
68
+ [[], [], ~~options?.size || MESSAGE_LENGTH_LIMIT, '', '', ''];
69
+ const countLength = str => convert(str).length; // length is measured on the converted output, not the raw markdown
70
+ const submit = () => { // flush the buffered lines as one page and prepare the prefix for the next page
71
+ const content = trim(lines(page));
72
+ content && pages.push(prefix + content + concat + (codeMark ? '\n```' : '')); // close a still-open code fence at the end of the page
73
+ page.length = 0;
74
+ prefix = '';
75
+ if (codeMark) {
76
+ prefix += `${codeMark}\n`; // reopen the same code fence at the start of the next page
77
+ }
78
+ if (concat) {
79
+ prefix += concat; // carry the '...' continuation marker over to the next page
80
+ concat = '';
81
+ }
82
+ };
83
+ while ((message || '').length) {
84
+ let nextN = message.indexOf('\n'); // index of the next line break
85
+ nextN === -1 && (nextN = message.length); // no line break left: the remainder is a single line
86
+ let [cut, shouldBreak] = [nextN, false]; // start with the tentative cut position at the line break
87
+ while (countLength(lines(page) + message.substring(0, cut + 1)) > size) {
88
+ if (page.length) { // the page already has content: flush it and retry this line on a fresh page
89
+ submit();
90
+ shouldBreak = true;
91
+ break;
92
+ }
93
+ cut--; // a single line exceeds the limit on its own: shrink the cut one character at a time
94
+ concat = '...';
95
+ }
96
+ if (shouldBreak) { continue; }
97
+ const line = message.substring(0, cut + 1).trimEnd();
98
+ page.push(line);
99
+ /^```.{0,20}$/.test(line) && (codeMark = codeMark ? '' : line); // a fence line toggles the code-block state and remembers the opening fence
100
+ if (concat) { // the line was cut mid-way: emit the page immediately
101
+ submit();
102
+ }
103
+ message = message.substring(cut + 1);
104
+ }
105
+ submit();
106
+ return pages.map((p, i) => convert(( // the page header is only added when there is more than one page and `noPageNum` is not set
107
+ pages.length > 1 && !options?.noPageNum
108
+ ? `📃 PAGE ${i + 1} / ${pages.length}:\n\n` : ''
109
+ ) + p));
110
+ };
111
+
112
+ export { MESSAGE_LENGTH_LIMIT };
@@ -0,0 +1,158 @@
1
+ import defaultHandlers from 'mdast-util-to-markdown/lib/handle/index.js';
2
+ import phrasing from 'mdast-util-to-markdown/lib/util/container-phrasing.js';
3
+ import { toMarkdown as gfmTableToMarkdown } from 'mdast-util-gfm-table';
4
+
5
+ import { wrap, isURL, escapeSymbols, processUnsupportedTags } from './utils.js';
6
+
7
+ /**
8
+ * Creates custom `mdast-util-to-markdown` handlers that tailor the output for
9
+ * Telegram Markdown.
10
+ *
11
+ * @param {Readonly<Record<string, { title: null | string, url: string }>>} definitions
12
+ * Record of `Definition`s in the Markdown document, keyed by identifier.
13
+ *
14
+ * @returns {import('mdast-util-to-markdown').Handlers}
15
+ */
16
+ const createHandlers = (definitions, unsupportedTagsStrategy) => ({
17
+ heading: (node, _parent, context) => {
18
+ // make headers to be just *strong*
19
+ const marker = '*';
20
+
21
+ const exit = context.enter('heading');
22
+ const value = phrasing(node, context, { before: marker, after: marker });
23
+ exit();
24
+
25
+ return wrap(value, marker);
26
+ },
27
+
28
+ strong: (node, _parent, context) => {
29
+ const marker = '*';
30
+
31
+ const exit = context.enter('strong');
32
+ const value = phrasing(node, context, { before: marker, after: marker });
33
+ exit();
34
+
35
+ return wrap(value, marker);
36
+ },
37
+
38
+ delete(node, _parent, context) {
39
+ const marker = '~';
40
+
41
+ const exit = context.enter('delete');
42
+ const value = phrasing(node, context, { before: marker, after: marker });
43
+ exit();
44
+
45
+ return wrap(value, marker);
46
+ },
47
+
48
+ emphasis: (node, _parent, context) => {
49
+ const marker = '_';
50
+
51
+ const exit = context.enter('emphasis');
52
+ const value = phrasing(node, context, { before: marker, after: marker });
53
+ exit();
54
+
55
+ return wrap(value, marker);
56
+ },
57
+
58
+ list: (...args) => defaultHandlers.list(...args).replace(/^(\d+)./gm, '$1\\.'),
59
+
60
+ listItem: (...args) => defaultHandlers.listItem(...args).replace(/^\*/, '•'),
61
+
62
+ code(node, _parent, context) {
63
+ const exit = context.enter('code');
64
+ // delete language prefix for deprecated markdown formatters (old Bitbucket Editor)
65
+ const content = node.value.replace(/^#![a-z]+\n/, ''); // ```\n#!javascript\ncode block\n```
66
+ exit();
67
+
68
+ const language = node.lang || '';
69
+ return `\`\`\`${language}\n${escapeSymbols(content, 'code')}\n\`\`\``;
70
+ },
71
+
72
+ link: (node, _parent, context) => {
73
+ const exit = context.enter('link');
74
+ const text = phrasing(node, context, { before: '|', after: '>' }) || escapeSymbols(node.title);
75
+ const isUrlEncoded = decodeURI(node.url) !== node.url;
76
+ const url = isUrlEncoded ? node.url : encodeURI(node.url);
77
+ exit();
78
+
79
+ if (!isURL(url)) return escapeSymbols(text) || escapeSymbols(url);
80
+
81
+ return text
82
+ ? `[${text}](${escapeSymbols(url, 'link')})`
83
+ : `[${escapeSymbols(url)}](${escapeSymbols(url, 'link')})`;
84
+ },
85
+
86
+ linkReference: (node, _parent, context) => {
87
+ const exit = context.enter('linkReference');
88
+ const definition = definitions[node.identifier];
89
+ const text = phrasing(node, context, { before: '|', after: '>' }) || (definition ? definition.title : null);
90
+ exit();
91
+
92
+ if (!definition || !isURL(definition.url)) return escapeSymbols(text);
93
+
94
+ return text
95
+ ? `[${text}](${escapeSymbols(definition.url, 'link')})`
96
+ : `[${escapeSymbols(definition.url)}](${escapeSymbols(definition.url, 'link')})`;
97
+ },
98
+
99
+ image: (node, _parent, context) => {
100
+ const exit = context.enter('image');
101
+ const text = node.alt || node.title;
102
+ const url = node.url
103
+ exit();
104
+
105
+ if (!isURL(url)) return escapeSymbols(text) || escapeSymbols(url);
106
+
107
+ return text
108
+ ? `[${escapeSymbols(text)}](${escapeSymbols(url, 'link')})`
109
+ : `[${escapeSymbols(url)}](${escapeSymbols(url, 'link')})`;
110
+ },
111
+
112
+ imageReference: (node, _parent, context) => {
113
+ const exit = context.enter('imageReference');
114
+ const definition = definitions[node.identifier];
115
+ const text = node.alt || (definition ? definition.title : null);
116
+ exit();
117
+
118
+ if (!definition || !isURL(definition.url)) return escapeSymbols(text);
119
+
120
+ return text
121
+ ? `[${escapeSymbols(text)}](${escapeSymbols(definition.url, 'link')})`
122
+ : `[${escapeSymbols(definition.url)}](${escapeSymbols(definition.url, 'link')})`;
123
+ },
124
+
125
+ text: (node, _parent, context) => {
126
+ const exit = context.enter('text');
127
+ const text = node.value;
128
+ exit();
129
+
130
+ return escapeSymbols(text);
131
+ },
132
+
133
+ blockquote: (node, _parent, context) =>
134
+ processUnsupportedTags(defaultHandlers.blockquote(node, _parent, context), unsupportedTagsStrategy),
135
+ html: (node, _parent, context) =>
136
+ processUnsupportedTags(defaultHandlers.html(node, _parent, context), unsupportedTagsStrategy),
137
+ table: (node, _parent, context) =>
138
+ processUnsupportedTags(gfmTableToMarkdown().handlers.table(node, _parent, context), unsupportedTagsStrategy),
139
+ thematicBreak: (_node, _parent, _context) =>
140
+ processUnsupportedTags('---', unsupportedTagsStrategy),
141
+ });
142
+
143
+ /**
144
+ * Creates options to be passed into a `remark-stringify` processor that tailor
145
+ * the output for Telegram Markdown.
146
+ *
147
+ * @param {Readonly<Record<string, { title: null | string, url: string }>>} definitions
148
+ * Record of `Definition`s in the Markdown document, keyed by identifier.
149
+ *
150
+ * @returns {import('remark-stringify').RemarkStringifyOptions}
151
+ */
152
+ const createOptions = (definitions, unsupportedTagsStrategy) => ({
153
+ bullet: '*',
154
+ tightDefinitions: true,
155
+ handlers: createHandlers(definitions, unsupportedTagsStrategy),
156
+ });
157
+
158
+ export default createOptions;
package/lib/utils.js CHANGED
@@ -1,76 +1,74 @@
1
1
  import { URL } from 'url';
2
2
 
3
3
  export function wrap(string, ...wrappers) {
4
- return [
5
- ...wrappers,
6
- string,
7
- ...wrappers.reverse(),
8
- ].join('');
4
+ return [
5
+ ...wrappers,
6
+ string,
7
+ ...wrappers.reverse(),
8
+ ].join('');
9
9
  }
10
10
 
11
11
  export function isURL(string) {
12
- try {
13
- return Boolean(new URL(string));
14
- } catch (error) {
15
- return false;
16
- }
12
+ try {
13
+ return Boolean(new URL(string));
14
+ } catch (error) {
15
+ return false;
16
+ }
17
17
  }
18
18
 
19
19
  export function escapeSymbols(text, textType = 'text') {
20
- if (!text) {
21
- return text;
22
- }
23
- switch (textType) {
24
- case 'code':
25
- return text
26
- .replace(/\\/g, '\\\\')
27
- .replace(/`/g, '\\`')
28
- case 'link':
29
- return text
30
- .replace(/\\/g, '\\\\')
31
- .replace(/\(/g, '\\(')
32
- .replace(/\)/g, '\\)')
33
- case 'ignore_escaped':
34
- return text.replace(/(\\.)|([_*\[\]()~`>#+\-=|{}.!])/g, (match, escaped, char) => {
35
- if (escaped) {
36
- return escaped;
37
- }
38
- return '\\' + char;
39
- });
40
- default:
41
- return text
42
- .replace(/_/g, '\\_')
43
- .replace(/\*/g, '\\*')
44
- .replace(/\[/g, '\\[')
45
- .replace(/]/g, '\\]')
46
- .replace(/\(/g, '\\(')
47
- .replace(/\)/g, '\\)')
48
- .replace(/~/g, '\\~')
49
- .replace(/`/g, '\\`')
50
- .replace(/>/g, '\\>')
51
- .replace(/#/g, '\\#')
52
- .replace(/\+/g, '\\+')
53
- .replace(/-/g, '\\-')
54
- .replace(/=/g, '\\=')
55
- .replace(/\|/g, '\\|')
56
- .replace(/{/g, '\\{')
57
- .replace(/}/g, '\\}')
58
- .replace(/\./g, '\\.')
59
- .replace(/!/g, '\\!');
20
+ if (!text) {
21
+ return text;
22
+ }
23
+ switch (textType) {
24
+ case 'code':
25
+ return text
26
+ .replace(/\\/g, '\\\\')
27
+ .replace(/`/g, '\\`')
28
+ case 'link':
29
+ return text
30
+ .replace(/\\/g, '\\\\')
31
+ .replace(/\(/g, '\\(')
32
+ .replace(/\)/g, '\\)')
33
+ case 'ignore_escaped':
34
+ return text.replace(/(\\.)|([_*\[\]()~`>#+\-=|{}.!])/g, (match, escaped, char) => {
35
+ if (escaped) {
36
+ return escaped;
37
+ }
38
+ return '\\' + char;
39
+ });
40
+ default:
41
+ return text
42
+ .replace(/_/g, '\\_')
43
+ .replace(/\*/g, '\\*')
44
+ .replace(/\[/g, '\\[')
45
+ .replace(/]/g, '\\]')
46
+ .replace(/\(/g, '\\(')
47
+ .replace(/\)/g, '\\)')
48
+ .replace(/~/g, '\\~')
49
+ .replace(/`/g, '\\`')
50
+ .replace(/>/g, '\\>')
51
+ .replace(/#/g, '\\#')
52
+ .replace(/\+/g, '\\+')
53
+ .replace(/-/g, '\\-')
54
+ .replace(/=/g, '\\=')
55
+ .replace(/\|/g, '\\|')
56
+ .replace(/{/g, '\\{')
57
+ .replace(/}/g, '\\}')
58
+ .replace(/\./g, '\\.')
59
+ .replace(/!/g, '\\!');
60
60
 
61
- }
61
+ }
62
62
  }
63
63
 
64
64
  export function processUnsupportedTags(content, strategy) {
65
- switch (strategy) {
66
- case 'escape':
67
- return escapeSymbols(content, 'ignore_escaped');
68
- case 'remove':
69
- return '';
70
- case 'keep':
71
- default:
72
- return content;
73
- }
65
+ switch (strategy) {
66
+ case 'escape':
67
+ return escapeSymbols(content, 'ignore_escaped');
68
+ case 'remove':
69
+ return '';
70
+ case 'keep':
71
+ default:
72
+ return content;
73
+ }
74
74
  }
75
-
76
-
package/package.json CHANGED
@@ -1,70 +1,71 @@
1
- {
2
- "name": "tellegram",
3
- "version": "1.0.0",
4
- "description": "Convert LLM-generated markdown into Telegram-specific markdown (MarkdownV2)",
5
- "type": "module",
6
- "main": "index.js",
7
- "scripts": {
8
- "test": "NODE_OPTIONS=--experimental-vm-modules npx jest --coverage",
9
- "lint": "eslint",
10
- "semantic-release": "semantic-release",
11
- "prepare": "husky install",
12
- "codecov": "codecov"
13
- },
14
- "files": [
15
- "README.md",
16
- "LICENSE",
17
- "index.js",
18
- "lib",
19
- "types"
20
- ],
21
- "repository": {
22
- "type": "git",
23
- "url": "git+https://github.com/leask/tellegram.git"
24
- },
25
- "keywords": [
26
- "telegram",
27
- "markdown",
28
- "telegramify",
29
- "parser",
30
- "remark",
31
- "unified"
32
- ],
33
- "types": "types/index.d.ts",
34
- "author": "Leask Wong",
35
- "license": "MIT",
36
- "bugs": {
37
- "url": "https://github.com/leask/tellegram/issues"
38
- },
39
- "homepage": "https://github.com/leask/tellegram#readme",
40
- "dependencies": {
41
- "mdast-util-gfm-table": "^0.1.6",
42
- "mdast-util-to-markdown": "^0.6.2",
43
- "remark-gfm": "^1.0.0",
44
- "remark-parse": "^9.0.0",
45
- "remark-remove-comments": "^0.2.0",
46
- "remark-stringify": "^9.0.1",
47
- "unified": "^9.0.0",
48
- "unist-util-remove": "^2.0.1",
49
- "unist-util-visit": "^2.0.3"
50
- },
51
- "devDependencies": {
52
- "@commitlint/cli": "^12.1.1",
53
- "@commitlint/config-conventional": "^12.1.1",
54
- "codecov": "^3.8.3",
55
- "eslint": "^7.24.0",
56
- "husky": "^6.0.0",
57
- "jest": "^29.3.1",
58
- "lint-staged": "^10.5.4",
59
- "prettier": "2.2.1",
60
- "semantic-release": "^17.4.2"
61
- },
62
- "lint-staged": {
63
- "*.{js,json,md}": [
64
- "prettier --write",
65
- "git add"
66
- ],
67
- "*.{css,scss,less}": "stylelint --fix",
68
- "*.js": "eslint --cache --fix"
69
- }
70
- }
1
+ {
2
+ "name": "tellegram",
3
+ "version": "1.1.2",
4
+ "description": "Convert LLM-generated markdown into Telegram-specific markdown (MarkdownV2)",
5
+ "type": "module",
6
+ "main": "index.js",
7
+ "scripts": {
8
+ "test": "NODE_OPTIONS='--experimental-vm-modules --no-deprecation' npx jest --coverage --verbose",
9
+ "lint": "eslint",
10
+ "semantic-release": "semantic-release",
11
+ "prepare": "husky install",
12
+ "codecov": "codecov"
13
+ },
14
+ "files": [
15
+ "README.md",
16
+ "LICENSE",
17
+ "index.js",
18
+ "lib",
19
+ "types"
20
+ ],
21
+ "repository": {
22
+ "type": "git",
23
+ "url": "git+https://github.com/leask/tellegram.git"
24
+ },
25
+ "keywords": [
26
+ "tellegram",
27
+ "telegram",
28
+ "markdown",
29
+ "telegramify",
30
+ "parser",
31
+ "remark",
32
+ "unified"
33
+ ],
34
+ "types": "types/index.d.ts",
35
+ "author": "Leask Wong",
36
+ "license": "MIT",
37
+ "bugs": {
38
+ "url": "https://github.com/leask/tellegram/issues"
39
+ },
40
+ "homepage": "https://github.com/leask/tellegram#readme",
41
+ "dependencies": {
42
+ "mdast-util-gfm-table": "^0.1.6",
43
+ "mdast-util-to-markdown": "^0.6.2",
44
+ "remark-gfm": "^1.0.0",
45
+ "remark-parse": "^9.0.0",
46
+ "remark-remove-comments": "^0.2.0",
47
+ "remark-stringify": "^9.0.1",
48
+ "unified": "^9.0.0",
49
+ "unist-util-remove": "^2.0.1",
50
+ "unist-util-visit": "^2.0.3"
51
+ },
52
+ "devDependencies": {
53
+ "@commitlint/cli": "^12.1.1",
54
+ "@commitlint/config-conventional": "^12.1.1",
55
+ "codecov": "^3.8.3",
56
+ "eslint": "^7.24.0",
57
+ "husky": "^6.0.0",
58
+ "jest": "^30.2.0",
59
+ "lint-staged": "^10.5.4",
60
+ "prettier": "2.2.1",
61
+ "semantic-release": "^17.4.2"
62
+ },
63
+ "lint-staged": {
64
+ "*.{js,json,md}": [
65
+ "prettier --write",
66
+ "git add"
67
+ ],
68
+ "*.{css,scss,less}": "stylelint --fix",
69
+ "*.js": "eslint --cache --fix"
70
+ }
71
+ }
package/types/index.d.ts CHANGED
@@ -1,13 +1,19 @@
1
- type UnsupportedTagsStrategy = 'escape' | 'remove' | 'keep'
2
-
3
- declare module 'tellegram' {
4
-
5
- /**
6
- * Converts markdown to Telegram's format.
7
- * @param markdown The markdown to convert.
8
- * @param unsupportedTagsStrategy The strategy to use for unsupported tags.
9
- */
10
- function convert(markdown: string, unsupportedTagsStrategy: UnsupportedTagsStrategy): string
11
-
12
- export = convert
13
- }
1
+ type UnsupportedTagsStrategy = 'escape' | 'remove' | 'keep'
2
+
3
+ declare module 'tellegram' {
4
+ /**
5
+ * Converts markdown to Telegram's MarkdownV2 format.
6
+ * @param markdown The markdown to convert.
7
+ * @param unsupportedTagsStrategy The strategy to use for unsupported tags; defaults to 'escape'.
8
+ */
9
+ export function convert(markdown: string, unsupportedTagsStrategy?: UnsupportedTagsStrategy): string;
10
+
11
+ /**
12
+ * Splits markdown into pages and returns each page converted to MarkdownV2.
13
+ * @param text The markdown text to paginate.
14
+ */
15
+ export function paginate(text: string, options?: { /** Maximum converted length of a page. */ size?: number; /** Omit the "PAGE i / n" header. */ noPageNum?: boolean }): string[];
16
+
17
+ const defaultExport: typeof convert;
18
+ export default defaultExport;
19
+ }
@@ -1,158 +0,0 @@
1
- import defaultHandlers from 'mdast-util-to-markdown/lib/handle/index.js';
2
- import phrasing from 'mdast-util-to-markdown/lib/util/container-phrasing.js';
3
- import {toMarkdown as gfmTableToMarkdown} from 'mdast-util-gfm-table';
4
-
5
- import {wrap, isURL, escapeSymbols, processUnsupportedTags} from './utils.js';
6
-
7
- /**
8
- * Creates custom `mdast-util-to-markdown` handlers that tailor the output for
9
- * Telegram Markdown.
10
- *
11
- * @param {Readonly<Record<string, { title: null | string, url: string }>>} definitions
12
- * Record of `Definition`s in the Markdown document, keyed by identifier.
13
- *
14
- * @returns {import('mdast-util-to-markdown').Handlers}
15
- */
16
- const createHandlers = (definitions, unsupportedTagsStrategy) => ({
17
- heading: (node, _parent, context) => {
18
- // make headers to be just *strong*
19
- const marker = '*';
20
-
21
- const exit = context.enter('heading');
22
- const value = phrasing(node, context, {before: marker, after: marker});
23
- exit();
24
-
25
- return wrap(value, marker);
26
- },
27
-
28
- strong: (node, _parent, context) => {
29
- const marker = '*';
30
-
31
- const exit = context.enter('strong');
32
- const value = phrasing(node, context, {before: marker, after: marker});
33
- exit();
34
-
35
- return wrap(value, marker);
36
- },
37
-
38
- delete(node, _parent, context) {
39
- const marker = '~';
40
-
41
- const exit = context.enter('delete');
42
- const value = phrasing(node, context, {before: marker, after: marker});
43
- exit();
44
-
45
- return wrap(value, marker);
46
- },
47
-
48
- emphasis: (node, _parent, context) => {
49
- const marker = '_';
50
-
51
- const exit = context.enter('emphasis');
52
- const value = phrasing(node, context, {before: marker, after: marker});
53
- exit();
54
-
55
- return wrap(value, marker);
56
- },
57
-
58
- list: (...args) => defaultHandlers.list(...args).replace(/^(\d+)./gm, '$1\\.'),
59
-
60
- listItem: (...args) => defaultHandlers.listItem(...args).replace(/^\*/, '•'),
61
-
62
- code(node, _parent, context) {
63
- const exit = context.enter('code');
64
- // delete language prefix for deprecated markdown formatters (old Bitbucket Editor)
65
- const content = node.value.replace(/^#![a-z]+\n/, ''); // ```\n#!javascript\ncode block\n```
66
- exit();
67
-
68
- const language = node.lang || '';
69
- return `\`\`\`${language}\n${escapeSymbols(content, 'code')}\n\`\`\``;
70
- },
71
-
72
- link: (node, _parent, context) => {
73
- const exit = context.enter('link');
74
- const text = phrasing(node, context, {before: '|', after: '>'}) || escapeSymbols(node.title);
75
- const isUrlEncoded = decodeURI(node.url) !== node.url;
76
- const url = isUrlEncoded ? node.url : encodeURI(node.url);
77
- exit();
78
-
79
- if (!isURL(url)) return escapeSymbols(text) || escapeSymbols(url);
80
-
81
- return text
82
- ? `[${text}](${escapeSymbols(url, 'link')})`
83
- : `[${escapeSymbols(url)}](${escapeSymbols(url, 'link')})`;
84
- },
85
-
86
- linkReference: (node, _parent, context) => {
87
- const exit = context.enter('linkReference');
88
- const definition = definitions[node.identifier];
89
- const text = phrasing(node, context, {before: '|', after: '>'}) || (definition ? definition.title : null);
90
- exit();
91
-
92
- if (!definition || !isURL(definition.url)) return escapeSymbols(text);
93
-
94
- return text
95
- ? `[${text}](${escapeSymbols(definition.url, 'link')})`
96
- : `[${escapeSymbols(definition.url)}](${escapeSymbols(definition.url, 'link')})`;
97
- },
98
-
99
- image: (node, _parent, context) => {
100
- const exit = context.enter('image');
101
- const text = node.alt || node.title;
102
- const url = node.url
103
- exit();
104
-
105
- if (!isURL(url)) return escapeSymbols(text) || escapeSymbols(url);
106
-
107
- return text
108
- ? `[${escapeSymbols(text)}](${escapeSymbols(url, 'link')})`
109
- : `[${escapeSymbols(url)}](${escapeSymbols(url, 'link')})`;
110
- },
111
-
112
- imageReference: (node, _parent, context) => {
113
- const exit = context.enter('imageReference');
114
- const definition = definitions[node.identifier];
115
- const text = node.alt || (definition ? definition.title : null);
116
- exit();
117
-
118
- if (!definition || !isURL(definition.url)) return escapeSymbols(text);
119
-
120
- return text
121
- ? `[${escapeSymbols(text)}](${escapeSymbols(definition.url, 'link')})`
122
- : `[${escapeSymbols(definition.url)}](${escapeSymbols(definition.url, 'link')})`;
123
- },
124
-
125
- text: (node, _parent, context) => {
126
- const exit = context.enter('text');
127
- const text = node.value;
128
- exit();
129
-
130
- return escapeSymbols(text);
131
- },
132
-
133
- blockquote: (node, _parent, context) =>
134
- processUnsupportedTags(defaultHandlers.blockquote(node, _parent, context), unsupportedTagsStrategy),
135
- html: (node, _parent, context) =>
136
- processUnsupportedTags(defaultHandlers.html(node, _parent, context), unsupportedTagsStrategy),
137
- table: (node, _parent, context) =>
138
- processUnsupportedTags(gfmTableToMarkdown().handlers.table(node, _parent, context), unsupportedTagsStrategy),
139
- thematicBreak: (_node, _parent, _context) =>
140
- processUnsupportedTags('---', unsupportedTagsStrategy),
141
- });
142
-
143
- /**
144
- * Creates options to be passed into a `remark-stringify` processor that tailor
145
- * the output for Telegram Markdown.
146
- *
147
- * @param {Readonly<Record<string, { title: null | string, url: string }>>} definitions
148
- * Record of `Definition`s in the Markdown document, keyed by identifier.
149
- *
150
- * @returns {import('remark-stringify').RemarkStringifyOptions}
151
- */
152
- const createOptions = (definitions, unsupportedTagsStrategy) => ({
153
- bullet: '*',
154
- tightDefinitions: true,
155
- handlers: createHandlers(definitions, unsupportedTagsStrategy),
156
- });
157
-
158
- export default createOptions;