@inceptionstack/roundhouse 0.3.21 → 0.3.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/telegram-format.ts +138 -0
package/package.json
CHANGED
package/src/telegram-format.ts
CHANGED
|
@@ -93,6 +93,128 @@ export function truncateHtmlSafe(html: string, limit: number): string {
|
|
|
93
93
|
return html.slice(0, safeEnd) + "...";
|
|
94
94
|
}
|
|
95
95
|
|
|
96
|
+
/**
 * Convert a markdown table into a <pre>-wrapped, column-aligned monospace table
 * drawn with box-drawing characters.
 *
 * The first line is parsed as the header row, the second line is assumed to be
 * the separator row (|---|---|) and is skipped, and every remaining line is a
 * data row. Rows are normalized to the header's column count (extra cells are
 * dropped, missing cells become ""). Columns are padded to the widest cell,
 * measured in display columns rather than string length, so emoji and CJK text
 * line up. Cell text is passed through escapeHtml; padding is computed on the
 * unescaped text.
 *
 * Input with fewer than two lines is not treated as a table: it is returned
 * escaped inside a <pre> element unchanged.
 *
 * Requires a runtime that provides Intl.Segmenter.
 *
 * @param tableMd Raw markdown source of a single table.
 * @returns HTML string consisting of one <pre> element containing the table.
 */
function formatTable(tableMd: string): string {
  // Accept both LF and CRLF line endings and trim every line, so a stray "\r"
  // or trailing space cannot hide the closing pipe and create a phantom column.
  const lines = tableMd.trim().split(/\r?\n/).map(line => line.trim());
  if (lines.length < 2) return `<pre>${escapeHtml(tableMd)}</pre>`;

  // Parse a row: drop the outer pipes, split on *unescaped* pipes only, then
  // turn escaped pipes (\|) back into literal "|" and trim each cell.
  const parseRow = (line: string): string[] =>
    line
      .replace(/^\|/, "")
      .replace(/(?<!\\)\|$/, "")
      .split(/(?<!\\)\|/)
      .map(cell => cell.replace(/\\\|/g, "|").trim());

  const headerCells = parseRow(lines[0] ?? "");
  // lines[1] is the separator row (|---|---|) — skip it
  const dataRows = lines.slice(2).map(parseRow);
  const colCount = headerCells.length;

  // Normalize rows to exactly colCount columns
  const normalize = (cells: string[]): string[] =>
    Array.from({ length: colCount }, (_, i) => cells[i] ?? "");

  const rawHeader = normalize(headerCells);
  const rawDataRows = dataRows.map(normalize);
  const allRows = [rawHeader, ...rawDataRows];

  // Display width of a single Unicode code point in a monospace font.
  // Emoji and CJK characters typically occupy 2 columns.
  const codePointWidth = (cp: number): number => {
    // Zero-width characters
    if (cp === 0x200b || cp === 0x200c || cp === 0x200d || cp === 0xfeff) return 0;
    // Combining marks (zero-width modifiers)
    if (cp >= 0x0300 && cp <= 0x036f) return 0; // Combining Diacritical Marks
    if (cp >= 0x1ab0 && cp <= 0x1aff) return 0; // Combining Diacritical Marks Extended
    if (cp >= 0x1dc0 && cp <= 0x1dff) return 0; // Combining Diacritical Marks Supplement
    if (cp >= 0x20d0 && cp <= 0x20ff) return 0; // Combining Diacritical Marks for Symbols (includes U+20E3 keycap)
    if (cp >= 0xfe20 && cp <= 0xfe2f) return 0; // Combining Half Marks
    // Variation selectors
    if (cp >= 0xfe00 && cp <= 0xfe0f) return 0;
    // Tags block (used in flag sequences etc)
    if (cp >= 0xe0001 && cp <= 0xe007f) return 0;
    // Emoji (common ranges)
    if (cp >= 0x1f100 && cp <= 0x1faff) return 2;
    if (cp >= 0x231a && cp <= 0x23ff) return 2;
    if (cp >= 0x2600 && cp <= 0x27bf) return 2;
    if (cp >= 0x2b50 && cp <= 0x2b55) return 2;
    // CJK Unified Ideographs
    if (cp >= 0x3400 && cp <= 0x4dbf) return 2;
    if (cp >= 0x4e00 && cp <= 0x9fff) return 2;
    if (cp >= 0xf900 && cp <= 0xfaff) return 2;
    if (cp >= 0x20000 && cp <= 0x2fa1f) return 2;
    // Fullwidth forms
    if (cp >= 0xff01 && cp <= 0xff60) return 2;
    if (cp >= 0xffe0 && cp <= 0xffe6) return 2;
    // Hangul
    if (cp >= 0xac00 && cp <= 0xd7af) return 2;
    return 1;
  };

  // Display width of a grapheme cluster (accounts for ZWJ sequences, emoji, CJK)
  const segmenter = new Intl.Segmenter();
  const graphemeDisplayWidth = (grapheme: string): number => {
    const cps = Array.from(grapheme);
    // The cluster is as wide as its widest code point. A ZWJ emoji sequence
    // therefore measures 2 (its emoji parts are wide), while a ZWJ that is
    // alone or attached to a narrow character no longer inflates the width.
    let width = 0;
    for (const ch of cps) {
      width = Math.max(width, codePointWidth(ch.codePointAt(0) ?? 0));
    }
    // Single code point: the lookup result is final (may be 0 for zero-width).
    if (cps.length === 1) return width;
    // Multi-codepoint grapheme (e.g. emoji + variation selector): never
    // report 0 for a cluster, fall back to one column.
    return width || 1;
  };

  // Display width of a full string (sum of grapheme display widths)
  const displayWidth = (s: string): number => {
    let w = 0;
    for (const { segment } of segmenter.segment(s)) {
      w += graphemeDisplayWidth(segment);
    }
    return w;
  };

  // Calculate max *display* width for each column (on unescaped text,
  // since Telegram renders entities back to their visual form in <pre>)
  const colWidths: number[] = [];
  for (let c = 0; c < colCount; c++) {
    let max = 0;
    for (const row of allRows) {
      max = Math.max(max, displayWidth(row[c] ?? ""));
    }
    colWidths.push(max);
  }

  // Pad an escaped cell so it visually aligns to `width` display columns.
  // Spaces are 1 display column each, so we add (target - actual) spaces.
  const padCell = (rawText: string, width: number): string => {
    const escaped = escapeHtml(rawText);
    const dw = displayWidth(rawText);
    return escaped + " ".repeat(Math.max(0, width - dw));
  };

  // Build formatted rows
  const formatRow = (cells: string[]): string =>
    "│ " + cells.map((cell, i) => padCell(cell, colWidths[i] ?? 0)).join(" │ ") + " │";

  const separator = "├─" + colWidths.map(w => "─".repeat(w)).join("─┼─") + "─┤";
  const topBorder = "┌─" + colWidths.map(w => "─".repeat(w)).join("─┬─") + "─┐";
  const bottomBorder = "└─" + colWidths.map(w => "─".repeat(w)).join("─┴─") + "─┘";

  // Cells are escaped inside padCell; box-drawing chars are HTML-safe.
  const result = [
    topBorder,
    formatRow(rawHeader),
    separator,
    ...rawDataRows.map(formatRow),
    bottomBorder,
  ].join("\n");

  return `<pre>${result}</pre>`;
}
|
|
217
|
+
|
|
96
218
|
/**
|
|
97
219
|
* Convert markdown text to Telegram-compatible HTML.
|
|
98
220
|
* Handles code blocks first (to avoid processing markdown inside them),
|
|
@@ -105,6 +227,7 @@ export function markdownToTelegramHtml(md: string): string {
|
|
|
105
227
|
const RE = (kind: string) => new RegExp(`\\x00${sentinel}_${kind}_(\\d+)\\x00`, "g");
|
|
106
228
|
|
|
107
229
|
// Extract fenced code blocks first to protect their contents
|
|
230
|
+
// (must happen before table extraction to avoid nested <pre> tags)
|
|
108
231
|
const codeBlocks: string[] = [];
|
|
109
232
|
let processed = md.replace(/```(\w*)\n?([\s\S]*?)```/g, (_match, _lang, code) => {
|
|
110
233
|
const idx = codeBlocks.length;
|
|
@@ -112,6 +235,20 @@ export function markdownToTelegramHtml(md: string): string {
|
|
|
112
235
|
return S("CB", idx);
|
|
113
236
|
});
|
|
114
237
|
|
|
238
|
+
// Extract markdown tables (now safe — code blocks are already sentinelled out)
|
|
239
|
+
const tables: string[] = [];
|
|
240
|
+
processed = processed.replace(
|
|
241
|
+
/(?:^|\n)(\|.+\|\n\|[-| :]+\|\n(?:\|.+\|(?:\n|$))+)/g,
|
|
242
|
+
(match) => {
|
|
243
|
+
const idx = tables.length;
|
|
244
|
+
const leadingNewline = match.startsWith("\n") ? "\n" : "";
|
|
245
|
+
const trailingNewline = match.endsWith("\n") ? "\n" : "";
|
|
246
|
+
const tableContent = match.replace(/^\n/, "").replace(/\n$/, "");
|
|
247
|
+
tables.push(formatTable(tableContent));
|
|
248
|
+
return leadingNewline + S("TB", idx) + trailingNewline;
|
|
249
|
+
},
|
|
250
|
+
);
|
|
251
|
+
|
|
115
252
|
// Extract inline code to protect contents
|
|
116
253
|
const inlineCodes: string[] = [];
|
|
117
254
|
processed = processed.replace(/`([^`\n]+)`/g, (_match, code) => {
|
|
@@ -166,6 +303,7 @@ export function markdownToTelegramHtml(md: string): string {
|
|
|
166
303
|
processed = processed.replace(RE("LK"), (_match, idx) => links[parseInt(idx, 10)]);
|
|
167
304
|
processed = processed.replace(RE("IC"), (_match, idx) => inlineCodes[parseInt(idx, 10)]);
|
|
168
305
|
processed = processed.replace(RE("CB"), (_match, idx) => codeBlocks[parseInt(idx, 10)]);
|
|
306
|
+
processed = processed.replace(RE("TB"), (_match, idx) => tables[parseInt(idx, 10)]);
|
|
169
307
|
|
|
170
308
|
return processed;
|
|
171
309
|
}
|