@inceptionstack/roundhouse 0.3.22 → 0.3.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/telegram-format.ts +62 -9
package/package.json
CHANGED
package/src/telegram-format.ts
CHANGED
|
@@ -118,28 +118,81 @@ function formatTable(tableMd: string): string {
|
|
|
118
118
|
const rawDataRows = dataRows.map(normalize);
|
|
119
119
|
const allRows = [rawHeader, ...rawDataRows];
|
|
120
120
|
|
|
121
|
-
//
|
|
121
|
+
// Display width, in monospace columns, of a single Unicode code point.
//   0 -> zero-width characters, combining marks, variation selectors, tags
//   2 -> emoji and East Asian wide/fullwidth characters (CJK, kana, Hangul)
//   1 -> everything else
// The ranges are a pragmatic approximation of UAX #11 (East Asian Width),
// not a full Unicode table.
const codePointWidth = (cp: number): number => {
  // Zero-width space / non-joiner / joiner, and the BOM.
  if (cp === 0x200B || cp === 0x200C || cp === 0x200D || cp === 0xFEFF) return 0;

  // Combining marks (zero-width modifiers drawn on top of the base character).
  if (cp >= 0x0300 && cp <= 0x036F) return 0; // Combining Diacritical Marks
  if (cp >= 0x1AB0 && cp <= 0x1AFF) return 0; // Combining Diacritical Marks Extended
  if (cp >= 0x1DC0 && cp <= 0x1DFF) return 0; // Combining Diacritical Marks Supplement
  if (cp >= 0x20D0 && cp <= 0x20FF) return 0; // Combining Diacritical Marks for Symbols (includes U+20E3 keycap)
  if (cp >= 0xFE20 && cp <= 0xFE2F) return 0; // Combining Half Marks
  // Combining kana voicing marks. They sit inside the kana block below, so
  // they must be matched before the wide ranges.
  if (cp === 0x3099 || cp === 0x309A) return 0;

  // Variation selectors.
  if (cp >= 0xFE00 && cp <= 0xFE0F) return 0;

  // Tags block (used in flag sequences etc).
  if (cp >= 0xE0001 && cp <= 0xE007F) return 0;

  // Emoji (common ranges).
  if (cp >= 0x1F100 && cp <= 0x1FAFF) return 2;
  if (cp >= 0x231A && cp <= 0x23FF) return 2;
  if (cp >= 0x2600 && cp <= 0x27BF) return 2;
  if (cp >= 0x2B50 && cp <= 0x2B55) return 2;

  // CJK Symbols and Punctuation (ideographic space, 、 。 「 」 ...).
  if (cp >= 0x3000 && cp <= 0x303E) return 2;
  // Hiragana and Katakana.
  if (cp >= 0x3041 && cp <= 0x30FF) return 2;

  // CJK Unified Ideographs (plus extensions and compatibility ideographs).
  if (cp >= 0x3400 && cp <= 0x4DBF) return 2;
  if (cp >= 0x4E00 && cp <= 0x9FFF) return 2;
  if (cp >= 0xF900 && cp <= 0xFAFF) return 2;
  if (cp >= 0x20000 && cp <= 0x2FA1F) return 2;

  // Fullwidth forms.
  if (cp >= 0xFF01 && cp <= 0xFF60) return 2;
  if (cp >= 0xFFE0 && cp <= 0xFFE6) return 2;

  // Hangul: Jamo leading consonants and precomposed syllables.
  if (cp >= 0x1100 && cp <= 0x115F) return 2;
  if (cp >= 0xAC00 && cp <= 0xD7AF) return 2;

  return 1;
};
|
|
153
|
+
|
|
154
|
+
// Display width of a grapheme cluster (accounts for ZWJ sequences, emoji, CJK)
|
|
122
155
|
const segmenter = new Intl.Segmenter();
|
|
123
|
-
const
|
|
156
|
+
// Display width, in monospace columns, of one grapheme cluster.
//
// A cluster is as wide as its widest code point. Joiners, variation
// selectors and combining marks are width 0 in codePointWidth, so:
//   - an emoji ZWJ sequence takes the width of its emoji components (2);
//   - a base character followed by modifiers takes the base's width;
//   - a plain character that merely has a ZWJ attached to it (a grapheme
//     segmenter keeps a trailing U+200D with the preceding character) stays
//     at its own width instead of being forced to 2.
const graphemeDisplayWidth = (grapheme: string): number => {
  // String iteration yields whole code points, so codePointAt(0) is always
  // defined here; the `?? 0` only satisfies the type checker.
  const widths = Array.from(grapheme, (ch) => codePointWidth(ch.codePointAt(0) ?? 0));
  const widest = Math.max(0, ...widths);

  // A lone code point keeps its lookup value verbatim, which may be 0
  // (e.g. a stray zero-width space).
  if (widths.length === 1) return widest;

  // Multi-code-point cluster: never report 0 columns for a visible cluster.
  return widest || 1;
};
|
|
169
|
+
|
|
170
|
+
// Display width of a full string (sum of grapheme display widths)
|
|
171
|
+
// Total display width of a string: split it into grapheme clusters and add
// up the column width of each cluster.
const displayWidth = (s: string): number =>
  Array.from(segmenter.segment(s), (part) => graphemeDisplayWidth(part.segment)).reduce(
    (total, columns) => total + columns,
    0,
  );
|
|
124
178
|
|
|
125
|
-
// Calculate max *
|
|
179
|
+
// Calculate max *display* width for each column (on unescaped text,
|
|
126
180
|
// since Telegram renders entities back to their visual form in <pre>)
|
|
127
181
|
const colWidths: number[] = [];
|
|
128
182
|
for (let c = 0; c < colCount; c++) {
|
|
129
183
|
let max = 0;
|
|
130
184
|
for (const row of allRows) {
|
|
131
|
-
max = Math.max(max,
|
|
185
|
+
max = Math.max(max, displayWidth(row[c]));
|
|
132
186
|
}
|
|
133
187
|
colWidths.push(max);
|
|
134
188
|
}
|
|
135
189
|
|
|
136
|
-
// Pad an escaped cell so it visually aligns to `width`
|
|
137
|
-
//
|
|
138
|
-
// 1 char both in HTML source and visually.
|
|
190
|
+
// Pad an escaped cell so it visually aligns to `width` display columns.
|
|
191
|
+
// Spaces are 1 display column each, so we add (target - actual) spaces.
|
|
139
192
|
// Right-pad a table cell to `width` display columns.
// The text is HTML-escaped for output, but the padding is measured on the raw
// text. Each space is one column, so the shortfall is the number of spaces
// appended; cells already at or beyond `width` are returned unpadded.
const padCell = (rawText: string, width: number): string => {
  const cellHtml = escapeHtml(rawText);
  const shortfall = width - displayWidth(rawText);
  const filler = shortfall > 0 ? " ".repeat(shortfall) : "";
  return `${cellHtml}${filler}`;
};
|
|
144
197
|
|
|
145
198
|
// Build formatted rows
|