npm - @inceptionstack/roundhouse - Versions diffs - 0.3.21 → 0.3.23 - Mend

@inceptionstack/roundhouse 0.3.21 → 0.3.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/package.json +1 -1
package/src/telegram-format.ts +138 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@inceptionstack/roundhouse",
-  "version": "0.3.21",
+  "version": "0.3.23",
   "type": "module",
   "description": "Multi-platform chat gateway that routes messages through a configured AI agent",
   "license": "MIT",

package/src/telegram-format.ts CHANGED Viewed

@@ -93,6 +93,128 @@ export function truncateHtmlSafe(html: string, limit: number): string {
   return html.slice(0, safeEnd) + "...";
 }
+/**
+ * Convert a markdown table into a <pre>-wrapped, column-aligned monospace table.
+ * Parses the header row, skips the separator row, and pads all columns to uniform width.
+ *
+ * Column widths are measured in *display* columns rather than UTF-16 units:
+ * the text is split into grapheme clusters and each cluster is counted as
+ * 0, 1 or 2 columns (combining marks / selectors are 0; emoji, CJK, kana,
+ * Hangul and fullwidth forms are 2).
+ *
+ * Rows are normalized to the header's column count: missing cells become
+ * empty strings and surplus cells are dropped.
+ *
+ * @param tableMd Markdown table text. The first line is the header, the
+ *   second line is the separator (ignored), remaining lines are data rows.
+ * @returns An HTML string consisting of a single <pre> element containing a
+ *   box-drawn table with HTML-escaped cells. When the input has fewer than
+ *   two lines, the escaped input is returned inside <pre> unchanged.
+ */
+function formatTable(tableMd: string): string {
+  const lines = tableMd.trim().split("\n");
+  if (lines.length < 2) return `<pre>${escapeHtml(tableMd)}</pre>`;
+  // Parse rows: strip one leading and one trailing pipe, split by | and trim each cell
+  const parseRow = (line: string): string[] =>
+    line.replace(/^\|/, "").replace(/\|$/, "").split("|").map(c => c.trim());
+  const headerCells = parseRow(lines[0]);
+  // lines[1] is the separator row (|---|---|) — skip it
+  const dataRows = lines.slice(2).map(parseRow);
+  const colCount = headerCells.length;
+  // Normalize rows to exactly colCount columns
+  const normalize = (cells: string[]): string[] =>
+    Array.from({ length: colCount }, (_, i) => cells[i] ?? "");
+  const rawHeader = normalize(headerCells);
+  const rawDataRows = dataRows.map(normalize);
+  const allRows = [rawHeader, ...rawDataRows];
+  // Display width of a single Unicode code point in a monospace font.
+  // Emoji and East Asian Wide/Fullwidth characters typically occupy 2 columns.
+  const codePointWidth = (cp: number): number => {
+    // Zero-width characters
+    if (cp === 0x200B || cp === 0x200C || cp === 0x200D || cp === 0xFEFF) return 0;
+    // Combining marks (zero-width modifiers)
+    if (cp >= 0x0300 && cp <= 0x036F) return 0;  // Combining Diacritical Marks
+    if (cp >= 0x1AB0 && cp <= 0x1AFF) return 0;  // Combining Diacritical Marks Extended
+    if (cp >= 0x1DC0 && cp <= 0x1DFF) return 0;  // Combining Diacritical Marks Supplement
+    if (cp >= 0x20D0 && cp <= 0x20FF) return 0;  // Combining Diacritical Marks for Symbols (includes U+20E3 keycap)
+    if (cp >= 0xFE20 && cp <= 0xFE2F) return 0;  // Combining Half Marks
+    // Variation selectors
+    if (cp >= 0xFE00 && cp <= 0xFE0F) return 0;
+    // Tags block (used in flag sequences etc)
+    if (cp >= 0xE0001 && cp <= 0xE007F) return 0;
+    // Emoji (common ranges)
+    if (cp >= 0x1F100 && cp <= 0x1FAFF) return 2;
+    if (cp >= 0x231A && cp <= 0x23FF) return 2;
+    if (cp >= 0x2600 && cp <= 0x27BF) return 2;
+    if (cp >= 0x2B50 && cp <= 0x2B55) return 2;
+    // CJK radicals, Kangxi radicals, CJK Symbols and Punctuation
+    // (includes U+3000 ideographic space; U+303F is not wide)
+    if (cp >= 0x2E80 && cp <= 0x303E) return 2;
+    // Hiragana, Katakana, Bopomofo, Hangul Compatibility Jamo, Kanbun,
+    // enclosed CJK letters and CJK compatibility
+    // (U+3248–U+324F are East Asian Ambiguous, so they are left at width 1)
+    if (cp >= 0x3041 && cp <= 0x3247) return 2;
+    if (cp >= 0x3250 && cp <= 0x33FF) return 2;
+    // CJK Unified Ideographs
+    if (cp >= 0x3400 && cp <= 0x4DBF) return 2;
+    if (cp >= 0x4E00 && cp <= 0x9FFF) return 2;
+    if (cp >= 0xF900 && cp <= 0xFAFF) return 2;
+    if (cp >= 0x20000 && cp <= 0x2FA1F) return 2;
+    // Yi syllables and radicals
+    if (cp >= 0xA000 && cp <= 0xA4CF) return 2;
+    // Vertical forms, CJK compatibility forms, small form variants
+    if (cp >= 0xFE10 && cp <= 0xFE19) return 2;
+    if (cp >= 0xFE30 && cp <= 0xFE6F) return 2;
+    // Fullwidth forms
+    if (cp >= 0xFF01 && cp <= 0xFF60) return 2;
+    if (cp >= 0xFFE0 && cp <= 0xFFE6) return 2;
+    // Hangul (leading Jamo, Jamo Extended-A, precomposed syllables)
+    if (cp >= 0x1100 && cp <= 0x115F) return 2;
+    if (cp >= 0xA960 && cp <= 0xA97F) return 2;
+    if (cp >= 0xAC00 && cp <= 0xD7AF) return 2;
+    return 1;
+  };
+  // Display width of a grapheme cluster (accounts for ZWJ sequences, emoji, CJK)
+  const segmenter = new Intl.Segmenter();
+  const graphemeDisplayWidth = (grapheme: string): number => {
+    // ZWJ emoji sequences: multiple code points but render as a single wide emoji
+    if (grapheme.includes('\u200D')) return 2;
+    // Single code point: use lookup
+    const cps = Array.from(grapheme);
+    if (cps.length === 1) return codePointWidth(cps[0].codePointAt(0)!);
+    // Multi-codepoint grapheme (e.g. emoji + variation selector): width of the
+    // widest member, which is the base since modifiers measure 0
+    let width = 0;
+    for (const cp of cps) {
+      width = Math.max(width, codePointWidth(cp.codePointAt(0)!));
+    }
+    // A cluster made only of zero-width code points still counts as 1 column
+    return width || 1;
+  };
+  // Display width of a full string (sum of grapheme display widths)
+  const displayWidth = (s: string): number => {
+    let w = 0;
+    for (const { segment } of segmenter.segment(s)) {
+      w += graphemeDisplayWidth(segment);
+    }
+    return w;
+  };
+  // Calculate max *display* width for each column (on unescaped text,
+  // since Telegram renders entities back to their visual form in <pre>)
+  const colWidths: number[] = [];
+  for (let c = 0; c < colCount; c++) {
+    let max = 0;
+    for (const row of allRows) {
+      max = Math.max(max, displayWidth(row[c]));
+    }
+    colWidths.push(max);
+  }
+  // Pad an escaped cell so it visually aligns to `width` display columns.
+  // Spaces are 1 display column each, so we add (target - actual) spaces.
+  const padCell = (rawText: string, width: number): string => {
+    const escaped = escapeHtml(rawText);
+    const dw = displayWidth(rawText);
+    return escaped + " ".repeat(Math.max(0, width - dw));
+  };
+  // Build formatted rows
+  const formatRow = (cells: string[]): string =>
+    "│ " + cells.map((cell, i) => padCell(cell, colWidths[i])).join(" │ ") + " │";
+  const separator = "├─" + colWidths.map(w => "─".repeat(w)).join("─┼─") + "─┤";
+  const topBorder = "┌─" + colWidths.map(w => "─".repeat(w)).join("─┬─") + "─┐";
+  const bottomBorder = "└─" + colWidths.map(w => "─".repeat(w)).join("─┴─") + "─┘";
+  // Cells are escaped inside padCell; box-drawing chars are HTML-safe.
+  const result = [
+    topBorder,
+    formatRow(rawHeader),
+    separator,
+    ...rawDataRows.map(formatRow),
+    bottomBorder,
+  ].join("\n");
+  return `<pre>${result}</pre>`;
+}
 /**
  * Convert markdown text to Telegram-compatible HTML.
  * Handles code blocks first (to avoid processing markdown inside them),
@@ -105,6 +227,7 @@ export function markdownToTelegramHtml(md: string): string {
   const RE = (kind: string) => new RegExp(`\\x00${sentinel}_${kind}_(\\d+)\\x00`, "g");
   // Extract fenced code blocks first to protect their contents
+  // (must happen before table extraction to avoid nested <pre> tags)
   const codeBlocks: string[] = [];
   let processed = md.replace(/```(\w*)\n?([\s\S]*?)```/g, (_match, _lang, code) => {
     const idx = codeBlocks.length;
@@ -112,6 +235,20 @@ export function markdownToTelegramHtml(md: string): string {
     return S("CB", idx);
   });
+  // Extract markdown tables (now safe — code blocks are already sentinelled out)
+  const tables: string[] = [];
+  processed = processed.replace(
+    /(?:^|\n)(\|.+\|\n\|[-| :]+\|\n(?:\|.+\|(?:\n|$))+)/g,
+    (match) => {
+      const idx = tables.length;
+      const leadingNewline = match.startsWith("\n") ? "\n" : "";
+      const trailingNewline = match.endsWith("\n") ? "\n" : "";
+      const tableContent = match.replace(/^\n/, "").replace(/\n$/, "");
+      tables.push(formatTable(tableContent));
+      return leadingNewline + S("TB", idx) + trailingNewline;
+    },
+  );
   // Extract inline code to protect contents
   const inlineCodes: string[] = [];
   processed = processed.replace(/`([^`\n]+)`/g, (_match, code) => {
@@ -166,6 +303,7 @@ export function markdownToTelegramHtml(md: string): string {
   processed = processed.replace(RE("LK"), (_match, idx) => links[parseInt(idx, 10)]);
   processed = processed.replace(RE("IC"), (_match, idx) => inlineCodes[parseInt(idx, 10)]);
   processed = processed.replace(RE("CB"), (_match, idx) => codeBlocks[parseInt(idx, 10)]);
+  processed = processed.replace(RE("TB"), (_match, idx) => tables[parseInt(idx, 10)]);
   return processed;
 }