npm - @polotno/pdf-export - Versions diffs - 0.1.30 → 0.1.31 - Mend

@polotno/pdf-export 0.1.30 → 0.1.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/lib/text.js +203 -14
package/package.json +1 -1

package/lib/text.js CHANGED Viewed

@@ -3,12 +3,129 @@ import getUrls from 'get-urls';
 import fetch from 'node-fetch';
 import { stripHtml } from 'string-strip-html';
 import { decode as decodeEntities } from 'html-entities';
/**
 * Expand tabs to spaces using character-count tab stops (every 8 characters by default,
 * matching the HTML default tab size). Because every tab advances to the next stop,
 * deleting characters before a tab does not move the text that follows the tab.
 *
 * KNOWN LIMITATION - this is a character-based approximation, not what browsers do:
 * - Character-based: "01\t" → "01      " (6 spaces to reach column 8).
 * - Browsers are pixel-based: with "01" = 15px and a 5px space, the tab stop sits at
 *   8 × 5px = 40px, so "01\t" advances 25px (~5 spaces) while "\t" advances 40px.
 *   Both end at the same VISUAL position, not at the same character column.
 * - Consequence: output matches monospace fonts approximately but misaligns in
 *   proportional fonts such as Roboto/Arial.
 *
 * When a PDFKit document is available for measuring, use expandTabsToTabStopsByWidth,
 * which places tab stops by measured width instead of character count. This function
 * remains the fallback for code paths that only have plain text.
 *
 * Columns are counted per Unicode code point, so a character outside the Basic
 * Multilingual Plane (stored as a surrogate pair) occupies one column, not two.
 *
 * @param text - Text containing tabs to expand; falsy values are returned unchanged
 * @param tabSize - Size of tab stops in characters (default 8, matching HTML);
 *                  values that are not positive integers fall back to 8
 * @param startPosition - Starting character column for tab stop calculation (default 0);
 *                        values that are not non-negative integers fall back to 0
 * @returns Text with tabs expanded to spaces (character-based approximation)
 */
function expandTabsToTabStops(text, tabSize = 8, startPosition = 0) {
    if (!text) {
        return text;
    }
    // Nothing to expand: every other character is copied through verbatim
    if (!text.includes('\t')) {
        return text;
    }
    // A zero/negative/fractional tab size would produce NaN or negative repeat counts
    // (tabs silently vanish or String.prototype.repeat throws), so use the default instead
    const stopSize = Number.isInteger(tabSize) && tabSize > 0 ? tabSize : 8;
    let position = Number.isInteger(startPosition) && startPosition >= 0 ? startPosition : 0;
    let result = '';
    // for...of walks code points, keeping surrogate pairs together as one column
    for (const char of text) {
        if (char === '\t') {
            // Number of spaces needed to reach the next tab stop
            const spacesNeeded = stopSize - (position % stopSize);
            result += ' '.repeat(spacesNeeded);
            position += spacesNeeded;
        }
        else if (char === '\n') {
            // Tab stops restart at the beginning of every line
            result += char;
            position = 0;
        }
        else {
            result += char;
            position += 1;
        }
    }
    return result;
}
/**
 * Expand tabs to spaces based on measured text width (for PDF rendering).
 * Tab stops are placed at multiples of (width of one space × tabSizeInSpaces), so tabs
 * align by the measured width of the preceding text rather than by character count.
 * Each tab is replaced by the smallest whole number of spaces that reaches the next stop.
 *
 * Measurements use whatever font and size are currently active on `doc`; the caller is
 * responsible for selecting them before calling.
 *
 * @param text - Text containing tabs to expand; falsy values are returned unchanged
 * @param doc - Object exposing widthOfString(string, options), e.g. a PDFKit document
 * @param textOptions - Options forwarded to every doc.widthOfString call
 * @param tabSizeInSpaces - Number of space widths per tab stop (default 8)
 * @param currentWidth - Width already consumed on the current line (default 0)
 * @returns Object with the expanded `text` and the resulting line `width`
 */
function expandTabsToTabStopsByWidth(text, doc, textOptions, tabSizeInSpaces = 8, currentWidth = 0) {
    if (!text) {
        return { text, width: currentWidth };
    }
    // Measure the width of one space character
    const spaceWidth = doc.widthOfString(' ', textOptions);
    const tabStopWidth = spaceWidth * tabSizeInSpaces;
    // Without a positive, finite stop width no alignment is possible (e.g. zero font size
    // or a non-positive tab size); the modulo below would yield NaN or a negative count
    const canAlign = spaceWidth > 0 && tabStopWidth > 0 && Number.isFinite(tabStopWidth);
    // Relative tolerance absorbing floating-point drift from summing many glyph widths
    const tolerance = 1e-9;
    // Width measurement is comparatively expensive; measure each distinct character once
    const widthCache = new Map([[' ', spaceWidth]]);
    const measureChar = (char) => {
        let charWidth = widthCache.get(char);
        if (charWidth === undefined) {
            charWidth = doc.widthOfString(char, textOptions);
            widthCache.set(char, charWidth);
        }
        return charWidth;
    };
    let result = '';
    let width = currentWidth;
    // for...of walks code points so surrogate pairs are measured as one character
    for (const char of text) {
        if (char === '\t') {
            if (!canAlign) {
                // Keep a word boundary instead of silently dropping the tab
                result += ' ';
                width += measureChar(' ');
                continue;
            }
            let remaining = tabStopWidth - (width % tabStopWidth);
            if (remaining < tabStopWidth * tolerance) {
                // Accumulated rounding left us a hair short of a stop: treat the position
                // as being exactly on the stop, so the tab advances a full tab width
                remaining = tabStopWidth;
            }
            // Subtract the tolerance so e.g. 6.000000000000001 does not round up to 7
            const spacesNeeded = Math.max(1, Math.ceil(remaining / spaceWidth - tolerance));
            const spaces = ' '.repeat(spacesNeeded);
            result += spaces;
            width += doc.widthOfString(spaces, textOptions);
        }
        else if (char === '\n') {
            // Tab stops restart at the beginning of every line
            result += char;
            width = 0;
        }
        else {
            result += char;
            width += measureChar(char);
        }
    }
    return { text: result, width };
}
 function decodeHtmlEntities(text) {
     if (!text) {
         return text;
     }
     const decoded = decodeEntities(text); // `decode` from the html-entities package
-    return decoded.replace(/\t/g, ' ');
+    // Tabs are deliberately left intact here; they are expanded to tab stops later by expandTabsToTabStops / expandTabsToTabStopsByWidth
+    return decoded;
 }
 /**
  * Check if text contains HTML tags
@@ -26,8 +143,6 @@ function normalizeRichText(text) {
         return text;
     }
     let normalized = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
-    // Normalize tab characters into 8 spaces
-    normalized = normalized.replace(/\t/g, ' '.repeat(8));
     // Convert explicit HTML break tags into newline characters
     normalized = normalized.replace(/<br\s*\/?>/gi, '\n');
     // Treat paragraph boundaries as newlines and drop opening tags
@@ -37,6 +152,11 @@ function normalizeRichText(text) {
     normalized = normalized.replace(/\n{3,}/g, '\n\n');
     // Trim stray leading/trailing newlines introduced by paragraph conversion
     normalized = normalized.replace(/^\n+/, '').replace(/\n+$/, '');
+    // Expand tabs to tab stops AFTER processing HTML structure
+    // This preserves HTML-like tab behavior where tabs align to fixed positions
+    // so deleting characters before tabs doesn't affect the position of text after tabs
+    // Tabs are expanded in the text content only, not in HTML tags
+    normalized = expandTabsToTabStops(normalized, 8);
     // Decode common HTML non-breaking space entities into their unicode counterpart
     normalized = normalized.replace(/&(nbsp|#160|#xA0);/gi, '\u00A0');
     // Strip zero-width characters that can create missing-glyph boxes in PDF output
@@ -551,10 +671,11 @@ function splitTextIntoLines(doc, element, props) {
         // Tokenize the paragraph
         const tokens = tokenizeHTML(paragraph.html);
         // Extract plain text for width calculation
-        const plainText = tokens
+        // Expand tabs to tab stops for accurate width measurement
+        const plainText = expandTabsToTabStops(tokens
             .filter((t) => t.type === 'text')
             .map((t) => t.decodedContent ?? decodeHtmlEntities(t.content))
-            .join('');
+            .join(''), 8);
         const baseMeta = paragraph.listMeta
             ? createListLineMeta(doc, element, props, paragraph.listMeta)
             : undefined;
@@ -587,8 +708,10 @@ function splitTextIntoLines(doc, element, props) {
                     continue;
                 }
                 // Text token - split by words
+                // Don't expand tabs here - we need to preserve tabs for proper alignment
                 const rawWords = token.content.split(' ');
-                const decodedWords = (token.decodedContent ?? decodeHtmlEntities(token.content)).split(' ');
+                const decodedText = token.decodedContent ?? decodeHtmlEntities(token.content);
+                const decodedWords = decodedText.split(' ');
                 for (let i = 0; i < rawWords.length; i++) {
                     const rawWord = rawWords[i];
                     const decodedWord = decodedWords[i] ?? decodeHtmlEntities(rawWord);
@@ -597,7 +720,10 @@ function splitTextIntoLines(doc, element, props) {
                     const testLineDecoded = hasCurrentLine
                         ? `${currentLineDecoded}${separator}${decodedWord}`
                         : decodedWord;
-                    const testWidth = doc.widthOfString(testLineDecoded, props);
+                    // Expand tabs in test line for accurate width measurement
+                    // Tabs are expanded based on the full line position, maintaining tab stop alignment
+                    const testLineExpanded = expandTabsToTabStops(testLineDecoded, 8);
+                    const testWidth = doc.widthOfString(testLineExpanded, props);
                     if (testWidth <= availableWidth) {
                         currentLineDecoded = testLineDecoded;
                         currentWidth = testWidth;
@@ -626,7 +752,9 @@ function splitTextIntoLines(doc, element, props) {
                             showMarkerForLine = false;
                         }
                         currentLineDecoded = decodedWord;
-                        currentWidth = doc.widthOfString(decodedWord, props);
+                        // Expand tabs for accurate width measurement
+                        const decodedWordExpanded = expandTabsToTabStops(decodedWord, 8);
+                        currentWidth = doc.widthOfString(decodedWordExpanded, props);
                         currentTokens.push({
                             type: 'text',
                             content: rawWord,
@@ -878,8 +1006,37 @@ async function renderPDFX1aStroke(doc, element, textLines, yOffset, lineHeightPx
                 width: 0,
             });
             const segments = parseHTMLToSegments(line.text, element);
-            for (let segmentIndex = 0; segmentIndex < segments.length; segmentIndex++) {
-                const segment = segments[segmentIndex];
+            // Expand tabs in segments while tracking actual width across segments
+            // This maintains tab stop alignment based on actual font metrics, not character count
+            let currentLineWidth = 0;
+            const segmentsWithExpandedTabs = [];
+            for (const segment of segments) {
+                // Check if segment has tabs
+                const hasTabs = segment.text.includes('\t');
+                if (hasTabs) {
+                    // Load font for this segment to get accurate measurements
+                    await loadFontForSegment(doc, segment, element, fonts);
+                    doc.fontSize(element.fontSize);
+                    // Create text options for this segment
+                    const segmentTextOptions = {
+                        ...textOptions,
+                    };
+                    // Expand tabs based on actual width
+                    const expanded = expandTabsToTabStopsByWidth(segment.text, doc, segmentTextOptions, 8, currentLineWidth);
+                    currentLineWidth = expanded.width;
+                    segmentsWithExpandedTabs.push({ ...segment, text: expanded.text });
+                }
+                else {
+                    // No tabs, just measure the width and update position
+                    await loadFontForSegment(doc, segment, element, fonts);
+                    doc.fontSize(element.fontSize);
+                    const segmentWidth = doc.widthOfString(segment.text, textOptions);
+                    currentLineWidth += segmentWidth;
+                    segmentsWithExpandedTabs.push(segment);
+                }
+            }
+            for (let segmentIndex = 0; segmentIndex < segmentsWithExpandedTabs.length; segmentIndex++) {
+                const segment = segmentsWithExpandedTabs[segmentIndex];
                 const fontKey = await loadFontForSegment(doc, segment, element, fonts);
                 doc.font(fontKey);
                 doc.fontSize(element.fontSize);
@@ -888,7 +1045,7 @@ async function renderPDFX1aStroke(doc, element, textLines, yOffset, lineHeightPx
                     width: widthOption,
                     stroke: false,
                     fill: true,
-                    continued: segmentIndex !== segments.length - 1,
+                    continued: segmentIndex !== segmentsWithExpandedTabs.length - 1,
                     underline: segment.underline || textOptions.underline || false,
                     lineBreak: !!segment.underline,
                 });
@@ -981,10 +1138,42 @@ async function renderTextFill(doc, element, textLines, yOffset, lineHeightPx, te
         doc.text('', contentStartX, lineYOffset, { height: 0, width: 0 });
         // Parse line into styled segments
         const segments = parseHTMLToSegments(line.text, element);
+        // Expand tabs in segments while tracking actual width across segments
+        // This maintains tab stop alignment based on actual font metrics, not character count
+        // Note: Tabs should already be expanded by normalizeRichText, but we handle them here
+        // in case line.text still contains tabs (e.g., from HTML parsing that preserves tabs)
+        let currentLineWidth = 0;
+        const segmentsWithExpandedTabs = [];
+        for (const segment of segments) {
+            // Check if segment has tabs
+            const hasTabs = segment.text.includes('\t');
+            if (hasTabs) {
+                // Load font for this segment to get accurate measurements
+                await loadFontForSegment(doc, segment, element, fonts);
+                doc.fontSize(element.fontSize);
+                // Create text options for this segment
+                const segmentTextOptions = {
+                    ...textOptions,
+                };
+                // Expand tabs based on actual width
+                const expanded = expandTabsToTabStopsByWidth(segment.text, doc, segmentTextOptions, 8, currentLineWidth);
+                currentLineWidth = expanded.width;
+                segmentsWithExpandedTabs.push({ ...segment, text: expanded.text });
+            }
+            else {
+                // No tabs, just measure the width and update position
+                // Load font to measure correctly
+                await loadFontForSegment(doc, segment, element, fonts);
+                doc.fontSize(element.fontSize);
+                const segmentWidth = doc.widthOfString(segment.text, textOptions);
+                currentLineWidth += segmentWidth;
+                segmentsWithExpandedTabs.push(segment);
+            }
+        }
         // Render each segment with its own styling
-        for (let segmentIndex = 0; segmentIndex < segments.length; segmentIndex++) {
-            const segment = segments[segmentIndex];
-            const isLastSegment = segmentIndex === segments.length - 1;
+        for (let segmentIndex = 0; segmentIndex < segmentsWithExpandedTabs.length; segmentIndex++) {
+            const segment = segmentsWithExpandedTabs[segmentIndex];
+            const isLastSegment = segmentIndex === segmentsWithExpandedTabs.length - 1;
             // Load appropriate font for this segment
             await loadFontForSegment(doc, segment, element, fonts);
             doc.fontSize(element.fontSize);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@polotno/pdf-export",
-  "version": "0.1.30",
+  "version": "0.1.31",
   "description": "Convert Polotno JSON into vector PDF",
   "type": "module",
   "main": "./lib/index.js",