npm - @polotno/pdf-export - Versions diffs - 0.1.27 → 0.1.29 - Mend

@polotno/pdf-export 0.1.27 → 0.1.29

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (3)

package/lib/text.d.ts CHANGED Viewed

@@ -36,10 +36,14 @@ export interface TextSegment {
  * while preserving inline formatting tags.
  */
 declare function normalizeRichText(text: string): string;
+/**
+ * Parse HTML text into styled segments
+ */
+declare function parseHTMLToSegments(html: string, baseElement: TextElement): TextSegment[];
 export declare function getGoogleFontPath(fontFamily: string, fontWeight?: string, italic?: boolean): Promise<string>;
 export declare function loadFontIfNeeded(doc: any, element: TextElement, fonts: Record<string, boolean>): Promise<string>;
 /**
  * Main text rendering function
  */
 export declare function renderText(doc: PDFKit.PDFDocument, element: TextElement, fonts: Record<string, boolean>, attrs?: RenderAttrs): Promise<void>;
-export { normalizeRichText as __normalizeRichTextForTests };
+export { normalizeRichText as __normalizeRichTextForTests, parseHTMLToSegments as __parseHTMLToSegmentsForTests, };

package/lib/text.js CHANGED Viewed

@@ -2,6 +2,14 @@ import { parseColor, srcToBuffer } from './utils.js';
 import getUrls from 'get-urls';
 import fetch from 'node-fetch';
 import { stripHtml } from 'string-strip-html';
+import { decode as decodeEntities } from 'html-entities';
+function decodeHtmlEntities(text) {
+    if (!text) {
+        return text;
+    }
+    const decoded = decodeEntities(text);
+    return decoded.replace(/\t/g, ' ');
+}
 /**
  * Check if text contains HTML tags
  */
@@ -18,6 +26,8 @@ function normalizeRichText(text) {
         return text;
     }
     let normalized = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
+    // Normalize tab characters into 8 spaces
+    normalized = normalized.replace(/\t/g, ' '.repeat(8));
     // Convert explicit HTML break tags into newline characters
     normalized = normalized.replace(/<br\s*\/?>/gi, '\n');
     // Treat paragraph boundaries as newlines and drop opening tags
@@ -29,6 +39,8 @@ function normalizeRichText(text) {
     normalized = normalized.replace(/^\n+/, '').replace(/\n+$/, '');
     // Decode common HTML non-breaking space entities into their unicode counterpart
     normalized = normalized.replace(/&(nbsp|#160|#xA0);/gi, '\u00A0');
+    // Strip zero-width characters that can create missing-glyph boxes in PDF output
+    normalized = normalized.replace(/[\u200B\u200C\u200D\uFEFF\u2060]/g, '');
     return normalized;
 }
 /**
@@ -43,7 +55,7 @@ function parseHTMLToSegments(html, baseElement) {
     while ((match = regex.exec(html)) !== null) {
         if (match[4]) {
             // Text content
-            const text = match[4];
+            const text = decodeHtmlEntities(match[4]);
             // Calculate current styles from tag stack
             let bold = false;
             let italic = false;
@@ -177,9 +189,11 @@ function tokenizeHTML(html) {
     while ((match = regex.exec(html)) !== null) {
         if (match[4]) {
             // Text content
+            const decodedContent = decodeHtmlEntities(match[4]);
             tokens.push({
                 type: 'text',
                 content: match[4],
+                decodedContent,
             });
         }
         else {
@@ -539,7 +553,7 @@ function splitTextIntoLines(doc, element, props) {
         // Extract plain text for width calculation
         const plainText = tokens
             .filter((t) => t.type === 'text')
-            .map((t) => t.content)
+            .map((t) => t.decodedContent ?? decodeHtmlEntities(t.content))
             .join('');
         const baseMeta = paragraph.listMeta
             ? createListLineMeta(doc, element, props, paragraph.listMeta)
@@ -563,7 +577,7 @@ function splitTextIntoLines(doc, element, props) {
         }
         else {
             // Need to split paragraph into multiple lines
-            let currentLine = '';
+            let currentLineDecoded = '';
             let currentWidth = 0;
             let currentTokens = [];
             let openTags = [];
@@ -573,34 +587,32 @@ function splitTextIntoLines(doc, element, props) {
                     continue;
                 }
                 // Text token - split by words
-                const textWords = token.content.split(' ');
-                for (let i = 0; i < textWords.length; i++) {
-                    const word = textWords[i];
-                    const testLine = currentLine
-                        ? `${currentLine}${i > 0 ? ' ' : ''}${word}`
-                        : word;
-                    const testWidth = doc.widthOfString(testLine, props);
+                const rawWords = token.content.split(' ');
+                const decodedWords = (token.decodedContent ?? decodeHtmlEntities(token.content)).split(' ');
+                for (let i = 0; i < rawWords.length; i++) {
+                    const rawWord = rawWords[i];
+                    const decodedWord = decodedWords[i] ?? decodeHtmlEntities(rawWord);
+                    const separator = i > 0 ? ' ' : '';
+                    const hasCurrentLine = currentLineDecoded.length > 0;
+                    const testLineDecoded = hasCurrentLine
+                        ? `${currentLineDecoded}${separator}${decodedWord}`
+                        : decodedWord;
+                    const testWidth = doc.widthOfString(testLineDecoded, props);
                     if (testWidth <= availableWidth) {
-                        currentLine = testLine;
+                        currentLineDecoded = testLineDecoded;
                         currentWidth = testWidth;
                         // Add text token (with space if not first word in token)
-                        if (i > 0 || currentTokens.length > 0) {
-                            let content = (i > 0 ? ' ' : '') + word;
-                            currentTokens.push({
-                                type: 'text',
-                                content: content,
-                            });
-                        }
-                        else {
-                            currentTokens.push({
-                                type: 'text',
-                                content: word,
-                            });
-                        }
+                        const rawContent = separator.length > 0 ? `${separator}${rawWord}` : rawWord;
+                        const decodedContent = separator.length > 0 ? `${separator}${decodedWord}` : decodedWord;
+                        currentTokens.push({
+                            type: 'text',
+                            content: rawContent,
+                            decodedContent,
+                        });
                     }
                     else {
                         // Line is too long, save current line and start new one
-                        if (currentLine) {
+                        if (currentLineDecoded.length > 0) {
                             const result = tokensToHTML(currentTokens, openTags);
                             const listMeta = cloneListMetaForLine(baseMeta, showMarkerForLine);
                             lines.push({
@@ -613,17 +625,18 @@ function splitTextIntoLines(doc, element, props) {
                             currentTokens = [];
                             showMarkerForLine = false;
                         }
-                        currentLine = word;
-                        currentWidth = doc.widthOfString(word, props);
+                        currentLineDecoded = decodedWord;
+                        currentWidth = doc.widthOfString(decodedWord, props);
                         currentTokens.push({
                             type: 'text',
-                            content: word,
+                            content: rawWord,
+                            decodedContent: decodedWord,
                         });
                     }
                 }
             }
             // Add the last line
-            if (currentLine) {
+            if (currentLineDecoded.length > 0) {
                 const result = tokensToHTML(currentTokens, openTags);
                 const listMeta = cloneListMetaForLine(baseMeta, showMarkerForLine);
                 lines.push({
@@ -1034,4 +1047,4 @@ export async function renderText(doc, element, fonts, attrs = {}) {
     }
 }
 // Internal exports for testing
-export { normalizeRichText as __normalizeRichTextForTests };
+export { normalizeRichText as __normalizeRichTextForTests, parseHTMLToSegments as __parseHTMLToSegmentsForTests, };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@polotno/pdf-export",
-  "version": "0.1.27",
+  "version": "0.1.29",
   "description": "Convert Polotno JSON into vector PDF",
   "type": "module",
   "main": "./lib/index.js",