@polotno/pdf-export 0.1.27 → 0.1.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/text.d.ts CHANGED
@@ -36,10 +36,14 @@ export interface TextSegment {
36
36
  * while preserving inline formatting tags.
37
37
  */
38
38
  declare function normalizeRichText(text: string): string;
39
+ /**
40
+ * Parse HTML text into styled segments
41
+ */
42
+ declare function parseHTMLToSegments(html: string, baseElement: TextElement): TextSegment[];
39
43
  export declare function getGoogleFontPath(fontFamily: string, fontWeight?: string, italic?: boolean): Promise<string>;
40
44
  export declare function loadFontIfNeeded(doc: any, element: TextElement, fonts: Record<string, boolean>): Promise<string>;
41
45
  /**
42
46
  * Main text rendering function
43
47
  */
44
48
  export declare function renderText(doc: PDFKit.PDFDocument, element: TextElement, fonts: Record<string, boolean>, attrs?: RenderAttrs): Promise<void>;
45
- export { normalizeRichText as __normalizeRichTextForTests };
49
+ export { normalizeRichText as __normalizeRichTextForTests, parseHTMLToSegments as __parseHTMLToSegmentsForTests, };
package/lib/text.js CHANGED
@@ -2,6 +2,14 @@ import { parseColor, srcToBuffer } from './utils.js';
2
2
  import getUrls from 'get-urls';
3
3
  import fetch from 'node-fetch';
4
4
  import { stripHtml } from 'string-strip-html';
5
+ import { decode as decodeEntities } from 'html-entities';
6
+ function decodeHtmlEntities(text) {
7
+ if (!text) {
8
+ return text;
9
+ }
10
+ const decoded = decodeEntities(text);
11
+ return decoded.replace(/\t/g, ' ');
12
+ }
5
13
  /**
6
14
  * Check if text contains HTML tags
7
15
  */
@@ -18,6 +26,8 @@ function normalizeRichText(text) {
18
26
  return text;
19
27
  }
20
28
  let normalized = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
29
+ // Normalize tab characters into 8 spaces
30
+ normalized = normalized.replace(/\t/g, ' '.repeat(8));
21
31
  // Convert explicit HTML break tags into newline characters
22
32
  normalized = normalized.replace(/<br\s*\/?>/gi, '\n');
23
33
  // Treat paragraph boundaries as newlines and drop opening tags
@@ -29,6 +39,8 @@ function normalizeRichText(text) {
29
39
  normalized = normalized.replace(/^\n+/, '').replace(/\n+$/, '');
30
40
  // Decode common HTML non-breaking space entities into their unicode counterpart
31
41
  normalized = normalized.replace(/&(nbsp|#160|#xA0);/gi, '\u00A0');
42
+ // Strip zero-width characters that can create missing-glyph boxes in PDF output
43
+ normalized = normalized.replace(/[\u200B\u200C\u200D\uFEFF\u2060]/g, '');
32
44
  return normalized;
33
45
  }
34
46
  /**
@@ -43,7 +55,7 @@ function parseHTMLToSegments(html, baseElement) {
43
55
  while ((match = regex.exec(html)) !== null) {
44
56
  if (match[4]) {
45
57
  // Text content
46
- const text = match[4];
58
+ const text = decodeHtmlEntities(match[4]);
47
59
  // Calculate current styles from tag stack
48
60
  let bold = false;
49
61
  let italic = false;
@@ -177,9 +189,11 @@ function tokenizeHTML(html) {
177
189
  while ((match = regex.exec(html)) !== null) {
178
190
  if (match[4]) {
179
191
  // Text content
192
+ const decodedContent = decodeHtmlEntities(match[4]);
180
193
  tokens.push({
181
194
  type: 'text',
182
195
  content: match[4],
196
+ decodedContent,
183
197
  });
184
198
  }
185
199
  else {
@@ -539,7 +553,7 @@ function splitTextIntoLines(doc, element, props) {
539
553
  // Extract plain text for width calculation
540
554
  const plainText = tokens
541
555
  .filter((t) => t.type === 'text')
542
- .map((t) => t.content)
556
+ .map((t) => t.decodedContent ?? decodeHtmlEntities(t.content))
543
557
  .join('');
544
558
  const baseMeta = paragraph.listMeta
545
559
  ? createListLineMeta(doc, element, props, paragraph.listMeta)
@@ -563,7 +577,7 @@ function splitTextIntoLines(doc, element, props) {
563
577
  }
564
578
  else {
565
579
  // Need to split paragraph into multiple lines
566
- let currentLine = '';
580
+ let currentLineDecoded = '';
567
581
  let currentWidth = 0;
568
582
  let currentTokens = [];
569
583
  let openTags = [];
@@ -573,34 +587,32 @@ function splitTextIntoLines(doc, element, props) {
573
587
  continue;
574
588
  }
575
589
  // Text token - split by words
576
- const textWords = token.content.split(' ');
577
- for (let i = 0; i < textWords.length; i++) {
578
- const word = textWords[i];
579
- const testLine = currentLine
580
- ? `${currentLine}${i > 0 ? ' ' : ''}${word}`
581
- : word;
582
- const testWidth = doc.widthOfString(testLine, props);
590
+ const rawWords = token.content.split(' ');
591
+ const decodedWords = (token.decodedContent ?? decodeHtmlEntities(token.content)).split(' ');
592
+ for (let i = 0; i < rawWords.length; i++) {
593
+ const rawWord = rawWords[i];
594
+ const decodedWord = decodedWords[i] ?? decodeHtmlEntities(rawWord);
595
+ const separator = i > 0 ? ' ' : '';
596
+ const hasCurrentLine = currentLineDecoded.length > 0;
597
+ const testLineDecoded = hasCurrentLine
598
+ ? `${currentLineDecoded}${separator}${decodedWord}`
599
+ : decodedWord;
600
+ const testWidth = doc.widthOfString(testLineDecoded, props);
583
601
  if (testWidth <= availableWidth) {
584
- currentLine = testLine;
602
+ currentLineDecoded = testLineDecoded;
585
603
  currentWidth = testWidth;
586
604
  // Add text token (with space if not first word in token)
587
- if (i > 0 || currentTokens.length > 0) {
588
- let content = (i > 0 ? ' ' : '') + word;
589
- currentTokens.push({
590
- type: 'text',
591
- content: content,
592
- });
593
- }
594
- else {
595
- currentTokens.push({
596
- type: 'text',
597
- content: word,
598
- });
599
- }
605
+ const rawContent = separator.length > 0 ? `${separator}${rawWord}` : rawWord;
606
+ const decodedContent = separator.length > 0 ? `${separator}${decodedWord}` : decodedWord;
607
+ currentTokens.push({
608
+ type: 'text',
609
+ content: rawContent,
610
+ decodedContent,
611
+ });
600
612
  }
601
613
  else {
602
614
  // Line is too long, save current line and start new one
603
- if (currentLine) {
615
+ if (currentLineDecoded.length > 0) {
604
616
  const result = tokensToHTML(currentTokens, openTags);
605
617
  const listMeta = cloneListMetaForLine(baseMeta, showMarkerForLine);
606
618
  lines.push({
@@ -613,17 +625,18 @@ function splitTextIntoLines(doc, element, props) {
613
625
  currentTokens = [];
614
626
  showMarkerForLine = false;
615
627
  }
616
- currentLine = word;
617
- currentWidth = doc.widthOfString(word, props);
628
+ currentLineDecoded = decodedWord;
629
+ currentWidth = doc.widthOfString(decodedWord, props);
618
630
  currentTokens.push({
619
631
  type: 'text',
620
- content: word,
632
+ content: rawWord,
633
+ decodedContent: decodedWord,
621
634
  });
622
635
  }
623
636
  }
624
637
  }
625
638
  // Add the last line
626
- if (currentLine) {
639
+ if (currentLineDecoded.length > 0) {
627
640
  const result = tokensToHTML(currentTokens, openTags);
628
641
  const listMeta = cloneListMetaForLine(baseMeta, showMarkerForLine);
629
642
  lines.push({
@@ -1034,4 +1047,4 @@ export async function renderText(doc, element, fonts, attrs = {}) {
1034
1047
  }
1035
1048
  }
1036
1049
  // Internal exports for testing
1037
- export { normalizeRichText as __normalizeRichTextForTests };
1050
+ export { normalizeRichText as __normalizeRichTextForTests, parseHTMLToSegments as __parseHTMLToSegmentsForTests, };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polotno/pdf-export",
3
- "version": "0.1.27",
3
+ "version": "0.1.29",
4
4
  "description": "Convert Polotno JSON into vector PDF",
5
5
  "type": "module",
6
6
  "main": "./lib/index.js",