@polotno/pdf-export 0.1.30 → 0.1.31

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/lib/text.js +203 -14
  2. package/package.json +1 -1
package/lib/text.js CHANGED
@@ -3,12 +3,129 @@ import getUrls from 'get-urls';
3
3
  import fetch from 'node-fetch';
4
4
  import { stripHtml } from 'string-strip-html';
5
5
  import { decode as decodeEntities } from 'html-entities';
/**
 * Expand tabs to spaces using character-count tab stops (every 8 characters by
 * default, matching the HTML default tab size).
 *
 * Each tab is replaced by exactly as many spaces as are needed to reach the
 * next multiple of `tabSize`, so deleting characters before a tab does not
 * move the text after it unless a tab stop is crossed. The column counter
 * restarts at 0 after every "\n".
 *
 * Example (tabSize = 8):
 *   "01\tx" -> "01" + 6 spaces + "x"   (column 2 -> column 8)
 *   "\tx"   -> 8 spaces + "x"          (column 0 -> column 8)
 *
 * Columns are counted per Unicode code point, so a character stored as a
 * surrogate pair (for example an emoji) advances the column by one, the same
 * as any other character.
 *
 * TODO: KNOWN LIMITATION - this is a character-based approximation and does
 * not match Chrome/browser behavior for proportional fonts.
 *
 * CURRENT LOGIC (character-based):
 * - "01\t" is treated as 2 characters followed by 6 spaces.
 * - In a proportional font "01" may take ~15px while a space takes ~5px, so
 *   visual width != character count and tabs misalign.
 *
 * ACTUAL CHROME BEHAVIOR (visual/pixel-based):
 * - Tab stops sit at multiples of (8 x space width), e.g. 8 x 5px = 40px.
 * - "01\t" advances from 15px to 40px; "\t" advances from 0px to 40px.
 * - Both end at the same VISUAL position, not the same character position.
 *
 * expandTabsToTabStopsByWidth in this module measures real glyph widths and is
 * used by the render paths. Remaining future work:
 * 1. In rendering (renderTextFill, renderStandardStroke, renderPDFX1aStroke):
 *    split segments at tabs and use PDFKit's wordSpacing option to stretch or
 *    shrink the inserted spaces to the exact advance (e.g. 25px -> 5 spaces
 *    plus a wordSpacing adjustment).
 * 2. In line breaking (splitTextIntoLines): use visual width measurement for
 *    all width calculations so wrapped lines keep accurate widths.
 *
 * CHALLENGES:
 * - Must measure with the correct font for each styled segment (bold/italic).
 * - wordSpacing interacts with justify alignment.
 * - Line breaking must use the same width calculations as rendering.
 * - Width measurement is expensive and may need caching.
 *
 * @param text - Text containing tabs to expand
 * @param tabSize - Size of tab stops; must be a positive integer (default 8)
 * @param startPosition - Starting character position for tab stop calculation (default 0)
 * @returns Text with tabs expanded to spaces; falsy input is returned unchanged
 * @throws {RangeError} If tabSize is not a positive integer
 */
function expandTabsToTabStops(text, tabSize = 8, startPosition = 0) {
    if (!text) {
        return text;
    }
    // A zero tab size used to delete every tab silently (NaN repeat count) and
    // a negative one surfaced as an unrelated String.prototype.repeat error.
    if (!Number.isInteger(tabSize) || tabSize <= 0) {
        throw new RangeError(`tabSize must be a positive integer, got ${tabSize}`);
    }
    let result = '';
    let position = startPosition; // Current character (code point) column
    // for...of iterates code points, so surrogate pairs count as one column.
    for (const char of text) {
        if (char === '\t') {
            // Pad up to the next multiple of tabSize (a full tab when already on a stop)
            const spacesNeeded = tabSize - (position % tabSize);
            result += ' '.repeat(spacesNeeded);
            position += spacesNeeded;
        }
        else if (char === '\n') {
            // Tab stops are relative to the start of each line
            result += char;
            position = 0;
        }
        else {
            result += char;
            position += 1;
        }
    }
    return result;
}
/**
 * Expand tabs to spaces based on actual text width measurements (for PDF rendering).
 *
 * Tab stops are placed at multiples of (width of one space x tabSizeInSpaces).
 * Each tab is replaced by the smallest whole number of spaces that reaches or
 * passes the next stop, so the result is still an approximation: whole spaces
 * cannot always land exactly on the stop.
 *
 * The running width is kept in "space widths" when locating the next stop and
 * snapped to the nearest whole column when it is within a tiny tolerance.
 * Without that, floating-point drift in the summed glyph widths (e.g. eight
 * glyphs of width 0.1 summing to 0.7999999999999999 instead of 0.8) makes a
 * tab that sits on a stop expand to a single space instead of a full tab.
 *
 * Text is walked by Unicode code point so that a surrogate pair is measured as
 * one character rather than as two separate halves.
 *
 * If the space width cannot be used (zero, negative or non-finite), or the
 * running width is not finite, each tab falls back to `tabSizeInSpaces`
 * spaces instead of disappearing.
 *
 * @param text - Text containing tabs to expand
 * @param doc - Object exposing widthOfString(string, options), used for measuring
 * @param textOptions - Options forwarded unchanged to doc.widthOfString
 * @param tabSizeInSpaces - Number of spaces per tab stop; must be a positive finite number (default 8)
 * @param currentWidth - Width already consumed on the current line (default 0)
 * @returns Object with the expanded text and the final running width
 * @throws {RangeError} If tabSizeInSpaces is not a positive finite number
 */
function expandTabsToTabStopsByWidth(text, doc, textOptions, tabSizeInSpaces = 8, currentWidth = 0) {
    if (!text) {
        return { text, width: currentWidth };
    }
    if (!Number.isFinite(tabSizeInSpaces) || tabSizeInSpaces <= 0) {
        throw new RangeError(`tabSizeInSpaces must be a positive finite number, got ${tabSizeInSpaces}`);
    }
    // Tolerance, in space widths, for treating the running width as sitting on a whole column
    const COLUMN_SNAP_TOLERANCE = 1e-6;
    // Measure the width of one space character
    const spaceWidth = doc.widthOfString(' ', textOptions);
    const canMeasure = Number.isFinite(spaceWidth) && spaceWidth > 0;
    const fallbackSpaces = Math.ceil(tabSizeInSpaces);
    let result = '';
    let width = currentWidth;
    for (const char of text) {
        if (char === '\t') {
            let spacesNeeded = fallbackSpaces;
            if (canMeasure && Number.isFinite(width)) {
                // Position on the line expressed in space widths
                let column = width / spaceWidth;
                const nearestColumn = Math.round(column);
                if (Math.abs(column - nearestColumn) < COLUMN_SNAP_TOLERANCE) {
                    column = nearestColumn;
                }
                // Whole spaces needed to reach (or just pass) the next tab stop
                spacesNeeded = Math.ceil(tabSizeInSpaces - (column % tabSizeInSpaces));
            }
            const spaces = ' '.repeat(spacesNeeded);
            result += spaces;
            width += doc.widthOfString(spaces, textOptions);
        }
        else if (char === '\n') {
            // Reset width on newline (tab stops reset at line start)
            result += char;
            width = 0;
        }
        else {
            result += char;
            // Measure the actual width of this character
            width += doc.widthOfString(char, textOptions);
        }
    }
    return { text: result, width };
}
/**
 * Decode HTML entities in a string.
 *
 * Falsy input (empty string, null, undefined) is handed back unchanged.
 * Tab characters are deliberately left in place: they are expanded later
 * with expandTabsToTabStops rather than being replaced here.
 *
 * @param text - Text that may contain HTML entities
 * @returns The entity-decoded text, or the original value when it is falsy
 */
function decodeHtmlEntities(text) {
    // No tab replacement here; tab handling lives in expandTabsToTabStops.
    return text ? decodeEntities(text) : text;
}
13
130
  /**
14
131
  * Check if text contains HTML tags
@@ -26,8 +143,6 @@ function normalizeRichText(text) {
26
143
  return text;
27
144
  }
28
145
  let normalized = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
29
- // Normalize tab characters into 8 spaces
30
- normalized = normalized.replace(/\t/g, ' '.repeat(8));
31
146
  // Convert explicit HTML break tags into newline characters
32
147
  normalized = normalized.replace(/<br\s*\/?>/gi, '\n');
33
148
  // Treat paragraph boundaries as newlines and drop opening tags
@@ -37,6 +152,11 @@ function normalizeRichText(text) {
37
152
  normalized = normalized.replace(/\n{3,}/g, '\n\n');
38
153
  // Trim stray leading/trailing newlines introduced by paragraph conversion
39
154
  normalized = normalized.replace(/^\n+/, '').replace(/\n+$/, '');
155
+ // Expand tabs to tab stops AFTER processing HTML structure
156
+ // This preserves HTML-like tab behavior where tabs align to fixed positions
157
+ // so deleting characters before tabs doesn't affect the position of text after tabs
158
+ // Tabs are expanded in the text content only, not in HTML tags
159
+ normalized = expandTabsToTabStops(normalized, 8);
40
160
  // Decode common HTML non-breaking space entities into their unicode counterpart
41
161
  normalized = normalized.replace(/&(nbsp|#160|#xA0);/gi, '\u00A0');
42
162
  // Strip zero-width characters that can create missing-glyph boxes in PDF output
@@ -551,10 +671,11 @@ function splitTextIntoLines(doc, element, props) {
551
671
  // Tokenize the paragraph
552
672
  const tokens = tokenizeHTML(paragraph.html);
553
673
  // Extract plain text for width calculation
554
- const plainText = tokens
674
+ // Expand tabs to tab stops for accurate width measurement
675
+ const plainText = expandTabsToTabStops(tokens
555
676
  .filter((t) => t.type === 'text')
556
677
  .map((t) => t.decodedContent ?? decodeHtmlEntities(t.content))
557
- .join('');
678
+ .join(''), 8);
558
679
  const baseMeta = paragraph.listMeta
559
680
  ? createListLineMeta(doc, element, props, paragraph.listMeta)
560
681
  : undefined;
@@ -587,8 +708,10 @@ function splitTextIntoLines(doc, element, props) {
587
708
  continue;
588
709
  }
589
710
  // Text token - split by words
711
+ // Don't expand tabs here - we need to preserve tabs for proper alignment
590
712
  const rawWords = token.content.split(' ');
591
- const decodedWords = (token.decodedContent ?? decodeHtmlEntities(token.content)).split(' ');
713
+ const decodedText = token.decodedContent ?? decodeHtmlEntities(token.content);
714
+ const decodedWords = decodedText.split(' ');
592
715
  for (let i = 0; i < rawWords.length; i++) {
593
716
  const rawWord = rawWords[i];
594
717
  const decodedWord = decodedWords[i] ?? decodeHtmlEntities(rawWord);
@@ -597,7 +720,10 @@ function splitTextIntoLines(doc, element, props) {
597
720
  const testLineDecoded = hasCurrentLine
598
721
  ? `${currentLineDecoded}${separator}${decodedWord}`
599
722
  : decodedWord;
600
- const testWidth = doc.widthOfString(testLineDecoded, props);
723
+ // Expand tabs in test line for accurate width measurement
724
+ // Tabs are expanded based on the full line position, maintaining tab stop alignment
725
+ const testLineExpanded = expandTabsToTabStops(testLineDecoded, 8);
726
+ const testWidth = doc.widthOfString(testLineExpanded, props);
601
727
  if (testWidth <= availableWidth) {
602
728
  currentLineDecoded = testLineDecoded;
603
729
  currentWidth = testWidth;
@@ -626,7 +752,9 @@ function splitTextIntoLines(doc, element, props) {
626
752
  showMarkerForLine = false;
627
753
  }
628
754
  currentLineDecoded = decodedWord;
629
- currentWidth = doc.widthOfString(decodedWord, props);
755
+ // Expand tabs for accurate width measurement
756
+ const decodedWordExpanded = expandTabsToTabStops(decodedWord, 8);
757
+ currentWidth = doc.widthOfString(decodedWordExpanded, props);
630
758
  currentTokens.push({
631
759
  type: 'text',
632
760
  content: rawWord,
@@ -878,8 +1006,37 @@ async function renderPDFX1aStroke(doc, element, textLines, yOffset, lineHeightPx
878
1006
  width: 0,
879
1007
  });
880
1008
  const segments = parseHTMLToSegments(line.text, element);
881
- for (let segmentIndex = 0; segmentIndex < segments.length; segmentIndex++) {
882
- const segment = segments[segmentIndex];
1009
+ // Expand tabs in segments while tracking actual width across segments
1010
+ // This maintains tab stop alignment based on actual font metrics, not character count
1011
+ let currentLineWidth = 0;
1012
+ const segmentsWithExpandedTabs = [];
1013
+ for (const segment of segments) {
1014
+ // Check if segment has tabs
1015
+ const hasTabs = segment.text.includes('\t');
1016
+ if (hasTabs) {
1017
+ // Load font for this segment to get accurate measurements
1018
+ await loadFontForSegment(doc, segment, element, fonts);
1019
+ doc.fontSize(element.fontSize);
1020
+ // Create text options for this segment
1021
+ const segmentTextOptions = {
1022
+ ...textOptions,
1023
+ };
1024
+ // Expand tabs based on actual width
1025
+ const expanded = expandTabsToTabStopsByWidth(segment.text, doc, segmentTextOptions, 8, currentLineWidth);
1026
+ currentLineWidth = expanded.width;
1027
+ segmentsWithExpandedTabs.push({ ...segment, text: expanded.text });
1028
+ }
1029
+ else {
1030
+ // No tabs, just measure the width and update position
1031
+ await loadFontForSegment(doc, segment, element, fonts);
1032
+ doc.fontSize(element.fontSize);
1033
+ const segmentWidth = doc.widthOfString(segment.text, textOptions);
1034
+ currentLineWidth += segmentWidth;
1035
+ segmentsWithExpandedTabs.push(segment);
1036
+ }
1037
+ }
1038
+ for (let segmentIndex = 0; segmentIndex < segmentsWithExpandedTabs.length; segmentIndex++) {
1039
+ const segment = segmentsWithExpandedTabs[segmentIndex];
883
1040
  const fontKey = await loadFontForSegment(doc, segment, element, fonts);
884
1041
  doc.font(fontKey);
885
1042
  doc.fontSize(element.fontSize);
@@ -888,7 +1045,7 @@ async function renderPDFX1aStroke(doc, element, textLines, yOffset, lineHeightPx
888
1045
  width: widthOption,
889
1046
  stroke: false,
890
1047
  fill: true,
891
- continued: segmentIndex !== segments.length - 1,
1048
+ continued: segmentIndex !== segmentsWithExpandedTabs.length - 1,
892
1049
  underline: segment.underline || textOptions.underline || false,
893
1050
  lineBreak: !!segment.underline,
894
1051
  });
@@ -981,10 +1138,42 @@ async function renderTextFill(doc, element, textLines, yOffset, lineHeightPx, te
981
1138
  doc.text('', contentStartX, lineYOffset, { height: 0, width: 0 });
982
1139
  // Parse line into styled segments
983
1140
  const segments = parseHTMLToSegments(line.text, element);
1141
+ // Expand tabs in segments while tracking actual width across segments
1142
+ // This maintains tab stop alignment based on actual font metrics, not character count
1143
+ // Note: Tabs should already be expanded by normalizeRichText, but we handle them here
1144
+ // in case line.text still contains tabs (e.g., from HTML parsing that preserves tabs)
1145
+ let currentLineWidth = 0;
1146
+ const segmentsWithExpandedTabs = [];
1147
+ for (const segment of segments) {
1148
+ // Check if segment has tabs
1149
+ const hasTabs = segment.text.includes('\t');
1150
+ if (hasTabs) {
1151
+ // Load font for this segment to get accurate measurements
1152
+ await loadFontForSegment(doc, segment, element, fonts);
1153
+ doc.fontSize(element.fontSize);
1154
+ // Create text options for this segment
1155
+ const segmentTextOptions = {
1156
+ ...textOptions,
1157
+ };
1158
+ // Expand tabs based on actual width
1159
+ const expanded = expandTabsToTabStopsByWidth(segment.text, doc, segmentTextOptions, 8, currentLineWidth);
1160
+ currentLineWidth = expanded.width;
1161
+ segmentsWithExpandedTabs.push({ ...segment, text: expanded.text });
1162
+ }
1163
+ else {
1164
+ // No tabs, just measure the width and update position
1165
+ // Load font to measure correctly
1166
+ await loadFontForSegment(doc, segment, element, fonts);
1167
+ doc.fontSize(element.fontSize);
1168
+ const segmentWidth = doc.widthOfString(segment.text, textOptions);
1169
+ currentLineWidth += segmentWidth;
1170
+ segmentsWithExpandedTabs.push(segment);
1171
+ }
1172
+ }
984
1173
  // Render each segment with its own styling
985
- for (let segmentIndex = 0; segmentIndex < segments.length; segmentIndex++) {
986
- const segment = segments[segmentIndex];
987
- const isLastSegment = segmentIndex === segments.length - 1;
1174
+ for (let segmentIndex = 0; segmentIndex < segmentsWithExpandedTabs.length; segmentIndex++) {
1175
+ const segment = segmentsWithExpandedTabs[segmentIndex];
1176
+ const isLastSegment = segmentIndex === segmentsWithExpandedTabs.length - 1;
988
1177
  // Load appropriate font for this segment
989
1178
  await loadFontForSegment(doc, segment, element, fonts);
990
1179
  doc.fontSize(element.fontSize);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@polotno/pdf-export",
3
- "version": "0.1.30",
3
+ "version": "0.1.31",
4
4
  "description": "Convert Polotno JSON into vector PDF",
5
5
  "type": "module",
6
6
  "main": "./lib/index.js",