@polotno/pdf-export 0.1.30 → 0.1.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/lib/text.js +203 -14
- package/package.json +1 -1
package/lib/text.js
CHANGED
|
@@ -3,12 +3,129 @@ import getUrls from 'get-urls';
|
|
|
3
3
|
import fetch from 'node-fetch';
|
|
4
4
|
import { stripHtml } from 'string-strip-html';
|
|
5
5
|
import { decode as decodeEntities } from 'html-entities';
|
|
6
|
+
/**
 * Expand tabs to spaces based on character-counted tab stops (every 8 characters by
 * default, matching the HTML default tab size).
 *
 * Because tabs snap to tab stops, deleting characters before a tab does not shift the
 * text after it (as long as the preceding text stays within the same stop).
 *
 * Columns are counted per Unicode code point, so a character encoded as a surrogate
 * pair (e.g. an emoji) advances the column by one, not two.
 *
 * TODO: KNOWN LIMITATION - This doesn't match Chrome/browser behavior correctly!
 *
 * CURRENT LOGIC (character-based):
 *   - Counts characters: "01\t" → "01" followed by 6 spaces (to reach column 8)
 *   - Problem: In proportional fonts, "01" visually takes ~15px but we treat it as 2 chars
 *   - Result: Tabs misalign because visual width ≠ character count
 *
 * ACTUAL CHROME BEHAVIOR (visual/pixel-based):
 *   - Measures visual width: "01" = 15px, single space = 5px
 *   - Tab stop at: 8 spaces × 5px = 40px
 *   - "01\t" should advance from 15px → 40px (add 25px, or ~5 spaces)
 *   - "\t" should advance from 0px → 40px (add 40px, or 8 spaces)
 *   - Both end at same VISUAL position (40px), not same character position
 *
 * HOW TO FIX (future work):
 *   1. Create `expandTabsWithVisualWidth(text, doc, textOptions)` that:
 *      - Measures actual text width character-by-character using doc.widthOfString()
 *      - Calculates tab stops as multiples of (spaceWidth × 8)
 *      - For each tab, determines visual advance needed to reach next tab stop
 *   2. In rendering (renderTextFill, renderStandardStroke, renderPDFX1aStroke):
 *      - Split segments at tab characters
 *      - Replace each tab with N spaces
 *      - Use PDFKit's wordSpacing option to stretch/shrink those spaces to exact width
 *      - Example: Need 25px advance → use 5 spaces + wordSpacing adjustment
 *   3. In line breaking (splitTextIntoLines):
 *      - Use visual width measurement for all width calculations
 *      - Ensure wrapped lines maintain accurate widths
 *
 * CHALLENGES:
 *   - Must measure with correct font for each styled segment (bold/italic affects width)
 *   - wordSpacing interacts with justify alignment - need careful handling
 *   - Line breaking must use same width calculations as rendering
 *   - Performance: width measurement is expensive, may need caching
 *
 * For now, we use character-based expansion which approximately matches monospace fonts
 * but misaligns in proportional fonts like Roboto/Arial. This is a known issue.
 *
 * @param text - Text containing tabs to expand. Falsy values are returned unchanged.
 * @param tabSize - Size of tab stops (default 8, matching HTML). Fractions are floored;
 *   0 removes tabs; a negative or non-finite value falls back to 8.
 * @param startPosition - Starting character position for tab stop calculation
 *   (default 0). A negative or non-finite value is treated as 0.
 * @returns Text with tabs expanded to spaces (character-based approximation)
 */
function expandTabsToTabStops(text, tabSize = 8, startPosition = 0) {
    if (!text) {
        return text;
    }
    // Sanitize the numeric arguments: a negative tab size would make String.prototype.repeat
    // throw, and NaN would silently poison every later column computation.
    const stopSize = Number.isFinite(tabSize) && tabSize >= 0 ? Math.floor(tabSize) : 8;
    let column = Number.isFinite(startPosition) && startPosition > 0 ? Math.floor(startPosition) : 0;
    let result = '';
    // for...of walks code points, so surrogate pairs count as a single column.
    for (const char of text) {
        if (char === '\t') {
            // Number of spaces needed to reach the next tab stop (a tab sitting exactly
            // on a stop advances a full stop). A stop size of 0 collapses the tab.
            const spacesNeeded = stopSize === 0 ? 0 : stopSize - (column % stopSize);
            result += ' '.repeat(spacesNeeded);
            column += spacesNeeded;
        }
        else if (char === '\n') {
            // Tab stops are relative to the start of the line.
            result += char;
            column = 0;
        }
        else {
            result += char;
            column += 1;
        }
    }
    return result;
}
|
|
79
|
+
/**
 * Expand tabs to spaces based on measured text width (for PDF rendering).
 *
 * Tab stops sit at multiples of (width of one space × tabSizeInSpaces), measured with
 * `doc.widthOfString`. Each tab is replaced by the smallest whole number of spaces that
 * reaches or passes the next stop, so the result may overshoot the stop by a fraction of
 * a space width. Text is measured one code point at a time, so any kerning between
 * adjacent glyphs is not reflected in the running width.
 *
 * @param text - Text containing tabs to expand. Falsy values are returned unchanged.
 * @param doc - Object exposing `widthOfString(string, options)` (a PDFKit document in the
 *   callers visible in this file); the font and size used for measuring are whatever is
 *   currently set on it.
 * @param textOptions - Options forwarded unchanged to every `doc.widthOfString` call
 * @param tabSizeInSpaces - Number of spaces per tab stop (default 8)
 * @param currentWidth - Width already consumed on the current line, in the same units
 *   `doc.widthOfString` returns (default 0)
 * @returns Object with the expanded `text` and the resulting line `width`
 */
function expandTabsToTabStopsByWidth(text, doc, textOptions, tabSizeInSpaces = 8, currentWidth = 0) {
    if (!text) {
        return { text, width: currentWidth };
    }
    // Measure the width of one space character
    const spaceWidth = doc.widthOfString(' ', textOptions);
    const tabStopWidth = spaceWidth * tabSizeInSpaces;
    // Width-based alignment only makes sense with a positive, finite stop width.
    const canAlign = Number.isFinite(spaceWidth) && spaceWidth > 0 &&
        Number.isFinite(tabStopWidth) && tabStopWidth > 0;
    // When measuring is impossible (e.g. a zero-width space glyph), fall back to a fixed
    // run of spaces instead of dropping the tab or throwing from String.prototype.repeat.
    const fallbackSpaces = Number.isFinite(tabSizeInSpaces) && tabSizeInSpaces > 0
        ? Math.floor(tabSizeInSpaces)
        : 0;
    // Accumulated widths carry floating-point noise; anything smaller than this is
    // treated as "exactly on a tab stop" / "exactly a whole number of spaces".
    const tolerance = spaceWidth * 1e-6;
    // Width measurement is comparatively expensive, so measure each distinct character once.
    const widthCache = new Map();
    const measureChar = (char) => {
        let charWidth = widthCache.get(char);
        if (charWidth === undefined) {
            charWidth = doc.widthOfString(char, textOptions);
            widthCache.set(char, charWidth);
        }
        return charWidth;
    };
    let result = '';
    let width = currentWidth;
    // for...of walks code points, so surrogate pairs are measured as one character
    // rather than as two lone surrogates.
    for (const char of text) {
        if (char === '\t') {
            let spacesNeeded = fallbackSpaces;
            if (canAlign) {
                let remaining = tabStopWidth - (width % tabStopWidth);
                if (remaining <= tolerance) {
                    // Rounding left us a hair short of a stop we have effectively
                    // reached: advance a full stop instead of a single space.
                    remaining += tabStopWidth;
                }
                // Subtract the tolerance so noise just above a whole number of spaces
                // does not round up to one extra space.
                spacesNeeded = Math.max(1, Math.ceil((remaining - tolerance) / spaceWidth));
            }
            const spaces = ' '.repeat(spacesNeeded);
            result += spaces;
            width += doc.widthOfString(spaces, textOptions);
        }
        else if (char === '\n') {
            // Reset width on newline (tab stops reset at line start)
            result += char;
            width = 0;
        }
        else {
            result += char;
            width += measureChar(char);
        }
    }
    return { text: result, width };
}
|
|
6
122
|
/**
 * Decode HTML entities in a string via `decodeEntities`.
 *
 * Falsy input (empty string, null, undefined) is handed back untouched.
 *
 * @param text - Text that may contain HTML entities
 * @returns The decoded text, or the original value when it is falsy
 */
function decodeHtmlEntities(text) {
    // Tabs are deliberately left alone here - expandTabsToTabStops deals with them.
    return text ? decodeEntities(text) : text;
}
|
|
13
130
|
/**
|
|
14
131
|
* Check if text contains HTML tags
|
|
@@ -26,8 +143,6 @@ function normalizeRichText(text) {
|
|
|
26
143
|
return text;
|
|
27
144
|
}
|
|
28
145
|
let normalized = text.replace(/\r\n/g, '\n').replace(/\r/g, '\n');
|
|
29
|
-
// Normalize tab characters into 8 spaces
|
|
30
|
-
normalized = normalized.replace(/\t/g, ' '.repeat(8));
|
|
31
146
|
// Convert explicit HTML break tags into newline characters
|
|
32
147
|
normalized = normalized.replace(/<br\s*\/?>/gi, '\n');
|
|
33
148
|
// Treat paragraph boundaries as newlines and drop opening tags
|
|
@@ -37,6 +152,11 @@ function normalizeRichText(text) {
|
|
|
37
152
|
normalized = normalized.replace(/\n{3,}/g, '\n\n');
|
|
38
153
|
// Trim stray leading/trailing newlines introduced by paragraph conversion
|
|
39
154
|
normalized = normalized.replace(/^\n+/, '').replace(/\n+$/, '');
|
|
155
|
+
// Expand tabs to tab stops AFTER processing HTML structure
|
|
156
|
+
// This preserves HTML-like tab behavior where tabs align to fixed positions
|
|
157
|
+
// so deleting characters before tabs doesn't affect the position of text after tabs
|
|
158
|
+
// Tabs are expanded in the text content only, not in HTML tags
|
|
159
|
+
normalized = expandTabsToTabStops(normalized, 8);
|
|
40
160
|
// Decode common HTML non-breaking space entities into their unicode counterpart
|
|
41
161
|
normalized = normalized.replace(/&(nbsp|#160|#xA0);/gi, '\u00A0');
|
|
42
162
|
// Strip zero-width characters that can create missing-glyph boxes in PDF output
|
|
@@ -551,10 +671,11 @@ function splitTextIntoLines(doc, element, props) {
|
|
|
551
671
|
// Tokenize the paragraph
|
|
552
672
|
const tokens = tokenizeHTML(paragraph.html);
|
|
553
673
|
// Extract plain text for width calculation
|
|
554
|
-
|
|
674
|
+
// Expand tabs to tab stops for accurate width measurement
|
|
675
|
+
const plainText = expandTabsToTabStops(tokens
|
|
555
676
|
.filter((t) => t.type === 'text')
|
|
556
677
|
.map((t) => t.decodedContent ?? decodeHtmlEntities(t.content))
|
|
557
|
-
.join('');
|
|
678
|
+
.join(''), 8);
|
|
558
679
|
const baseMeta = paragraph.listMeta
|
|
559
680
|
? createListLineMeta(doc, element, props, paragraph.listMeta)
|
|
560
681
|
: undefined;
|
|
@@ -587,8 +708,10 @@ function splitTextIntoLines(doc, element, props) {
|
|
|
587
708
|
continue;
|
|
588
709
|
}
|
|
589
710
|
// Text token - split by words
|
|
711
|
+
// Don't expand tabs here - we need to preserve tabs for proper alignment
|
|
590
712
|
const rawWords = token.content.split(' ');
|
|
591
|
-
const
|
|
713
|
+
const decodedText = token.decodedContent ?? decodeHtmlEntities(token.content);
|
|
714
|
+
const decodedWords = decodedText.split(' ');
|
|
592
715
|
for (let i = 0; i < rawWords.length; i++) {
|
|
593
716
|
const rawWord = rawWords[i];
|
|
594
717
|
const decodedWord = decodedWords[i] ?? decodeHtmlEntities(rawWord);
|
|
@@ -597,7 +720,10 @@ function splitTextIntoLines(doc, element, props) {
|
|
|
597
720
|
const testLineDecoded = hasCurrentLine
|
|
598
721
|
? `${currentLineDecoded}${separator}${decodedWord}`
|
|
599
722
|
: decodedWord;
|
|
600
|
-
|
|
723
|
+
// Expand tabs in test line for accurate width measurement
|
|
724
|
+
// Tabs are expanded based on the full line position, maintaining tab stop alignment
|
|
725
|
+
const testLineExpanded = expandTabsToTabStops(testLineDecoded, 8);
|
|
726
|
+
const testWidth = doc.widthOfString(testLineExpanded, props);
|
|
601
727
|
if (testWidth <= availableWidth) {
|
|
602
728
|
currentLineDecoded = testLineDecoded;
|
|
603
729
|
currentWidth = testWidth;
|
|
@@ -626,7 +752,9 @@ function splitTextIntoLines(doc, element, props) {
|
|
|
626
752
|
showMarkerForLine = false;
|
|
627
753
|
}
|
|
628
754
|
currentLineDecoded = decodedWord;
|
|
629
|
-
|
|
755
|
+
// Expand tabs for accurate width measurement
|
|
756
|
+
const decodedWordExpanded = expandTabsToTabStops(decodedWord, 8);
|
|
757
|
+
currentWidth = doc.widthOfString(decodedWordExpanded, props);
|
|
630
758
|
currentTokens.push({
|
|
631
759
|
type: 'text',
|
|
632
760
|
content: rawWord,
|
|
@@ -878,8 +1006,37 @@ async function renderPDFX1aStroke(doc, element, textLines, yOffset, lineHeightPx
|
|
|
878
1006
|
width: 0,
|
|
879
1007
|
});
|
|
880
1008
|
const segments = parseHTMLToSegments(line.text, element);
|
|
881
|
-
|
|
882
|
-
|
|
1009
|
+
// Expand tabs in segments while tracking actual width across segments
|
|
1010
|
+
// This maintains tab stop alignment based on actual font metrics, not character count
|
|
1011
|
+
let currentLineWidth = 0;
|
|
1012
|
+
const segmentsWithExpandedTabs = [];
|
|
1013
|
+
for (const segment of segments) {
|
|
1014
|
+
// Check if segment has tabs
|
|
1015
|
+
const hasTabs = segment.text.includes('\t');
|
|
1016
|
+
if (hasTabs) {
|
|
1017
|
+
// Load font for this segment to get accurate measurements
|
|
1018
|
+
await loadFontForSegment(doc, segment, element, fonts);
|
|
1019
|
+
doc.fontSize(element.fontSize);
|
|
1020
|
+
// Create text options for this segment
|
|
1021
|
+
const segmentTextOptions = {
|
|
1022
|
+
...textOptions,
|
|
1023
|
+
};
|
|
1024
|
+
// Expand tabs based on actual width
|
|
1025
|
+
const expanded = expandTabsToTabStopsByWidth(segment.text, doc, segmentTextOptions, 8, currentLineWidth);
|
|
1026
|
+
currentLineWidth = expanded.width;
|
|
1027
|
+
segmentsWithExpandedTabs.push({ ...segment, text: expanded.text });
|
|
1028
|
+
}
|
|
1029
|
+
else {
|
|
1030
|
+
// No tabs, just measure the width and update position
|
|
1031
|
+
await loadFontForSegment(doc, segment, element, fonts);
|
|
1032
|
+
doc.fontSize(element.fontSize);
|
|
1033
|
+
const segmentWidth = doc.widthOfString(segment.text, textOptions);
|
|
1034
|
+
currentLineWidth += segmentWidth;
|
|
1035
|
+
segmentsWithExpandedTabs.push(segment);
|
|
1036
|
+
}
|
|
1037
|
+
}
|
|
1038
|
+
for (let segmentIndex = 0; segmentIndex < segmentsWithExpandedTabs.length; segmentIndex++) {
|
|
1039
|
+
const segment = segmentsWithExpandedTabs[segmentIndex];
|
|
883
1040
|
const fontKey = await loadFontForSegment(doc, segment, element, fonts);
|
|
884
1041
|
doc.font(fontKey);
|
|
885
1042
|
doc.fontSize(element.fontSize);
|
|
@@ -888,7 +1045,7 @@ async function renderPDFX1aStroke(doc, element, textLines, yOffset, lineHeightPx
|
|
|
888
1045
|
width: widthOption,
|
|
889
1046
|
stroke: false,
|
|
890
1047
|
fill: true,
|
|
891
|
-
continued: segmentIndex !==
|
|
1048
|
+
continued: segmentIndex !== segmentsWithExpandedTabs.length - 1,
|
|
892
1049
|
underline: segment.underline || textOptions.underline || false,
|
|
893
1050
|
lineBreak: !!segment.underline,
|
|
894
1051
|
});
|
|
@@ -981,10 +1138,42 @@ async function renderTextFill(doc, element, textLines, yOffset, lineHeightPx, te
|
|
|
981
1138
|
doc.text('', contentStartX, lineYOffset, { height: 0, width: 0 });
|
|
982
1139
|
// Parse line into styled segments
|
|
983
1140
|
const segments = parseHTMLToSegments(line.text, element);
|
|
1141
|
+
// Expand tabs in segments while tracking actual width across segments
|
|
1142
|
+
// This maintains tab stop alignment based on actual font metrics, not character count
|
|
1143
|
+
// Note: Tabs should already be expanded by normalizeRichText, but we handle them here
|
|
1144
|
+
// in case line.text still contains tabs (e.g., from HTML parsing that preserves tabs)
|
|
1145
|
+
let currentLineWidth = 0;
|
|
1146
|
+
const segmentsWithExpandedTabs = [];
|
|
1147
|
+
for (const segment of segments) {
|
|
1148
|
+
// Check if segment has tabs
|
|
1149
|
+
const hasTabs = segment.text.includes('\t');
|
|
1150
|
+
if (hasTabs) {
|
|
1151
|
+
// Load font for this segment to get accurate measurements
|
|
1152
|
+
await loadFontForSegment(doc, segment, element, fonts);
|
|
1153
|
+
doc.fontSize(element.fontSize);
|
|
1154
|
+
// Create text options for this segment
|
|
1155
|
+
const segmentTextOptions = {
|
|
1156
|
+
...textOptions,
|
|
1157
|
+
};
|
|
1158
|
+
// Expand tabs based on actual width
|
|
1159
|
+
const expanded = expandTabsToTabStopsByWidth(segment.text, doc, segmentTextOptions, 8, currentLineWidth);
|
|
1160
|
+
currentLineWidth = expanded.width;
|
|
1161
|
+
segmentsWithExpandedTabs.push({ ...segment, text: expanded.text });
|
|
1162
|
+
}
|
|
1163
|
+
else {
|
|
1164
|
+
// No tabs, just measure the width and update position
|
|
1165
|
+
// Load font to measure correctly
|
|
1166
|
+
await loadFontForSegment(doc, segment, element, fonts);
|
|
1167
|
+
doc.fontSize(element.fontSize);
|
|
1168
|
+
const segmentWidth = doc.widthOfString(segment.text, textOptions);
|
|
1169
|
+
currentLineWidth += segmentWidth;
|
|
1170
|
+
segmentsWithExpandedTabs.push(segment);
|
|
1171
|
+
}
|
|
1172
|
+
}
|
|
984
1173
|
// Render each segment with its own styling
|
|
985
|
-
for (let segmentIndex = 0; segmentIndex <
|
|
986
|
-
const segment =
|
|
987
|
-
const isLastSegment = segmentIndex ===
|
|
1174
|
+
for (let segmentIndex = 0; segmentIndex < segmentsWithExpandedTabs.length; segmentIndex++) {
|
|
1175
|
+
const segment = segmentsWithExpandedTabs[segmentIndex];
|
|
1176
|
+
const isLastSegment = segmentIndex === segmentsWithExpandedTabs.length - 1;
|
|
988
1177
|
// Load appropriate font for this segment
|
|
989
1178
|
await loadFontForSegment(doc, segment, element, fonts);
|
|
990
1179
|
doc.fontSize(element.fontSize);
|