pretext-pdfjs 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/reflow.js +153 -3
package/package.json
CHANGED
package/src/reflow.js
CHANGED
|
@@ -25,7 +25,16 @@ function drawJustifiedLine(ctx, text, x, y, availWidth) {
|
|
|
25
25
|
}
|
|
26
26
|
let totalWordWidth = 0;
|
|
27
27
|
for (const w of words) totalWordWidth += ctx.measureText(w).width;
|
|
28
|
+
|
|
29
|
+
const normalSpaceWidth = ctx.measureText(" ").width;
|
|
28
30
|
const extraSpace = (availWidth - totalWordWidth) / (words.length - 1);
|
|
31
|
+
|
|
32
|
+
// Fall back to left-aligned if gaps would be too large
|
|
33
|
+
if (extraSpace > normalSpaceWidth * 3 || totalWordWidth < availWidth * 0.7) {
|
|
34
|
+
ctx.fillText(text, x, y);
|
|
35
|
+
return;
|
|
36
|
+
}
|
|
37
|
+
|
|
29
38
|
let xPos = x;
|
|
30
39
|
for (const w of words) {
|
|
31
40
|
ctx.fillText(w, xPos, y);
|
|
@@ -33,6 +42,70 @@ function drawJustifiedLine(ctx, text, x, y, availWidth) {
|
|
|
33
42
|
}
|
|
34
43
|
}
|
|
35
44
|
|
|
45
|
+
/**
 * Draw a line of text with per-span coloring (for inline colored text like links).
 *
 * Span offsets (`charStart` / `charEnd`, end-exclusive) live in a coordinate
 * space in which `text` begins at `charOffset`. Each span is clipped to this
 * line; characters not covered by any span are painted with `defaultColor`.
 * Runs are laid out left to right by adding each run's measured width to the
 * running x position.
 * NOTE(review): this presumes `ctx.textAlign` is a start/left alignment when
 * the function is called — confirm against callers.
 *
 * Spans are consumed in array order. A span that starts before the point
 * already painted (overlapping or out-of-order input) is clipped to the
 * unpainted remainder, so no character is drawn twice.
 *
 * The value of `ctx.fillStyle` on entry is restored before returning.
 *
 * @param {CanvasRenderingContext2D} ctx - Context to draw on; only
 *   `fillStyle`, `fillText` and `measureText` are used.
 * @param {string} text - Text of this line.
 * @param {number} charOffset - Index of `text[0]` in span coordinates.
 * @param {?Array<{charStart: number, charEnd: number, color: string}>} spans -
 *   Colored ranges; `null`/`undefined` is treated as an empty list.
 * @param {string} defaultColor - Fill used outside spans and for spans
 *   without a `color`.
 * @param {number} x - X coordinate of the first run.
 * @param {number} y - Y coordinate passed to every `fillText` call.
 * @returns {void}
 */
function drawColoredLine(ctx, text, charOffset, spans, defaultColor, x, y) {
  const lineStart = charOffset;
  const lineEnd = charOffset + text.length;
  const previousFill = ctx.fillStyle;
  let xPos = x;
  let pos = 0; // index into `text` of the first character not yet painted

  // Paint text[from, to) in `color` and advance the pen; empty runs are skipped.
  const drawRun = (from, to, color) => {
    if (to <= from) return;
    const run = text.slice(from, to);
    ctx.fillStyle = color;
    ctx.fillText(run, xPos, y);
    xPos += ctx.measureText(run).width;
  };

  for (const span of spans ?? []) {
    if (span.charEnd <= lineStart || span.charStart >= lineEnd) continue;

    // Clamp the start to `pos`, not 0, so overlapping spans never repaint text.
    const overlapStart = Math.max(span.charStart - lineStart, pos);
    const overlapEnd = Math.min(span.charEnd - lineStart, text.length);
    if (overlapEnd <= overlapStart) continue;

    drawRun(pos, overlapStart, defaultColor);
    drawRun(overlapStart, overlapEnd, span.color ?? defaultColor);
    pos = overlapEnd;
  }

  drawRun(pos, text.length, defaultColor);

  // Do not leak the last run's color into the caller's subsequent drawing.
  ctx.fillStyle = previousFill;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Draw a line of justified text with per-span coloring.
 *
 * The line is split on single spaces and each word is drawn through
 * `drawColoredLine`, with the leftover horizontal space
 * (`availWidth` minus the summed word widths) divided evenly between words.
 *
 * The whole line is instead drawn un-justified via `drawColoredLine` when:
 *   - it has at most one word;
 *   - the per-gap width would exceed three normal space widths, or the words
 *     fill less than 70% of `availWidth` (gaps would look too loose);
 *   - the per-gap width is negative or not finite, i.e. the words alone are
 *     wider than `availWidth` (or `availWidth` is not a usable number) —
 *     justifying would draw words on top of each other.
 *
 * @param {CanvasRenderingContext2D} ctx - Context to draw on; `measureText`
 *   is used here, drawing is delegated to `drawColoredLine`.
 * @param {string} text - Text of this line.
 * @param {number} charOffset - Index of `text[0]` in span coordinates.
 * @param {Array<{charStart: number, charEnd: number, color: string}>} spans -
 *   Colored ranges, forwarded unchanged to `drawColoredLine`.
 * @param {string} defaultColor - Fill for text not covered by a span.
 * @param {number} x - X coordinate of the first word.
 * @param {number} y - Y coordinate of the line.
 * @param {number} availWidth - Width the justified line should span.
 * @returns {void}
 */
function drawColoredJustifiedLine(ctx, text, charOffset, spans, defaultColor, x, y, availWidth) {
  const words = text.split(" ");
  if (words.length <= 1) {
    drawColoredLine(ctx, text, charOffset, spans, defaultColor, x, y);
    return;
  }

  // Measure each word once; the widths are reused when advancing the pen.
  const wordWidths = words.map((word) => ctx.measureText(word).width);
  const totalWordWidth = wordWidths.reduce((sum, width) => sum + width, 0);
  const normalSpaceWidth = ctx.measureText(" ").width;
  const extraSpace = (availWidth - totalWordWidth) / (words.length - 1);

  const gapsTooLoose =
    extraSpace > normalSpaceWidth * 3 || totalWordWidth < availWidth * 0.7;
  // A negative gap would overlap words; NaN/Infinity would place them nowhere.
  const gapsUnusable = !Number.isFinite(extraSpace) || extraSpace < 0;

  if (gapsTooLoose || gapsUnusable) {
    drawColoredLine(ctx, text, charOffset, spans, defaultColor, x, y);
    return;
  }

  // Draw word by word with per-span coloring and justified spacing
  let xPos = x;
  let charPos = 0;
  for (let wi = 0; wi < words.length; wi++) {
    const word = words[wi];
    drawColoredLine(ctx, word, charOffset + charPos, spans, defaultColor, xPos, y);
    xPos += wordWidths[wi] + extraSpace;
    charPos += word.length + 1; // +1 for the space removed by split(" ")
  }
}
|
|
108
|
+
|
|
36
109
|
function bboxOverlap(a, b) {
|
|
37
110
|
const x1 = Math.max(a.x, b.x);
|
|
38
111
|
const y1 = Math.max(a.y, b.y);
|
|
@@ -301,6 +374,29 @@ function groupTextBlocks(textItems, pageHeight, styles, fontMap, textColors) {
|
|
|
301
374
|
}
|
|
302
375
|
}
|
|
303
376
|
block.color = dominantColor;
|
|
377
|
+
|
|
378
|
+
// Build color spans — contiguous runs of items sharing the same color
|
|
379
|
+
// Character indices map to the concatenated text produced by blockToText
|
|
380
|
+
block.colorSpans = [];
|
|
381
|
+
if (block.items.length > 0) {
|
|
382
|
+
let spanColor = block.items[0]._color || "#000000";
|
|
383
|
+
let spanCharStart = 0;
|
|
384
|
+
let charCount = 0;
|
|
385
|
+
|
|
386
|
+
for (let i = 0; i < block.items.length; i++) {
|
|
387
|
+
const c = block.items[i]._color || "#000000";
|
|
388
|
+
const itemLen = (block.items[i].str || "").length;
|
|
389
|
+
if (c !== spanColor) {
|
|
390
|
+
block.colorSpans.push({ charStart: spanCharStart, charEnd: charCount, color: spanColor });
|
|
391
|
+
spanCharStart = charCount;
|
|
392
|
+
spanColor = c;
|
|
393
|
+
}
|
|
394
|
+
charCount += itemLen;
|
|
395
|
+
// Account for spaces inserted between items by blockToText
|
|
396
|
+
if (i < block.items.length - 1) charCount++;
|
|
397
|
+
}
|
|
398
|
+
block.colorSpans.push({ charStart: spanCharStart, charEnd: charCount, color: spanColor });
|
|
399
|
+
}
|
|
304
400
|
}
|
|
305
401
|
|
|
306
402
|
return blocks;
|
|
@@ -649,6 +745,28 @@ function buildRegionMap(textBlocks, graphicRegions, pageHeight) {
|
|
|
649
745
|
});
|
|
650
746
|
}
|
|
651
747
|
|
|
748
|
+
// ── Compute inter-block vertical gaps from original PDF layout ──
|
|
749
|
+
for (let i = 1; i < regions.length; i++) {
|
|
750
|
+
const prev = regions[i - 1];
|
|
751
|
+
const curr = regions[i];
|
|
752
|
+
const prevBottom = prev.bbox.y + prev.bbox.h;
|
|
753
|
+
const currTop = curr.bbox.y;
|
|
754
|
+
curr.gapBefore = Math.max(0, currTop - prevBottom);
|
|
755
|
+
}
|
|
756
|
+
if (regions.length > 0) {
|
|
757
|
+
regions[0].gapBefore = regions[0].bbox.y;
|
|
758
|
+
}
|
|
759
|
+
|
|
760
|
+
// Normalize gaps relative to average body line height
|
|
761
|
+
const bodyBlocks = regions.filter(r =>
|
|
762
|
+
r.type === "text" && r.block?.fontScale && Math.abs(r.block.fontScale - 1) < 0.15);
|
|
763
|
+
const avgBodyLH = bodyBlocks.length > 0
|
|
764
|
+
? bodyBlocks.reduce((s, r) => s + r.block.avgFontSize * 1.6, 0) / bodyBlocks.length
|
|
765
|
+
: 12 * 1.6;
|
|
766
|
+
for (const region of regions) {
|
|
767
|
+
region.gapRatio = (region.gapBefore || 0) / avgBodyLH;
|
|
768
|
+
}
|
|
769
|
+
|
|
652
770
|
return regions;
|
|
653
771
|
}
|
|
654
772
|
|
|
@@ -809,6 +927,7 @@ async function analyzePage(page, OPS) {
|
|
|
809
927
|
graphicRegions,
|
|
810
928
|
offCanvas,
|
|
811
929
|
fontMap,
|
|
930
|
+
bodyFontSize,
|
|
812
931
|
};
|
|
813
932
|
}
|
|
814
933
|
|
|
@@ -877,6 +996,7 @@ function reflowAndComposite(analysis, opts) {
|
|
|
877
996
|
fontFamily: blockFamily,
|
|
878
997
|
align: block.align || "left",
|
|
879
998
|
color: block.color,
|
|
999
|
+
colorSpans: block.colorSpans || [],
|
|
880
1000
|
region,
|
|
881
1001
|
});
|
|
882
1002
|
} else {
|
|
@@ -908,12 +1028,13 @@ function reflowAndComposite(analysis, opts) {
|
|
|
908
1028
|
}
|
|
909
1029
|
}
|
|
910
1030
|
|
|
911
|
-
// Total height
|
|
1031
|
+
// Total height — use original PDF gap ratios between regions
|
|
912
1032
|
const baseLH = fontSize * lineHeight;
|
|
913
1033
|
let totalHeight = padding;
|
|
914
1034
|
for (const r of reflowedRegions) {
|
|
915
1035
|
totalHeight += r.height;
|
|
916
|
-
|
|
1036
|
+
const gapRatio = r.region?.gapRatio ?? 0.4;
|
|
1037
|
+
totalHeight += baseLH * Math.max(0.2, Math.min(gapRatio, 2.0));
|
|
917
1038
|
}
|
|
918
1039
|
totalHeight += padding;
|
|
919
1040
|
|
|
@@ -946,6 +1067,7 @@ export function createReflowRenderer(container, options = {}) {
|
|
|
946
1067
|
let pdfjs = null;
|
|
947
1068
|
let pdfDoc = null;
|
|
948
1069
|
let currentPage = 0;
|
|
1070
|
+
const userSetFontSize = options.fontSize != null;
|
|
949
1071
|
let fontSize = options.fontSize ?? 16;
|
|
950
1072
|
let destroyed = false;
|
|
951
1073
|
|
|
@@ -1050,11 +1172,15 @@ export function createReflowRenderer(container, options = {}) {
|
|
|
1050
1172
|
const justified = r.align === "justify";
|
|
1051
1173
|
const availW = W - padding * 2;
|
|
1052
1174
|
|
|
1175
|
+
const hasMultipleColors = r.colorSpans && r.colorSpans.length > 1 &&
|
|
1176
|
+
!r.colorSpans.every(s => s.color === r.colorSpans[0].color);
|
|
1177
|
+
|
|
1053
1178
|
if (!enableMorph) {
|
|
1054
1179
|
ctx.fillStyle = r.color || textColor;
|
|
1055
1180
|
ctx.font = `${style} ${weight} ${fs * d}px ${rFamily}`;
|
|
1056
1181
|
}
|
|
1057
1182
|
|
|
1183
|
+
let lineCharOffset = 0;
|
|
1058
1184
|
for (let lineIdx = 0; lineIdx < r.lines.length; lineIdx++) {
|
|
1059
1185
|
const line = r.lines[lineIdx];
|
|
1060
1186
|
const screenY = cursorY - scrollY;
|
|
@@ -1098,6 +1224,21 @@ export function createReflowRenderer(container, options = {}) {
|
|
|
1098
1224
|
ctx.fillText(line.text, padding * d, screenY * d);
|
|
1099
1225
|
}
|
|
1100
1226
|
ctx.restore();
|
|
1227
|
+
} else if (hasMultipleColors) {
|
|
1228
|
+
// Per-span coloring for inline colored text (links, emphasis)
|
|
1229
|
+
if (shouldJustify) {
|
|
1230
|
+
drawColoredJustifiedLine(ctx, line.text, lineCharOffset, r.colorSpans,
|
|
1231
|
+
r.color || textColor, padding * d, screenY * d, availW * d);
|
|
1232
|
+
} else if (centered) {
|
|
1233
|
+
// Measure full line to center it, then draw colored from offset
|
|
1234
|
+
const lineW = ctx.measureText(line.text).width;
|
|
1235
|
+
const startX = (W * d - lineW) / 2;
|
|
1236
|
+
drawColoredLine(ctx, line.text, lineCharOffset, r.colorSpans,
|
|
1237
|
+
r.color || textColor, startX, screenY * d);
|
|
1238
|
+
} else {
|
|
1239
|
+
drawColoredLine(ctx, line.text, lineCharOffset, r.colorSpans,
|
|
1240
|
+
r.color || textColor, padding * d, screenY * d);
|
|
1241
|
+
}
|
|
1101
1242
|
} else {
|
|
1102
1243
|
if (centered) {
|
|
1103
1244
|
ctx.textAlign = "center";
|
|
@@ -1110,6 +1251,7 @@ export function createReflowRenderer(container, options = {}) {
|
|
|
1110
1251
|
}
|
|
1111
1252
|
}
|
|
1112
1253
|
}
|
|
1254
|
+
lineCharOffset += line.text.length;
|
|
1113
1255
|
cursorY += lh;
|
|
1114
1256
|
}
|
|
1115
1257
|
} else if (r.type === "graphic" && r.bitmap) {
|
|
@@ -1132,7 +1274,8 @@ export function createReflowRenderer(container, options = {}) {
|
|
|
1132
1274
|
}
|
|
1133
1275
|
cursorY += r.drawH;
|
|
1134
1276
|
}
|
|
1135
|
-
|
|
1277
|
+
const gapRatio = r.region?.gapRatio ?? 0.4;
|
|
1278
|
+
cursorY += baseLH * Math.max(0.2, Math.min(gapRatio, 2.0));
|
|
1136
1279
|
}
|
|
1137
1280
|
}
|
|
1138
1281
|
|
|
@@ -1268,6 +1411,12 @@ export function createReflowRenderer(container, options = {}) {
|
|
|
1268
1411
|
|
|
1269
1412
|
currentAnalysis = analysisCache.get(pageNum);
|
|
1270
1413
|
currentPage = pageNum;
|
|
1414
|
+
|
|
1415
|
+
// Auto-match PDF body font size when user hasn't set an explicit fontSize
|
|
1416
|
+
if (!userSetFontSize && currentAnalysis.bodyFontSize) {
|
|
1417
|
+
fontSize = clamp(Math.round(currentAnalysis.bodyFontSize), minFont, maxFont);
|
|
1418
|
+
}
|
|
1419
|
+
|
|
1271
1420
|
scrollY = 0;
|
|
1272
1421
|
scrollVelocity = 0;
|
|
1273
1422
|
reflow();
|
|
@@ -1279,6 +1428,7 @@ export function createReflowRenderer(container, options = {}) {
|
|
|
1279
1428
|
graphicRegions: currentAnalysis.graphicRegions,
|
|
1280
1429
|
pageWidth: currentAnalysis.pageWidth,
|
|
1281
1430
|
pageHeight: currentAnalysis.pageHeight,
|
|
1431
|
+
bodyFontSize: currentAnalysis.bodyFontSize,
|
|
1282
1432
|
});
|
|
1283
1433
|
},
|
|
1284
1434
|
|