pretext-pdfjs 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +1 -1
  2. package/src/reflow.js +153 -3
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pretext-pdfjs",
3
- "version": "0.3.1",
3
+ "version": "0.3.2",
4
4
  "description": "Pretext-native text layer for PDF.js — zero DOM reflows, per-block reflow with image preservation, pinch-to-zoom text",
5
5
  "type": "module",
6
6
  "main": "./src/index.js",
package/src/reflow.js CHANGED
@@ -25,7 +25,16 @@ function drawJustifiedLine(ctx, text, x, y, availWidth) {
25
25
  }
26
26
  let totalWordWidth = 0;
27
27
  for (const w of words) totalWordWidth += ctx.measureText(w).width;
28
+
29
+ const normalSpaceWidth = ctx.measureText(" ").width;
28
30
  const extraSpace = (availWidth - totalWordWidth) / (words.length - 1);
31
+
32
+ // Fall back to left-aligned if gaps would be too large
33
+ if (extraSpace > normalSpaceWidth * 3 || totalWordWidth < availWidth * 0.7) {
34
+ ctx.fillText(text, x, y);
35
+ return;
36
+ }
37
+
29
38
  let xPos = x;
30
39
  for (const w of words) {
31
40
  ctx.fillText(w, xPos, y);
@@ -33,6 +42,70 @@ function drawJustifiedLine(ctx, text, x, y, availWidth) {
33
42
  }
34
43
  }
35
44
 
45
/**
 * Draw a line of text with per-span coloring (for inline colored text like links).
 *
 * Span indices are relative to the whole block text; `charOffset` is the index of
 * this line's first character in that text. Characters not covered by any span are
 * painted with `defaultColor`. Segments are painted left to right, each one starting
 * where the previous one ended (widths come from `ctx.measureText`).
 *
 * Spans are consumed in array order. Any part of a span that falls on characters
 * already painted (overlapping or out-of-order spans) is skipped, so no character
 * is drawn twice and the pen position never runs ahead of the text.
 *
 * Side effect: `ctx.fillStyle` is left set to the color of the last painted segment.
 *
 * @param {CanvasRenderingContext2D} ctx - Drawing context; only `fillStyle`,
 *   `fillText` and `measureText` are used.
 * @param {string} text - The line to draw.
 * @param {number} charOffset - Index of `text[0]` within the block text.
 * @param {Array<{charStart: number, charEnd: number, color: string}>} spans -
 *   Color runs (`charEnd` exclusive). A null/undefined value is treated as empty.
 * @param {string} defaultColor - Fill style for characters outside every span.
 * @param {number} x - Left edge of the line, in canvas units.
 * @param {number} y - Y coordinate passed to `fillText`.
 */
function drawColoredLine(ctx, text, charOffset, spans, defaultColor, x, y) {
  const lineStart = charOffset;
  const lineEnd = charOffset + text.length;
  let xPos = x;
  let pos = 0; // line-relative index of the first character not yet painted

  for (const span of spans ?? []) {
    if (span.charEnd <= lineStart || span.charStart >= lineEnd) continue;

    // Clamp to `pos` (not 0) so a span overlapping an earlier one cannot repaint text.
    const overlapStart = Math.max(span.charStart - lineStart, pos);
    const overlapEnd = Math.min(span.charEnd - lineStart, text.length);
    if (overlapEnd <= overlapStart) continue;

    if (overlapStart > pos) {
      // Uncovered characters between the previous segment and this span.
      const gapText = text.slice(pos, overlapStart);
      ctx.fillStyle = defaultColor;
      ctx.fillText(gapText, xPos, y);
      xPos += ctx.measureText(gapText).width;
    }

    const spanText = text.slice(overlapStart, overlapEnd);
    ctx.fillStyle = span.color;
    ctx.fillText(spanText, xPos, y);
    xPos += ctx.measureText(spanText).width;
    pos = overlapEnd;
  }

  // Trailing characters after the last span.
  if (pos < text.length) {
    ctx.fillStyle = defaultColor;
    ctx.fillText(text.slice(pos), xPos, y);
  }
}
78
+
79
/**
 * Draw a line of justified text with per-span coloring.
 *
 * The line is split on single spaces and the words are spread so that they fill
 * `availWidth`, with each word painted through `drawColoredLine` so span colors are
 * kept. The line is drawn left-aligned (one `drawColoredLine` call for the whole
 * text) instead when justification would look wrong:
 *   - there is only one word,
 *   - the gap between words would exceed three normal spaces,
 *   - the words fill less than 70% of `availWidth`,
 *   - the gap would be negative or not a number (line wider than `availWidth`,
 *     or an unusable width), which would otherwise make words overlap.
 *
 * @param {CanvasRenderingContext2D} ctx - Drawing context; `measureText` is used here.
 * @param {string} text - The line to draw.
 * @param {number} charOffset - Index of `text[0]` within the block text.
 * @param {Array<{charStart: number, charEnd: number, color: string}>} spans -
 *   Color runs, forwarded to `drawColoredLine`.
 * @param {string} defaultColor - Fill style for characters outside every span.
 * @param {number} x - Left edge of the line, in canvas units.
 * @param {number} y - Y coordinate passed through to `drawColoredLine`.
 * @param {number} availWidth - Width the justified line should span.
 */
function drawColoredJustifiedLine(ctx, text, charOffset, spans, defaultColor, x, y, availWidth) {
  const words = text.split(" ");
  if (words.length <= 1) {
    drawColoredLine(ctx, text, charOffset, spans, defaultColor, x, y);
    return;
  }

  // Measure each word once; the widths are reused when advancing the pen below.
  const wordWidths = words.map((w) => ctx.measureText(w).width);
  const totalWordWidth = wordWidths.reduce((sum, w) => sum + w, 0);
  const normalSpaceWidth = ctx.measureText(" ").width;
  const gap = (availWidth - totalWordWidth) / (words.length - 1);

  // `gap >= 0` is false for NaN as well, so bad measurements also fall back.
  const gapIsUsable = gap >= 0 && gap <= normalSpaceWidth * 3;
  if (!gapIsUsable || totalWordWidth < availWidth * 0.7) {
    drawColoredLine(ctx, text, charOffset, spans, defaultColor, x, y);
    return;
  }

  // Draw word by word with per-span coloring and justified spacing
  let xPos = x;
  let charPos = 0;
  for (let wi = 0; wi < words.length; wi++) {
    const word = words[wi];
    drawColoredLine(ctx, word, charOffset + charPos, spans, defaultColor, xPos, y);
    xPos += wordWidths[wi] + gap;
    charPos += word.length + 1; // +1 for the space removed by split
  }
}
108
+
36
109
  function bboxOverlap(a, b) {
37
110
  const x1 = Math.max(a.x, b.x);
38
111
  const y1 = Math.max(a.y, b.y);
@@ -301,6 +374,29 @@ function groupTextBlocks(textItems, pageHeight, styles, fontMap, textColors) {
301
374
  }
302
375
  }
303
376
  block.color = dominantColor;
377
+
378
+ // Build color spans — contiguous runs of items sharing the same color
379
+ // Character indices map to the concatenated text produced by blockToText
380
+ block.colorSpans = [];
381
+ if (block.items.length > 0) {
382
+ let spanColor = block.items[0]._color || "#000000";
383
+ let spanCharStart = 0;
384
+ let charCount = 0;
385
+
386
+ for (let i = 0; i < block.items.length; i++) {
387
+ const c = block.items[i]._color || "#000000";
388
+ const itemLen = (block.items[i].str || "").length;
389
+ if (c !== spanColor) {
390
+ block.colorSpans.push({ charStart: spanCharStart, charEnd: charCount, color: spanColor });
391
+ spanCharStart = charCount;
392
+ spanColor = c;
393
+ }
394
+ charCount += itemLen;
395
+ // Account for spaces inserted between items by blockToText
396
+ if (i < block.items.length - 1) charCount++;
397
+ }
398
+ block.colorSpans.push({ charStart: spanCharStart, charEnd: charCount, color: spanColor });
399
+ }
304
400
  }
305
401
 
306
402
  return blocks;
@@ -649,6 +745,28 @@ function buildRegionMap(textBlocks, graphicRegions, pageHeight) {
649
745
  });
650
746
  }
651
747
 
748
+ // ── Compute inter-block vertical gaps from original PDF layout ──
749
+ for (let i = 1; i < regions.length; i++) {
750
+ const prev = regions[i - 1];
751
+ const curr = regions[i];
752
+ const prevBottom = prev.bbox.y + prev.bbox.h;
753
+ const currTop = curr.bbox.y;
754
+ curr.gapBefore = Math.max(0, currTop - prevBottom);
755
+ }
756
+ if (regions.length > 0) {
757
+ regions[0].gapBefore = regions[0].bbox.y;
758
+ }
759
+
760
+ // Normalize gaps relative to average body line height
761
+ const bodyBlocks = regions.filter(r =>
762
+ r.type === "text" && r.block?.fontScale && Math.abs(r.block.fontScale - 1) < 0.15);
763
+ const avgBodyLH = bodyBlocks.length > 0
764
+ ? bodyBlocks.reduce((s, r) => s + r.block.avgFontSize * 1.6, 0) / bodyBlocks.length
765
+ : 12 * 1.6;
766
+ for (const region of regions) {
767
+ region.gapRatio = (region.gapBefore || 0) / avgBodyLH;
768
+ }
769
+
652
770
  return regions;
653
771
  }
654
772
 
@@ -809,6 +927,7 @@ async function analyzePage(page, OPS) {
809
927
  graphicRegions,
810
928
  offCanvas,
811
929
  fontMap,
930
+ bodyFontSize,
812
931
  };
813
932
  }
814
933
 
@@ -877,6 +996,7 @@ function reflowAndComposite(analysis, opts) {
877
996
  fontFamily: blockFamily,
878
997
  align: block.align || "left",
879
998
  color: block.color,
999
+ colorSpans: block.colorSpans || [],
880
1000
  region,
881
1001
  });
882
1002
  } else {
@@ -908,12 +1028,13 @@ function reflowAndComposite(analysis, opts) {
908
1028
  }
909
1029
  }
910
1030
 
911
- // Total height
1031
+ // Total height — use original PDF gap ratios between regions
912
1032
  const baseLH = fontSize * lineHeight;
913
1033
  let totalHeight = padding;
914
1034
  for (const r of reflowedRegions) {
915
1035
  totalHeight += r.height;
916
- totalHeight += baseLH * 0.4;
1036
+ const gapRatio = r.region?.gapRatio ?? 0.4;
1037
+ totalHeight += baseLH * Math.max(0.2, Math.min(gapRatio, 2.0));
917
1038
  }
918
1039
  totalHeight += padding;
919
1040
 
@@ -946,6 +1067,7 @@ export function createReflowRenderer(container, options = {}) {
946
1067
  let pdfjs = null;
947
1068
  let pdfDoc = null;
948
1069
  let currentPage = 0;
1070
+ const userSetFontSize = options.fontSize != null;
949
1071
  let fontSize = options.fontSize ?? 16;
950
1072
  let destroyed = false;
951
1073
 
@@ -1050,11 +1172,15 @@ export function createReflowRenderer(container, options = {}) {
1050
1172
  const justified = r.align === "justify";
1051
1173
  const availW = W - padding * 2;
1052
1174
 
1175
+ const hasMultipleColors = r.colorSpans && r.colorSpans.length > 1 &&
1176
+ !r.colorSpans.every(s => s.color === r.colorSpans[0].color);
1177
+
1053
1178
  if (!enableMorph) {
1054
1179
  ctx.fillStyle = r.color || textColor;
1055
1180
  ctx.font = `${style} ${weight} ${fs * d}px ${rFamily}`;
1056
1181
  }
1057
1182
 
1183
+ let lineCharOffset = 0;
1058
1184
  for (let lineIdx = 0; lineIdx < r.lines.length; lineIdx++) {
1059
1185
  const line = r.lines[lineIdx];
1060
1186
  const screenY = cursorY - scrollY;
@@ -1098,6 +1224,21 @@ export function createReflowRenderer(container, options = {}) {
1098
1224
  ctx.fillText(line.text, padding * d, screenY * d);
1099
1225
  }
1100
1226
  ctx.restore();
1227
+ } else if (hasMultipleColors) {
1228
+ // Per-span coloring for inline colored text (links, emphasis)
1229
+ if (shouldJustify) {
1230
+ drawColoredJustifiedLine(ctx, line.text, lineCharOffset, r.colorSpans,
1231
+ r.color || textColor, padding * d, screenY * d, availW * d);
1232
+ } else if (centered) {
1233
+ // Measure full line to center it, then draw colored from offset
1234
+ const lineW = ctx.measureText(line.text).width;
1235
+ const startX = (W * d - lineW) / 2;
1236
+ drawColoredLine(ctx, line.text, lineCharOffset, r.colorSpans,
1237
+ r.color || textColor, startX, screenY * d);
1238
+ } else {
1239
+ drawColoredLine(ctx, line.text, lineCharOffset, r.colorSpans,
1240
+ r.color || textColor, padding * d, screenY * d);
1241
+ }
1101
1242
  } else {
1102
1243
  if (centered) {
1103
1244
  ctx.textAlign = "center";
@@ -1110,6 +1251,7 @@ export function createReflowRenderer(container, options = {}) {
1110
1251
  }
1111
1252
  }
1112
1253
  }
1254
+ lineCharOffset += line.text.length;
1113
1255
  cursorY += lh;
1114
1256
  }
1115
1257
  } else if (r.type === "graphic" && r.bitmap) {
@@ -1132,7 +1274,8 @@ export function createReflowRenderer(container, options = {}) {
1132
1274
  }
1133
1275
  cursorY += r.drawH;
1134
1276
  }
1135
- cursorY += baseLH * 0.4;
1277
+ const gapRatio = r.region?.gapRatio ?? 0.4;
1278
+ cursorY += baseLH * Math.max(0.2, Math.min(gapRatio, 2.0));
1136
1279
  }
1137
1280
  }
1138
1281
 
@@ -1268,6 +1411,12 @@ export function createReflowRenderer(container, options = {}) {
1268
1411
 
1269
1412
  currentAnalysis = analysisCache.get(pageNum);
1270
1413
  currentPage = pageNum;
1414
+
1415
+ // Auto-match PDF body font size when user hasn't set an explicit fontSize
1416
+ if (!userSetFontSize && currentAnalysis.bodyFontSize) {
1417
+ fontSize = clamp(Math.round(currentAnalysis.bodyFontSize), minFont, maxFont);
1418
+ }
1419
+
1271
1420
  scrollY = 0;
1272
1421
  scrollVelocity = 0;
1273
1422
  reflow();
@@ -1279,6 +1428,7 @@ export function createReflowRenderer(container, options = {}) {
1279
1428
  graphicRegions: currentAnalysis.graphicRegions,
1280
1429
  pageWidth: currentAnalysis.pageWidth,
1281
1430
  pageHeight: currentAnalysis.pageHeight,
1431
+ bodyFontSize: currentAnalysis.bodyFontSize,
1282
1432
  });
1283
1433
  },
1284
1434