pretext-pdfjs 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -86,7 +86,40 @@ await TextLayer.enableReflow(container, fullText, {
86
86
  });
87
87
  ```
88
88
 
89
- ### Per-block reflow (images + text)
89
+ ### Reflow Mode (images preserved)
90
+
91
+ ```js
92
+ import { createReflowRenderer } from "pretext-pdfjs/reflow";
93
+
94
+ const renderer = createReflowRenderer(container, {
95
+ fontSize: 16,
96
+ enablePinchZoom: true,
97
+ enableMorph: false, // set true for fisheye scroll
98
+ fontFamily: '"Literata", Georgia, serif',
99
+ });
100
+ await renderer.open("document.pdf");
101
+ await renderer.showPage(1);
102
+ // Pinch to zoom — text reflows, images stay in place
103
+ ```
104
+
105
+ Unlike the text-only reader modes, reflow mode preserves images, vector graphics,
106
+ and document structure. It uses PDF.js's `operationsFilter` to render non-text
107
+ elements separately, then composites Pretext-reflowed text on top.
108
+
109
+ ### Pinch reader with preserved layout
110
+
111
+ ```js
112
+ import { createPDFPinchReader } from "pretext-pdfjs/pinch";
113
+
114
+ const reader = createPDFPinchReader(container, {
115
+ mode: "pinchType",
116
+ preserveLayout: true, // images stay in place
117
+ });
118
+ await reader.open("document.pdf");
119
+ await reader.showPage(1);
120
+ ```
121
+
122
+ ### Per-block reflow (full options)
90
123
 
91
124
  The reflow module bridges PDF mode (images preserved, no reflow) and reader modes (text reflows, images stripped). Text blocks reflow with Pretext at the target font size while images and vector graphics render as scaled bitmaps in their original positions.
92
125
 
@@ -101,8 +134,12 @@ const renderer = createReflowRenderer(container, {
101
134
  background: "#f4f1eb",
102
135
  textColor: "#252320",
103
136
  imageFit: "proportional", // "proportional" | "original" | "full-width"
137
+ maxWidth: Infinity, // max canvas width (default: full container)
104
138
  enablePinchZoom: true,
105
139
  enableMomentumScroll: true,
140
+ enableMorph: false, // fisheye scroll effect on text + images
141
+ morphRadius: 300, // morph effect radius in px
142
+ edgeFontRatio: 0.5, // edge font = 50% of center font
106
143
  onZoom: (fontSize) => console.log("Font size:", fontSize),
107
144
  onPageReady: ({ pageNum, textBlocks, graphicRegions }) => {
108
145
  console.log(`Page ${pageNum}: ${textBlocks.length} text blocks, ${graphicRegions.length} graphics`);
@@ -127,9 +164,9 @@ renderer.destroy();
127
164
 
128
165
  **How it works:**
129
166
 
130
- 1. **Analyze** — extracts text blocks (grouped by proximity) and graphic regions (images, vector paths) from the PDF page via `getTextContent()` and `getOperatorList()`. Renders the full page to an offscreen canvas and captures bitmap snippets for each graphic region.
167
+ 1. **Analyze** — extracts text blocks (grouped by proximity) and graphic regions (images, vector paths) from the PDF page via `getTextContent()` and `getOperatorList()`. Uses `operationsFilter` to render only non-text content to an offscreen canvas, and `recordImages` for precise image coordinates.
131
168
  2. **Reflow** — each text block is reflowed with Pretext's `prepareWithSegments()` + `layoutWithLines()` at the current font size. Graphic bitmaps are scaled proportionally.
132
- 3. **Composite** — walks the region map in reading order, drawing reflowed text lines and graphic bitmaps onto a single output canvas.
169
+ 3. **Composite** — walks the region map in reading order, drawing reflowed text lines and graphic bitmaps onto a single output canvas. With `enableMorph`, applies fisheye interpolation to both text and images.
133
170
 
134
171
  Step 1 runs once per page (cached). Steps 2-3 re-run on font size change, which is what makes pinch-to-zoom fast.
135
172
 
@@ -142,18 +179,19 @@ pretext-pdfjs/
142
179
  │ ├── pretext-text-layer.js # PretextTextLayer (drop-in replacement)
143
180
  │ ├── measurement-cache.js # Pretext-style Canvas measurement cache
144
181
  │ ├── viewer.js # PretextPDFViewer helper
145
- │ ├── pinch.js # Pinch-type PDF reader integration
146
- │ └── reflow.js # Per-block reflow (text + images)
147
- ├── demo.html # Self-contained demo page
182
+ │ ├── pinch.js # Pinch-type reading modes
183
+ │ └── reflow.js # Per-block reflow with image preservation
184
+ ├── demo.html # Library landing page
185
+ ├── reader.html # Full PDF reader demo
148
186
  ├── package.json
149
187
  └── README.md
150
188
  ```
151
189
 
152
- **Kept from PDF.js** (via `pdfjs-dist` dependency): core parser, canvas renderer, annotation layer, worker architecture, font loading.
190
+ **Kept from PDF.js**: core parser, canvas renderer, annotation layer, worker, font loading.
153
191
 
154
- **Replaced**: `TextLayer` class — measurement cache, ascent detection, width scaling.
192
+ **Replaced**: TextLayer — measurement cache, ascent detection, width scaling.
155
193
 
156
- **Added**: `pretextMetrics`, `enableReflow()`, pinch/morph/combined reading modes, per-block reflow with image preservation.
194
+ **Added**: pretextMetrics, enableReflow(), pinch/morph reading modes, per-block reflow with image preservation.
157
195
 
158
196
  ## Built on
159
197
 
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pretext-pdfjs",
3
- "version": "0.2.1",
4
- "description": "Fork of PDF.js with @chenglou/pretext-native text layer — zero DOM reflows for text measurement",
3
+ "version": "0.3.1",
4
+ "description": "Pretext-native text layer for PDF.js — zero DOM reflows, per-block reflow with image preservation, pinch-to-zoom text",
5
5
  "type": "module",
6
6
  "main": "./src/index.js",
7
7
  "exports": {
package/src/pinch.js CHANGED
@@ -339,13 +339,51 @@ function createTextCanvas(container, opts = {}) {
339
339
  * @param {Function} [options.onPageLoad] - called with { pageNum, text, numPages }
340
340
  */
341
341
  export function createPDFPinchReader(container, options = {}) {
342
+ const preserveLayout = options.preserveLayout ?? false;
343
+ const mode = options.mode || "pinchType";
344
+
345
+ // When preserveLayout is true, delegate to the reflow engine
346
+ if (preserveLayout) {
347
+ let reflowRenderer = null;
348
+
349
+ return {
350
+ async open(source) {
351
+ const { createReflowRenderer } = await import("./reflow.js");
352
+ reflowRenderer = createReflowRenderer(container, {
353
+ fontSize: options.fontSize ?? 18,
354
+ minFontSize: options.minFontSize ?? 8,
355
+ maxFontSize: options.maxFontSize ?? 60,
356
+ fontFamily: options.fontFamily,
357
+ lineHeight: options.lineHeight ?? 1.6,
358
+ padding: options.padding ?? 28,
359
+ background: options.background ?? "#0a0a0a",
360
+ textColor: options.textColor ?? "#e5e5e5",
361
+ enablePinchZoom: true,
362
+ enableMorph: mode === "pinchMorph" || mode === "scrollMorph",
363
+ friction: options.friction ?? 0.95,
364
+ workerSrc: options.workerSrc,
365
+ onZoom: options.onZoom,
366
+ });
367
+ return reflowRenderer.open(source);
368
+ },
369
+ async showPage(pageNum) { return reflowRenderer.showPage(pageNum); },
370
+ async showAll() { return reflowRenderer.showAll(); },
371
+ async nextPage() { return reflowRenderer.nextPage(); },
372
+ async prevPage() { return reflowRenderer.prevPage(); },
373
+ resize() { /* handled by ResizeObserver in reflow */ },
374
+ destroy() { reflowRenderer?.destroy(); },
375
+ get currentPage() { return reflowRenderer?.currentPage ?? 0; },
376
+ get numPages() { return reflowRenderer?.numPages ?? 0; },
377
+ get canvas() { return reflowRenderer?.canvas ?? null; },
378
+ get mode() { return mode; },
379
+ };
380
+ }
381
+
342
382
  let pdfjs = null;
343
383
  let pdfDoc = null;
344
384
  let textInstance = null;
345
385
  let currentPage = 0;
346
386
 
347
- const mode = options.mode || "pinchType";
348
-
349
387
  async function ensurePdfjs() {
350
388
  if (pdfjs) return;
351
389
  pdfjs = await import("pdfjs-dist");
package/src/reflow.js CHANGED
@@ -14,6 +14,25 @@ function clamp(v, min, max) {
14
14
  return Math.max(min, Math.min(max, v));
15
15
  }
16
16
 
17
+ /**
18
+ * Draw a line of text with justified spacing (equal space between words).
19
+ */
20
+ function drawJustifiedLine(ctx, text, x, y, availWidth) {
21
+ const words = text.split(" ");
22
+ if (words.length <= 1) {
23
+ ctx.fillText(text, x, y);
24
+ return;
25
+ }
26
+ let totalWordWidth = 0;
27
+ for (const w of words) totalWordWidth += ctx.measureText(w).width;
28
+ const extraSpace = (availWidth - totalWordWidth) / (words.length - 1);
29
+ let xPos = x;
30
+ for (const w of words) {
31
+ ctx.fillText(w, xPos, y);
32
+ xPos += ctx.measureText(w).width + extraSpace;
33
+ }
34
+ }
35
+
17
36
  function bboxOverlap(a, b) {
18
37
  const x1 = Math.max(a.x, b.x);
19
38
  const y1 = Math.max(a.y, b.y);
@@ -25,13 +44,106 @@ function bboxOverlap(a, b) {
25
44
  return smaller > 0 ? intersection / smaller : 0;
26
45
  }
27
46
 
47
+ // ─── Font metadata extraction ────────────────────────────────────────────
48
+
49
+ /**
50
+ * Extract real font metadata (bold, italic, weight, loadedName) from
51
+ * page.commonObjs. Must be called AFTER page.render() so fonts are loaded.
52
+ */
53
+ async function extractFontMetadata(page, opList, OPS) {
54
+ const fontMap = new Map();
55
+
56
+ for (let i = 0; i < opList.fnArray.length; i++) {
57
+ if (opList.fnArray[i] === OPS.setFont) {
58
+ const fontRefName = opList.argsArray[i][0];
59
+ if (fontMap.has(fontRefName)) continue;
60
+
61
+ try {
62
+ const fontObj = page.commonObjs.get(fontRefName);
63
+ if (fontObj) {
64
+ fontMap.set(fontRefName, {
65
+ bold: fontObj.bold || false,
66
+ black: fontObj.black || false,
67
+ italic: fontObj.italic || false,
68
+ loadedName: fontObj.loadedName || null,
69
+ fallbackName: fontObj.fallbackName || "sans-serif",
70
+ css: fontObj.systemFontInfo?.css || null,
71
+ isMonospace: fontObj.isMonospace || false,
72
+ isSerifFont: fontObj.isSerifFont || false,
73
+ });
74
+ }
75
+ } catch (_) {
76
+ // Font not yet loaded — skip
77
+ }
78
+ }
79
+ }
80
+ return fontMap;
81
+ }
82
+
83
+ // ─── Text color extraction ───────────────────────────────────────────────
84
+
85
+ /**
86
+ * Extract fill colors from the operator list, indexed by text-drawing op.
87
+ * The evaluator normalizes all fill-color commands to setFillRGBColor with
88
+ * a hex string, so that's the primary path. Returns an array parallel to
89
+ * the text items from getTextContent().
90
+ */
91
+ function extractTextColors(opList, OPS) {
92
+ const textColors = [];
93
+ let currentColor = "#000000";
94
+
95
+ const textDrawOps = new Set([
96
+ OPS.showText,
97
+ OPS.showSpacedText,
98
+ OPS.nextLineShowText,
99
+ OPS.nextLineSetSpacingShowText,
100
+ ]);
101
+
102
+ for (let i = 0; i < opList.fnArray.length; i++) {
103
+ const fn = opList.fnArray[i];
104
+
105
+ if (fn === OPS.setFillRGBColor) {
106
+ currentColor = opList.argsArray[i][0];
107
+ } else if (fn === OPS.setFillTransparent) {
108
+ currentColor = "transparent";
109
+ } else if (
110
+ fn === OPS.setFillGray ||
111
+ fn === OPS.setFillColor ||
112
+ fn === OPS.setFillCMYKColor ||
113
+ fn === OPS.setFillColorN
114
+ ) {
115
+ const args = opList.argsArray[i];
116
+ if (args?.[0] && typeof args[0] === "string" && args[0].startsWith("#")) {
117
+ currentColor = args[0];
118
+ }
119
+ }
120
+
121
+ if (textDrawOps.has(fn)) {
122
+ textColors.push(currentColor);
123
+ }
124
+ }
125
+
126
+ return textColors;
127
+ }
128
+
28
129
  // ─── Page analysis ────────────────────────────────────────────────────────
29
130
 
30
131
  /**
31
132
  * Group adjacent text items into text blocks by proximity.
32
133
  * Also extracts font metadata: average size, italic, bold.
33
134
  */
34
- function groupTextBlocks(textItems, pageHeight, styles) {
135
+ function groupTextBlocks(textItems, pageHeight, styles, fontMap, textColors) {
136
+ // Attach colors to text items before filtering (textColors is parallel to
137
+ // the full items array from getTextContent, including empty items)
138
+ if (textColors) {
139
+ let colorIdx = 0;
140
+ for (const item of textItems) {
141
+ if (item.str !== undefined) {
142
+ item._color = textColors[colorIdx++] || "#000000";
143
+ }
144
+ }
145
+ }
146
+
35
147
  const sorted = [...textItems].filter(i => i.str?.trim()).sort((a, b) => {
36
148
  const ay = pageHeight - a.transform[5];
37
149
  const by = pageHeight - b.transform[5];
@@ -73,10 +185,20 @@ function groupTextBlocks(textItems, pageHeight, styles) {
73
185
  const isSuperscript = isShortItem && isHorizAdjacent && sizeRatio > 1.3;
74
186
  const sizeOk = sizeRatio < 1.3 || isSuperscript;
75
187
 
188
+ // Large horizontal gap between consecutive items → likely column break
189
+ // Only for substantive text (skip short items like superscript markers)
190
+ const isLongItem = (item.str || "").trim().length > 3;
191
+ if (isLongItem && (hGap > lastFH * 1.5 ||
192
+ (current.bbox.w > lastFH * 10 && x < current.bbox.x - lastFH * 3))) {
193
+ blocks.push(current);
194
+ current = { items: [item], bbox: { x, y, w: item.width || 0, h: fontHeight } };
195
+ continue;
196
+ }
197
+
76
198
  if (
77
199
  sizeOk &&
78
200
  verticalGap < lastFH * 2.5 &&
79
- x < current.bbox.x + current.bbox.w + lastFH * 2
201
+ x < current.bbox.x + current.bbox.w + lastFH * 1.5
80
202
  ) {
81
203
  current.items.push(item);
82
204
  current.bbox.x = Math.min(current.bbox.x, x);
@@ -94,7 +216,48 @@ function groupTextBlocks(textItems, pageHeight, styles) {
94
216
  }
95
217
  if (current) blocks.push(current);
96
218
 
97
- // Compute font metadata per block
219
+ // Post-process: merge orphan tiny blocks (superscripts, markers like *, +, #)
220
+ // into the nearest larger block if vertically close
221
+ for (let i = blocks.length - 1; i >= 0; i--) {
222
+ const block = blocks[i];
223
+ if (block.items.length > 2) continue;
224
+ const text = block.items.map(it => (it.str || "").trim()).join("");
225
+ if (text.length > 3 || text.length === 0) continue;
226
+
227
+ let bestIdx = -1, bestDist = Infinity;
228
+ for (let j = 0; j < blocks.length; j++) {
229
+ if (j === i) continue;
230
+ const o = blocks[j];
231
+ // Skip other orphans (short text blocks)
232
+ const oText = o.items.map(it => (it.str || "").trim()).join("");
233
+ if (oText.length <= 3) continue;
234
+ // Check vertical proximity: orphan center within 30pt of target block
235
+ const bcy = block.bbox.y + block.bbox.h / 2;
236
+ if (bcy < o.bbox.y - 30 || bcy > o.bbox.y + o.bbox.h + 30) continue;
237
+ // Horizontal edge-to-edge distance (0 if overlapping)
238
+ const hDist = Math.max(0,
239
+ block.bbox.x > o.bbox.x + o.bbox.w ? block.bbox.x - (o.bbox.x + o.bbox.w) :
240
+ o.bbox.x > block.bbox.x + block.bbox.w ? o.bbox.x - (block.bbox.x + block.bbox.w) : 0);
241
+ if (hDist < bestDist) {
242
+ bestDist = hDist;
243
+ bestIdx = j;
244
+ }
245
+ }
246
+
247
+ if (bestIdx >= 0 && bestDist < Math.max(blocks[bestIdx].bbox.h, 20)) {
248
+ const target = blocks[bestIdx];
249
+ target.items.push(...block.items);
250
+ const newX = Math.min(target.bbox.x, block.bbox.x);
251
+ const newRight = Math.max(target.bbox.x + target.bbox.w, block.bbox.x + block.bbox.w);
252
+ const newBottom = Math.max(target.bbox.y + target.bbox.h, block.bbox.y + block.bbox.h);
253
+ target.bbox.x = newX;
254
+ target.bbox.w = newRight - newX;
255
+ target.bbox.h = newBottom - target.bbox.y;
256
+ blocks.splice(i, 1);
257
+ }
258
+ }
259
+
260
+ // Compute font metadata per block using real font objects from commonObjs
98
261
  for (const block of blocks) {
99
262
  const sizes = [];
100
263
  let italicCount = 0;
@@ -104,18 +267,10 @@ function groupTextBlocks(textItems, pageHeight, styles) {
104
267
  const fh = Math.hypot(item.transform[2], item.transform[3]);
105
268
  if (fh > 0) sizes.push(fh);
106
269
 
107
- // Detect italic/bold from fontName and style
108
- const name = (item.fontName || "").toLowerCase();
109
- const style = styles?.[item.fontName];
110
- const family = (style?.fontFamily || "").toLowerCase();
111
- const combined = name + " " + family;
112
-
113
- if (combined.includes("italic") || combined.includes("oblique")) italicCount++;
114
- if (combined.includes("bold") || combined.includes("black") || combined.includes("heavy")) boldCount++;
115
-
116
- // Also detect italic from transform skew
117
- if (Math.abs(item.transform[2]) > 0.1 && Math.abs(item.transform[1]) < 0.1) {
118
- italicCount++;
270
+ const fontMeta = fontMap?.get(item.fontName);
271
+ if (fontMeta) {
272
+ if (fontMeta.italic) italicCount++;
273
+ if (fontMeta.bold || fontMeta.black) boldCount++;
119
274
  }
120
275
  }
121
276
 
@@ -124,10 +279,28 @@ function groupTextBlocks(textItems, pageHeight, styles) {
124
279
  : 12;
125
280
  block.isItalic = italicCount > block.items.length * 0.4;
126
281
  block.isBold = boldCount > block.items.length * 0.4;
282
+ block.isBlack = block.items.some(it => fontMap?.get(it.fontName)?.black);
283
+
284
+ // Store the font metadata for the dominant font in this block
285
+ block.fontMeta = fontMap?.get(block.items[0]?.fontName) || null;
127
286
 
128
- // Detect font family from the PDF's style metadata
129
- const sampleStyle = styles?.[block.items[0]?.fontName];
130
- block.pdfFontFamily = sampleStyle?.fontFamily || null;
287
+ // Compute dominant fill color for the block
288
+ const colorFreq = {};
289
+ for (const item of block.items) {
290
+ const c = item._color || "#000000";
291
+ if (c !== "transparent") {
292
+ colorFreq[c] = (colorFreq[c] || 0) + 1;
293
+ }
294
+ }
295
+ let dominantColor = "#000000";
296
+ let maxColorFreq = 0;
297
+ for (const [c, freq] of Object.entries(colorFreq)) {
298
+ if (freq > maxColorFreq) {
299
+ maxColorFreq = freq;
300
+ dominantColor = c;
301
+ }
302
+ }
303
+ block.color = dominantColor;
131
304
  }
132
305
 
133
306
  return blocks;
@@ -138,8 +311,7 @@ function groupTextBlocks(textItems, pageHeight, styles) {
138
311
  * Only captures image operators (paintImageXObject etc).
139
312
  * Skips path/fill/stroke to avoid false positives from text decorations.
140
313
  */
141
- async function extractGraphicRegions(page, OPS) {
142
- const ops = await page.getOperatorList();
314
+ function extractGraphicRegions(opList, OPS) {
143
315
  const regions = [];
144
316
  const ctmStack = [];
145
317
  let ctm = [1, 0, 0, 1, 0, 0];
@@ -165,9 +337,9 @@ async function extractGraphicRegions(page, OPS) {
165
337
  return [ctm[0] * x + ctm[2] * y + ctm[4], ctm[1] * x + ctm[3] * y + ctm[5]];
166
338
  }
167
339
 
168
- for (let i = 0; i < ops.fnArray.length; i++) {
169
- const fn = ops.fnArray[i];
170
- const args = ops.argsArray[i];
340
+ for (let i = 0; i < opList.fnArray.length; i++) {
341
+ const fn = opList.fnArray[i];
342
+ const args = opList.argsArray[i];
171
343
 
172
344
  if (fn === OPS.save) {
173
345
  ctmStack.push(ctm.slice());
@@ -200,31 +372,132 @@ async function extractGraphicRegions(page, OPS) {
200
372
  return regions;
201
373
  }
202
374
 
375
+ /**
376
+ * Detect graphic regions by scanning the rendered canvas for non-text content.
377
+ * Complements op-based detection by also finding vector graphics (charts, diagrams).
378
+ */
379
+ function detectGraphicRegionsFromRender(offCanvas, textBlocks, renderScale) {
380
+ const w = offCanvas.width;
381
+ const h = offCanvas.height;
382
+ const ctx = offCanvas.getContext("2d");
383
+
384
+ const cellPx = 16;
385
+ const cols = Math.ceil(w / cellPx);
386
+ const rows = Math.ceil(h / cellPx);
387
+ const occupied = new Uint8Array(cols * rows);
388
+
389
+ // Mark cells covered by text blocks
390
+ for (const block of textBlocks) {
391
+ const margin = 4 * renderScale;
392
+ const x0 = Math.floor(Math.max(0, block.bbox.x * renderScale - margin) / cellPx);
393
+ const y0 = Math.floor(Math.max(0, block.bbox.y * renderScale - margin) / cellPx);
394
+ const x1 = Math.ceil(Math.min(w, (block.bbox.x + block.bbox.w) * renderScale + margin) / cellPx);
395
+ const y1 = Math.ceil(Math.min(h, (block.bbox.y + block.bbox.h) * renderScale + margin) / cellPx);
396
+ for (let cy = y0; cy < y1 && cy < rows; cy++)
397
+ for (let cx = x0; cx < x1 && cx < cols; cx++)
398
+ occupied[cy * cols + cx] = 1;
399
+ }
400
+
401
+ // Scan non-text cells for visible content
402
+ const imgData = ctx.getImageData(0, 0, w, h);
403
+ const pixels = imgData.data;
404
+ const hasContent = new Uint8Array(cols * rows);
405
+
406
+ for (let cy = 0; cy < rows; cy++) {
407
+ for (let cx = 0; cx < cols; cx++) {
408
+ if (occupied[cy * cols + cx]) continue;
409
+ const px0 = cx * cellPx, py0 = cy * cellPx;
410
+ const px1 = Math.min(px0 + cellPx, w), py1 = Math.min(py0 + cellPx, h);
411
+ let dark = 0, total = 0;
412
+ for (let py = py0; py < py1; py += 2) {
413
+ for (let px = px0; px < px1; px += 2) {
414
+ const idx = (py * w + px) * 4;
415
+ if (pixels[idx + 3] > 20) {
416
+ const lum = 0.299 * pixels[idx] + 0.587 * pixels[idx + 1] + 0.114 * pixels[idx + 2];
417
+ if (lum < 240) dark++;
418
+ }
419
+ total++;
420
+ }
421
+ }
422
+ if (total > 0 && dark / total > 0.05) hasContent[cy * cols + cx] = 1;
423
+ }
424
+ }
425
+
426
+ // Connected-component labeling to find graphic regions
427
+ const visited = new Uint8Array(cols * rows);
428
+ const regions = [];
429
+ for (let cy = 0; cy < rows; cy++) {
430
+ for (let cx = 0; cx < cols; cx++) {
431
+ if (!hasContent[cy * cols + cx] || visited[cy * cols + cx]) continue;
432
+ const queue = [[cx, cy]];
433
+ visited[cy * cols + cx] = 1;
434
+ let minX = cx, maxX = cx, minY = cy, maxY = cy, count = 0;
435
+ while (queue.length > 0) {
436
+ const [qx, qy] = queue.shift();
437
+ minX = Math.min(minX, qx); maxX = Math.max(maxX, qx);
438
+ minY = Math.min(minY, qy); maxY = Math.max(maxY, qy);
439
+ count++;
440
+ for (const [dx, dy] of [[-1,0],[1,0],[0,-1],[0,1]]) {
441
+ const nx = qx + dx, ny = qy + dy;
442
+ if (nx >= 0 && nx < cols && ny >= 0 && ny < rows &&
443
+ hasContent[ny * cols + nx] && !visited[ny * cols + nx]) {
444
+ visited[ny * cols + nx] = 1;
445
+ queue.push([nx, ny]);
446
+ }
447
+ }
448
+ }
449
+ const rx = minX * cellPx / renderScale;
450
+ const ry = minY * cellPx / renderScale;
451
+ const rw = (maxX - minX + 1) * cellPx / renderScale;
452
+ const rh = (maxY - minY + 1) * cellPx / renderScale;
453
+ if (rw > 30 && rh > 30 && count > 4) {
454
+ regions.push({ type: "graphic", bbox: { x: rx, y: ry, w: rw, h: rh }, screenCoords: true });
455
+ }
456
+ }
457
+ }
458
+ return regions;
459
+ }
460
+
203
461
  /**
204
462
  * Build text content for a block, preserving paragraph breaks.
205
463
  */
206
464
  function blockToText(block, pageHeight) {
207
465
  let result = "";
208
466
  let lastY = null;
467
+ let lastX = null;
468
+ let lastW = 0;
209
469
  let lastFontSize = 12;
210
470
 
211
471
  for (const item of block.items) {
212
472
  if (!item.str) continue;
473
+ const currentX = item.transform[4];
213
474
  const currentY = pageHeight - item.transform[5];
214
475
  const fontHeight = Math.hypot(item.transform[2], item.transform[3]);
215
476
  if (fontHeight > 0) lastFontSize = fontHeight;
216
477
 
217
478
  if (lastY !== null) {
218
- const gap = Math.abs(currentY - lastY);
219
- if (gap > lastFontSize * 1.8) {
479
+ const vGap = Math.abs(currentY - lastY);
480
+ const isShortItem = (item.str || "").trim().length <= 2;
481
+ if (vGap > lastFontSize * 1.8 && !isShortItem) {
220
482
  result += "\n\n";
221
- } else if (gap > lastFontSize * 0.3) {
483
+ } else if (vGap > lastFontSize * 0.3) {
484
+ // Different line — insert space
222
485
  if (!result.endsWith(" ") && !result.endsWith("\n")) {
223
486
  result += " ";
224
487
  }
488
+ } else if (lastX !== null) {
489
+ // Same line — check horizontal gap between items
490
+ const hGap = currentX - (lastX + lastW);
491
+ if (hGap > lastFontSize * 0.15) {
492
+ if (!result.endsWith(" ") && !result.endsWith("\n")) {
493
+ result += " ";
494
+ }
495
+ }
225
496
  }
226
497
  }
227
498
  lastY = currentY;
499
+ lastX = currentX;
500
+ lastW = item.width || 0;
228
501
  result += item.str;
229
502
  }
230
503
  return result.trim();
@@ -245,9 +518,10 @@ function buildRegionMap(textBlocks, graphicRegions, pageHeight) {
245
518
  }
246
519
 
247
520
  for (const gr of graphicRegions) {
248
- // PDF coords: y is from bottom convert to top-down
249
- const topY = pageHeight - gr.bbox.y - gr.bbox.h;
250
- const bbox = { x: gr.bbox.x, y: topY, w: gr.bbox.w, h: gr.bbox.h };
521
+ // Render-based regions are already in screen coords; op-based need conversion
522
+ const bbox = gr.screenCoords
523
+ ? { ...gr.bbox }
524
+ : { x: gr.bbox.x, y: pageHeight - gr.bbox.y - gr.bbox.h, w: gr.bbox.w, h: gr.bbox.h };
251
525
 
252
526
  // Skip if this graphic region overlaps significantly with any text block
253
527
  const overlapsText = textBboxes.some(tb => bboxOverlap(bbox, tb) > 0.3);
@@ -256,28 +530,119 @@ function buildRegionMap(textBlocks, graphicRegions, pageHeight) {
256
530
  }
257
531
  }
258
532
 
259
- // Detect columns: find the midpoint X where blocks cluster on left vs right
533
+ // ── Column detection via histogram gap-finding ──
260
534
  const pageWidth = Math.max(...regions.map(r => r.bbox.x + r.bbox.w), 1);
261
- const midX = pageWidth / 2;
262
- const leftBlocks = regions.filter(r => r.bbox.x + r.bbox.w / 2 < midX);
263
- const rightBlocks = regions.filter(r => r.bbox.x + r.bbox.w / 2 >= midX);
264
- const hasColumns = leftBlocks.length > 2 && rightBlocks.length > 2 &&
265
- rightBlocks.some(r => leftBlocks.some(l => Math.abs(l.bbox.y - r.bbox.y) < 20));
535
+ const narrowBlocks = regions.filter(r => r.bbox.w <= pageWidth * 0.6);
536
+ let gapX = pageWidth / 2;
537
+ let hasColumns = false;
538
+
539
+ if (narrowBlocks.length >= 4) {
540
+ // Build horizontal coverage histogram
541
+ const binCount = 100;
542
+ const binWidth = pageWidth / binCount;
543
+ const coverage = new Uint8Array(binCount);
544
+ for (const r of narrowBlocks) {
545
+ const b0 = Math.max(0, Math.floor(r.bbox.x / binWidth));
546
+ const b1 = Math.min(binCount, Math.ceil((r.bbox.x + r.bbox.w) / binWidth));
547
+ for (let b = b0; b < b1; b++) coverage[b]++;
548
+ }
549
+
550
+ // Find widest empty gap in middle 60% of page
551
+ const searchStart = Math.floor(binCount * 0.2);
552
+ const searchEnd = Math.ceil(binCount * 0.8);
553
+ let gapStart = -1, gapLen = 0, bestStart = -1, bestLen = 0;
554
+ for (let b = searchStart; b < searchEnd; b++) {
555
+ if (coverage[b] === 0) {
556
+ if (gapStart < 0) gapStart = b;
557
+ gapLen = b - gapStart + 1;
558
+ } else {
559
+ if (gapLen > bestLen) { bestLen = gapLen; bestStart = gapStart; }
560
+ gapStart = -1; gapLen = 0;
561
+ }
562
+ }
563
+ if (gapLen > bestLen) { bestLen = gapLen; bestStart = gapStart; }
564
+
565
+ if (bestLen >= 2) {
566
+ gapX = (bestStart + bestLen / 2) * binWidth;
567
+ const leftCount = narrowBlocks.filter(r => r.bbox.x + r.bbox.w / 2 < gapX).length;
568
+ const rightCount = narrowBlocks.filter(r => r.bbox.x + r.bbox.w / 2 >= gapX).length;
569
+ hasColumns = leftCount > 2 && rightCount > 2;
570
+ }
571
+ }
266
572
 
573
+ // ── Detect text alignment per block (including justified) ──
574
+ for (const region of regions) {
575
+ if (region.type !== "text") continue;
576
+ const block = region.block;
577
+ const leftMargin = block.bbox.x;
578
+ const rightMargin = pageWidth - (block.bbox.x + block.bbox.w);
579
+ const marginDiff = Math.abs(leftMargin - rightMargin);
580
+
581
+ // Detect justified text: multiple lines with consistent right edges
582
+ let isJustified = false;
583
+ if (block.items.length >= 3) {
584
+ const lines = [];
585
+ let lineItems = [];
586
+ let lastLineY = null;
587
+ for (const item of block.items) {
588
+ const y = pageHeight - item.transform[5];
589
+ if (lastLineY !== null && Math.abs(y - lastLineY) > 2) {
590
+ if (lineItems.length > 0) lines.push(lineItems);
591
+ lineItems = [];
592
+ }
593
+ lineItems.push(item);
594
+ lastLineY = y;
595
+ }
596
+ if (lineItems.length > 0) lines.push(lineItems);
597
+
598
+ if (lines.length >= 3) {
599
+ // Compute right edge of each line (except last — last line is usually ragged)
600
+ const rightEdges = [];
601
+ for (let li = 0; li < lines.length - 1; li++) {
602
+ const lastItem = lines[li][lines[li].length - 1];
603
+ const rightX = lastItem.transform[4] + (lastItem.width || 0);
604
+ rightEdges.push(rightX);
605
+ }
606
+ if (rightEdges.length >= 2) {
607
+ const maxRight = Math.max(...rightEdges);
608
+ const consistent = rightEdges.filter(r => Math.abs(r - maxRight) < pageWidth * 0.02);
609
+ isJustified = consistent.length > rightEdges.length * 0.7;
610
+ }
611
+ }
612
+ }
613
+
614
+ if (hasColumns && block.bbox.w <= pageWidth * 0.6) {
615
+ block.align = isJustified ? "justify" : "left";
616
+ } else if (isJustified) {
617
+ block.align = "justify";
618
+ } else if (leftMargin > pageWidth * 0.05 && marginDiff < pageWidth * 0.1) {
619
+ block.align = "center";
620
+ } else {
621
+ block.align = "left";
622
+ }
623
+ }
624
+
625
+ // ── Sort in reading order ──
267
626
  if (hasColumns) {
268
- // Two-column: sort each column top-to-bottom, then concatenate
269
- // Full-width blocks (spanning > 60% of page) go first, sorted by Y
270
627
  const fullWidth = regions.filter(r => r.bbox.w > pageWidth * 0.6);
271
- const leftCol = regions.filter(r => r.bbox.w <= pageWidth * 0.6 && r.bbox.x + r.bbox.w / 2 < midX);
272
- const rightCol = regions.filter(r => r.bbox.w <= pageWidth * 0.6 && r.bbox.x + r.bbox.w / 2 >= midX);
628
+ const leftCol = regions.filter(r => r.bbox.w <= pageWidth * 0.6 && r.bbox.x + r.bbox.w / 2 < gapX);
629
+ const rightCol = regions.filter(r => r.bbox.w <= pageWidth * 0.6 && r.bbox.x + r.bbox.w / 2 >= gapX);
273
630
  const byY = (a, b) => a.bbox.y - b.bbox.y;
274
631
  fullWidth.sort(byY);
275
632
  leftCol.sort(byY);
276
633
  rightCol.sort(byY);
634
+
635
+ // Interleave: full-width blocks mark section boundaries
277
636
  regions.length = 0;
278
- regions.push(...fullWidth, ...leftCol, ...rightCol);
637
+ let li = 0, ri = 0;
638
+ for (const fw of fullWidth) {
639
+ while (li < leftCol.length && leftCol[li].bbox.y < fw.bbox.y) regions.push(leftCol[li++]);
640
+ while (ri < rightCol.length && rightCol[ri].bbox.y < fw.bbox.y) regions.push(rightCol[ri++]);
641
+ regions.push(fw);
642
+ }
643
+ while (li < leftCol.length) regions.push(leftCol[li++]);
644
+ while (ri < rightCol.length) regions.push(rightCol[ri++]);
279
645
  } else {
280
- // Single column: sort by Y then X
281
646
  regions.sort((a, b) => {
282
647
  if (Math.abs(a.bbox.y - b.bbox.y) > 10) return a.bbox.y - b.bbox.y;
283
648
  return a.bbox.x - b.bbox.x;
@@ -296,26 +661,24 @@ async function analyzePage(page, OPS) {
296
661
 
297
662
  // Get text content with styles
298
663
  const textContent = await page.getTextContent();
299
- const textBlocks = groupTextBlocks(textContent.items, pageHeight, textContent.styles);
300
664
 
301
- // Compute body font size (most common size = body text)
302
- const allSizes = textBlocks.map(b => Math.round(b.avgFontSize * 10) / 10);
303
- const freq = {};
304
- for (const s of allSizes) freq[s] = (freq[s] || 0) + 1;
305
- let bodyFontSize = 12;
306
- let maxFreq = 0;
307
- for (const [s, f] of Object.entries(freq)) {
308
- if (f > maxFreq) { maxFreq = f; bodyFontSize = parseFloat(s); }
309
- }
310
- // Compute fontScale per block
311
- for (const block of textBlocks) {
312
- block.fontScale = block.avgFontSize / bodyFontSize;
665
+ // Get operator list once (reused for text/non-text classification + image extraction + font metadata)
666
+ const opList = await page.getOperatorList();
667
+
668
+ // Identify text operation indices for operationsFilter
669
+ const textOpIndices = new Set();
670
+ let inTextBlock = false;
671
+ for (let i = 0; i < opList.fnArray.length; i++) {
672
+ const fn = opList.fnArray[i];
673
+ if (fn === OPS.beginText) inTextBlock = true;
674
+ if (inTextBlock) textOpIndices.add(i);
675
+ if (fn === OPS.endText) inTextBlock = false;
313
676
  }
314
677
 
315
- // Get graphic regions (images only, no paths)
316
- const graphicRegions = await extractGraphicRegions(page, OPS);
678
+ // Extract graphic regions from operator list CTM tracking
679
+ const opGraphicRegions = extractGraphicRegions(opList, OPS);
317
680
 
318
- // Render full page to offscreen canvas for bitmap extraction
681
+ // Render non-text only (images, paths, fills, backgrounds)
319
682
  const renderScale = 2;
320
683
  const offCanvas = document.createElement("canvas");
321
684
  offCanvas.width = Math.floor(pageWidth * renderScale);
@@ -326,9 +689,100 @@ async function analyzePage(page, OPS) {
326
689
  await page.render({
327
690
  canvasContext: offCtx,
328
691
  viewport: renderViewport,
692
+ operationsFilter: (index) => !textOpIndices.has(index),
329
693
  }).promise;
330
694
 
331
- // Build region map (filters overlapping graphics)
695
+ // Get precise image coordinates via recordImages (supplements CTM detection).
696
+ // This full render also loads fonts into commonObjs as a side effect.
697
+ let imageCoordRegions = [];
698
+ let fullRenderDone = false;
699
+ try {
700
+ const imgTrackCanvas = document.createElement("canvas");
701
+ imgTrackCanvas.width = offCanvas.width;
702
+ imgTrackCanvas.height = offCanvas.height;
703
+ const imgRenderTask = page.render({
704
+ canvasContext: imgTrackCanvas.getContext("2d"),
705
+ viewport: renderViewport,
706
+ recordImages: true,
707
+ });
708
+ await imgRenderTask.promise;
709
+ fullRenderDone = true;
710
+ const imageCoords = imgRenderTask.imageCoordinates;
711
+ if (imageCoords && imageCoords.length > 0) {
712
+ for (let j = 0; j < imageCoords.length; j += 6) {
713
+ const x1 = imageCoords[j], y1 = imageCoords[j + 1];
714
+ const x2 = imageCoords[j + 2], y2 = imageCoords[j + 3];
715
+ const x3 = imageCoords[j + 4], y3 = imageCoords[j + 5];
716
+ const xs = [x1, x2, x3];
717
+ const ys = [y1, y2, y3];
718
+ const minX = Math.min(...xs) / renderScale;
719
+ const maxX = Math.max(...xs) / renderScale;
720
+ const minY = Math.min(...ys) / renderScale;
721
+ const maxY = Math.max(...ys) / renderScale;
722
+ if (maxX - minX > 10 && maxY - minY > 10) {
723
+ imageCoordRegions.push({
724
+ type: "graphic",
725
+ bbox: { x: minX, y: minY, w: maxX - minX, h: maxY - minY },
726
+ screenCoords: true,
727
+ });
728
+ }
729
+ }
730
+ }
731
+ } catch (_) {
732
+ // recordImages not supported — CTM fallback is used
733
+ }
734
+
735
+ // Ensure fonts are loaded for commonObjs access. If the recordImages render
736
+ // above didn't run, do a minimal full render to trigger font loading.
737
+ if (!fullRenderDone) {
738
+ const fontCanvas = document.createElement("canvas");
739
+ fontCanvas.width = 1;
740
+ fontCanvas.height = 1;
741
+ const fontViewport = page.getViewport({ scale: 0.1 });
742
+ try {
743
+ await page.render({ canvasContext: fontCanvas.getContext("2d"), viewport: fontViewport }).promise;
744
+ } catch (_) {}
745
+ }
746
+
747
+ // Extract real font metadata from commonObjs (bold, italic, weight, loadedName)
748
+ const fontMap = await extractFontMetadata(page, opList, OPS);
749
+
750
+ // Extract text colors from operator list (parallel to text items)
751
+ const textColors = extractTextColors(opList, OPS);
752
+
753
+ // Now group text blocks with real font data and colors
754
+ const textBlocks = groupTextBlocks(textContent.items, pageHeight, textContent.styles, fontMap, textColors);
755
+
756
+ // Compute body font size (most common size = body text)
757
+ const allSizes = textBlocks.map(b => Math.round(b.avgFontSize * 10) / 10);
758
+ const freq = {};
759
+ for (const s of allSizes) freq[s] = (freq[s] || 0) + 1;
760
+ let bodyFontSize = 12;
761
+ let maxFreq = 0;
762
+ for (const [s, f] of Object.entries(freq)) {
763
+ if (f > maxFreq) { maxFreq = f; bodyFontSize = parseFloat(s); }
764
+ }
765
+ // Compute fontScale per block
766
+ for (const block of textBlocks) {
767
+ block.fontScale = block.avgFontSize / bodyFontSize;
768
+ }
769
+
770
+ // Detect graphics from rendered non-text canvas (catches vector graphics)
771
+ const renderGraphicRegions = detectGraphicRegionsFromRender(offCanvas, textBlocks, renderScale);
772
+
773
+ // Merge all sources, deduplicating by overlap
774
+ const graphicRegions = [...opGraphicRegions];
775
+ for (const rg of [...imageCoordRegions, ...renderGraphicRegions]) {
776
+ const overlapsExisting = graphicRegions.some(og => {
777
+ const ogBbox = og.screenCoords
778
+ ? og.bbox
779
+ : { x: og.bbox.x, y: pageHeight - og.bbox.y - og.bbox.h, w: og.bbox.w, h: og.bbox.h };
780
+ return bboxOverlap(rg.bbox, ogBbox) > 0.3;
781
+ });
782
+ if (!overlapsExisting) graphicRegions.push(rg);
783
+ }
784
+
785
+ // Build region map (filters overlapping graphics, detects columns + alignment)
332
786
  const regionMap = buildRegionMap(textBlocks, graphicRegions, pageHeight);
333
787
 
334
788
  // Extract bitmap snippets for graphic regions only
@@ -354,13 +808,14 @@ async function analyzePage(page, OPS) {
354
808
  textBlocks,
355
809
  graphicRegions,
356
810
  offCanvas,
811
+ fontMap,
357
812
  };
358
813
  }
359
814
 
360
815
  // ─── Reflow + composite engine ────────────────────────────────────────────
361
816
 
362
817
  function reflowAndComposite(analysis, opts) {
363
- const { regionMap, bitmaps, pageWidth, pageHeight } = analysis;
818
+ const { regionMap, bitmaps, pageWidth, pageHeight, fontMap } = analysis;
364
819
  const {
365
820
  fontSize, fontFamily, lineHeight, padding, background,
366
821
  textColor, imageFit, canvasW,
@@ -389,15 +844,22 @@ function reflowAndComposite(analysis, opts) {
389
844
  continue;
390
845
  }
391
846
 
392
- // Per-block font properties
847
+ // Per-block font properties using real font metadata from commonObjs
393
848
  const blockFontSize = Math.round(fontSize * (block.fontScale || 1));
394
849
  const blockLH = blockFontSize * lineHeight;
850
+ const fm = block.fontMeta;
395
851
  const style = block.isItalic ? "italic" : "normal";
396
- const weight = block.isBold ? 700 : 400;
397
- // Use PDF's detected font family if available, otherwise fall back to configured
398
- const blockFamily = block.pdfFontFamily
399
- ? `${block.pdfFontFamily}, ${fontFamily}`
400
- : fontFamily;
852
+ const weight = block.isBlack ? 900 : block.isBold ? 700 : 400;
853
+
854
+ // Use the actual embedded PDF font if available (PDF.js loaded it via @font-face)
855
+ let blockFamily;
856
+ if (fm?.loadedName) {
857
+ blockFamily = `"${fm.loadedName}", ${fm.fallbackName || "sans-serif"}`;
858
+ } else if (fm?.css) {
859
+ blockFamily = fm.css;
860
+ } else {
861
+ blockFamily = fontFamily;
862
+ }
401
863
  const font = `${style} ${weight} ${blockFontSize}px ${blockFamily}`;
402
864
 
403
865
  const prepared = prepareWithSegments(text, font);
@@ -413,6 +875,8 @@ function reflowAndComposite(analysis, opts) {
413
875
  fontStyle: style,
414
876
  fontWeight: weight,
415
877
  fontFamily: blockFamily,
878
+ align: block.align || "left",
879
+ color: block.color,
416
880
  region,
417
881
  });
418
882
  } else {
@@ -456,73 +920,6 @@ function reflowAndComposite(analysis, opts) {
456
920
  return { totalHeight, reflowedRegions, fullPageFallback: false };
457
921
  }
458
922
 
459
- /**
460
- * Draw the reflowed content to canvas.
461
- */
462
- function drawComposite(ctx, reflowedRegions, analysis, opts, scrollY) {
463
- const {
464
- fontSize, fontFamily, lineHeight, padding,
465
- background, textColor, canvasW, canvasH, dpr,
466
- } = opts;
467
-
468
- const d = dpr;
469
- const baseLH = fontSize * lineHeight;
470
-
471
- ctx.fillStyle = background;
472
- ctx.fillRect(0, 0, canvasW * d, canvasH * d);
473
-
474
- // Full page fallback
475
- if (reflowedRegions.length === 0 && analysis.offCanvas) {
476
- const availableWidth = canvasW - padding * 2;
477
- const scale = Math.min(availableWidth / analysis.pageWidth, 1);
478
- ctx.drawImage(
479
- analysis.offCanvas,
480
- padding * d, padding * d,
481
- analysis.pageWidth * scale * d,
482
- analysis.pageHeight * scale * d
483
- );
484
- return;
485
- }
486
-
487
- let cursorY = padding;
488
- ctx.textBaseline = "top";
489
-
490
- for (const r of reflowedRegions) {
491
- if (r.type === "text" && r.lines) {
492
- const fs = r.fontSize || fontSize;
493
- const lh = r.lineHeight || baseLH;
494
- const style = r.fontStyle || "normal";
495
- const weight = r.fontWeight || 400;
496
-
497
- ctx.fillStyle = textColor;
498
- ctx.font = `${style} ${weight} ${fs * d}px ${fontFamily}`;
499
-
500
- for (const line of r.lines) {
501
- const screenY = cursorY - scrollY;
502
- if (screenY > -lh && screenY < canvasH + lh) {
503
- ctx.fillText(line.text, padding * d, screenY * d);
504
- }
505
- cursorY += lh;
506
- }
507
- } else if (r.type === "graphic" && r.bitmap) {
508
- const screenY = cursorY - scrollY;
509
- if (screenY > -r.drawH && screenY < canvasH + r.drawH) {
510
- const tmpCanvas = document.createElement("canvas");
511
- tmpCanvas.width = r.bitmap.data.width;
512
- tmpCanvas.height = r.bitmap.data.height;
513
- tmpCanvas.getContext("2d").putImageData(r.bitmap.data, 0, 0);
514
- ctx.drawImage(
515
- tmpCanvas,
516
- padding * d, screenY * d,
517
- r.drawW * d, r.drawH * d
518
- );
519
- }
520
- cursorY += r.drawH;
521
- }
522
- cursorY += baseLH * 0.4;
523
- }
524
- }
525
-
526
923
  // ─── Main API ─────────────────────────────────────────────────────────────
527
924
 
528
925
  export function createReflowRenderer(container, options = {}) {
@@ -530,7 +927,7 @@ export function createReflowRenderer(container, options = {}) {
530
927
  const maxFont = options.maxFontSize ?? 48;
531
928
  const fontFamily = options.fontFamily ?? '"Literata", Georgia, serif';
532
929
  const lhRatio = options.lineHeight ?? 1.6;
533
- const padding = options.padding ?? 24;
930
+ let padding = options.padding ?? 24;
534
931
  const bg = options.background ?? "#f4f1eb";
535
932
  const textColor = options.textColor ?? "#252320";
536
933
  const imageFit = options.imageFit ?? "proportional";
@@ -539,6 +936,12 @@ export function createReflowRenderer(container, options = {}) {
539
936
  const friction = options.friction ?? 0.95;
540
937
  const onZoom = options.onZoom;
541
938
  const onPageReady = options.onPageReady;
939
+ const enableMorph = options.enableMorph ?? false;
940
+ const morphRadius = options.morphRadius ?? 300;
941
+ const edgeFontRatio = options.edgeFontRatio ?? 0.5;
942
+ const maxWidth = options.maxWidth ?? Infinity;
943
+ const autoDetectPadding = options.autoDetectPadding ?? true;
944
+ const minPadding = options.minPadding ?? 20;
542
945
 
543
946
  let pdfjs = null;
544
947
  let pdfDoc = null;
@@ -589,6 +992,15 @@ export function createReflowRenderer(container, options = {}) {
589
992
 
590
993
  function reflow() {
591
994
  if (!currentAnalysis || W === 0) return;
995
+ // Auto-detect padding from PDF page margins
996
+ if (autoDetectPadding && currentAnalysis.textBlocks.length > 0 && currentAnalysis.pageWidth > 0) {
997
+ const minX = Math.min(...currentAnalysis.textBlocks.map(b => b.bbox.x));
998
+ const maxX = Math.max(...currentAnalysis.textBlocks.map(b => b.bbox.x + b.bbox.w));
999
+ const rightMargin = currentAnalysis.pageWidth - maxX;
1000
+ const pdfMargin = Math.min(minX, rightMargin);
1001
+ const marginRatio = pdfMargin / currentAnalysis.pageWidth;
1002
+ padding = Math.round(Math.max(minPadding, W * marginRatio));
1003
+ }
592
1004
  const result = reflowAndComposite(currentAnalysis, {
593
1005
  fontSize, fontFamily, lineHeight: lhRatio, padding,
594
1006
  background: bg, textColor, imageFit, canvasW: W, canvasH: H, dpr,
@@ -605,7 +1017,6 @@ export function createReflowRenderer(container, options = {}) {
605
1017
  ctx.fillRect(0, 0, W * dpr, H * dpr);
606
1018
  return;
607
1019
  }
608
- // Inline draw for performance (avoid function call overhead in rAF)
609
1020
  const d = dpr;
610
1021
  const baseLH = fontSize * lhRatio;
611
1022
 
@@ -626,19 +1037,78 @@ export function createReflowRenderer(container, options = {}) {
626
1037
 
627
1038
  let cursorY = padding;
628
1039
  ctx.textBaseline = "top";
1040
+ const viewCenter = H / 2;
629
1041
 
630
1042
  for (const r of reflowedRegions) {
631
1043
  if (r.type === "text" && r.lines) {
632
1044
  const fs = r.fontSize || fontSize;
633
1045
  const lh = r.lineHeight || baseLH;
634
1046
  const rFamily = r.fontFamily || fontFamily;
635
- ctx.fillStyle = textColor;
636
- ctx.font = `${r.fontStyle || "normal"} ${r.fontWeight || 400} ${fs * d}px ${rFamily}`;
1047
+ const style = r.fontStyle || "normal";
1048
+ const weight = r.fontWeight || 400;
1049
+ const centered = r.align === "center";
1050
+ const justified = r.align === "justify";
1051
+ const availW = W - padding * 2;
1052
+
1053
+ if (!enableMorph) {
1054
+ ctx.fillStyle = r.color || textColor;
1055
+ ctx.font = `${style} ${weight} ${fs * d}px ${rFamily}`;
1056
+ }
637
1057
 
638
- for (const line of r.lines) {
1058
+ for (let lineIdx = 0; lineIdx < r.lines.length; lineIdx++) {
1059
+ const line = r.lines[lineIdx];
639
1060
  const screenY = cursorY - scrollY;
640
1061
  if (screenY > -lh && screenY < H + lh) {
641
- ctx.fillText(line.text, padding * d, screenY * d);
1062
+ // Justified: distribute extra space between words (not on last line)
1063
+ const isLastLine = lineIdx === r.lines.length - 1;
1064
+ const shouldJustify = justified && !isLastLine && line.text.includes(" ");
1065
+
1066
+ if (enableMorph) {
1067
+ const dist = Math.abs(screenY - viewCenter);
1068
+ const t = Math.min(dist / morphRadius, 1);
1069
+ const ease = 1 - (1 - t) ** 3;
1070
+ const morphedFS = fs * (1 - ease * (1 - edgeFontRatio));
1071
+ const opacity = 1.0 + (0.2 - 1.0) * ease;
1072
+ // Blend the block's actual color toward gray at edges
1073
+ const blockColor = r.color || textColor;
1074
+ let morphColor;
1075
+ if (blockColor.startsWith("#") && blockColor.length === 7) {
1076
+ const br = parseInt(blockColor.slice(1, 3), 16);
1077
+ const bg_ = parseInt(blockColor.slice(3, 5), 16);
1078
+ const bb = parseInt(blockColor.slice(5, 7), 16);
1079
+ const dimR = Math.round(br + (160 - br) * ease);
1080
+ const dimG = Math.round(bg_ + (160 - bg_) * ease);
1081
+ const dimB = Math.round(bb + (160 - bb) * ease);
1082
+ morphColor = `rgb(${dimR},${dimG},${dimB})`;
1083
+ } else {
1084
+ const c = Math.round(37 - (37 - 160) * ease);
1085
+ morphColor = `rgb(${c},${c - 2},${c - 3})`;
1086
+ }
1087
+ ctx.save();
1088
+ ctx.globalAlpha = opacity;
1089
+ ctx.fillStyle = morphColor;
1090
+ ctx.font = `${style} ${weight} ${morphedFS * d}px ${rFamily}`;
1091
+ if (centered) {
1092
+ ctx.textAlign = "center";
1093
+ ctx.fillText(line.text, (W / 2) * d, screenY * d);
1094
+ ctx.textAlign = "left";
1095
+ } else if (shouldJustify) {
1096
+ drawJustifiedLine(ctx, line.text, padding * d, screenY * d, availW * d);
1097
+ } else {
1098
+ ctx.fillText(line.text, padding * d, screenY * d);
1099
+ }
1100
+ ctx.restore();
1101
+ } else {
1102
+ if (centered) {
1103
+ ctx.textAlign = "center";
1104
+ ctx.fillText(line.text, (W / 2) * d, screenY * d);
1105
+ ctx.textAlign = "left";
1106
+ } else if (shouldJustify) {
1107
+ drawJustifiedLine(ctx, line.text, padding * d, screenY * d, availW * d);
1108
+ } else {
1109
+ ctx.fillText(line.text, padding * d, screenY * d);
1110
+ }
1111
+ }
642
1112
  }
643
1113
  cursorY += lh;
644
1114
  }
@@ -646,7 +1116,19 @@ export function createReflowRenderer(container, options = {}) {
646
1116
  const screenY = cursorY - scrollY;
647
1117
  if (screenY > -r.drawH && screenY < H + r.drawH) {
648
1118
  const tmp = getTmpCanvas(r.bitmap);
649
- ctx.drawImage(tmp, padding * d, screenY * d, r.drawW * d, r.drawH * d);
1119
+ if (enableMorph) {
1120
+ const dist = Math.abs(screenY + r.drawH / 2 - viewCenter);
1121
+ const t = Math.min(dist / morphRadius, 1);
1122
+ const ease = 1 - (1 - t) ** 3;
1123
+ const imgScale = 1 - ease * (1 - edgeFontRatio);
1124
+ const opacity = 1.0 + (0.2 - 1.0) * ease;
1125
+ ctx.save();
1126
+ ctx.globalAlpha = opacity;
1127
+ ctx.drawImage(tmp, padding * d, screenY * d, r.drawW * imgScale * d, r.drawH * imgScale * d);
1128
+ ctx.restore();
1129
+ } else {
1130
+ ctx.drawImage(tmp, padding * d, screenY * d, r.drawW * d, r.drawH * d);
1131
+ }
650
1132
  }
651
1133
  cursorY += r.drawH;
652
1134
  }
@@ -742,7 +1224,7 @@ export function createReflowRenderer(container, options = {}) {
742
1224
 
743
1225
  function handleResize() {
744
1226
  dpr = Math.min(devicePixelRatio || 1, 3);
745
- W = Math.min(container.clientWidth, 680);
1227
+ W = Math.min(container.clientWidth, maxWidth);
746
1228
  H = container.clientHeight;
747
1229
  canvas.width = W * dpr;
748
1230
  canvas.height = H * dpr;
@@ -789,11 +1271,14 @@ export function createReflowRenderer(container, options = {}) {
789
1271
  scrollY = 0;
790
1272
  scrollVelocity = 0;
791
1273
  reflow();
1274
+ onZoom?.(fontSize);
792
1275
 
793
1276
  onPageReady?.({
794
1277
  pageNum,
795
1278
  textBlocks: currentAnalysis.textBlocks,
796
1279
  graphicRegions: currentAnalysis.graphicRegions,
1280
+ pageWidth: currentAnalysis.pageWidth,
1281
+ pageHeight: currentAnalysis.pageHeight,
797
1282
  });
798
1283
  },
799
1284
 
@@ -838,6 +1323,7 @@ export function createReflowRenderer(container, options = {}) {
838
1323
  scrollY = 0;
839
1324
  scrollVelocity = 0;
840
1325
  reflow();
1326
+ onZoom?.(fontSize);
841
1327
  },
842
1328
 
843
1329
  async nextPage() {
@@ -866,6 +1352,13 @@ export function createReflowRenderer(container, options = {}) {
866
1352
  pdfDoc = null;
867
1353
  },
868
1354
 
1355
+ setPadding(newPadding) {
1356
+ if (newPadding !== padding) {
1357
+ padding = newPadding;
1358
+ reflow();
1359
+ }
1360
+ },
1361
+
869
1362
  setFontSize(newSize) {
870
1363
  const clamped = clamp(newSize, minFont, maxFont);
871
1364
  if (clamped !== fontSize) {