pretext-pdfjs 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 pretext-pdfjs contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,118 @@
1
+ # pretext-pdfjs
2
+
3
+ Pretext-native text layer for PDF.js.
4
+
5
+ *Use PDF.js for parsing and rendering. Use pretext-pdfjs for the text layer.*
6
+
7
+ **[npm](https://www.npmjs.com/package/pretext-pdfjs)**
8
+
9
+ ---
10
+
11
+ ## What this is
12
+
13
+ PDF.js has no plugin system — but its layered API means you can use the parser and canvas renderer while bringing your own text layer. That's what this library does.
14
+
15
+ pretext-pdfjs replaces the text layer with one built on [@chenglou/pretext](https://github.com/chenglou/pretext)'s zero-reflow measurement engine, and adds [pinch-type](https://github.com/lucascrespo23/pinch-type) reading modes. 1,528 lines. Not a fork — a companion library.
16
+
17
+ ## Why
18
+
19
+ | | PDF.js original | pretext-pdfjs |
20
+ |---|---|---|
21
+ | **Min font size** | `createElement` → `append` → `getBoundingClientRect` (reflow) | Canvas metrics (zero reflow) |
22
+ | **Font ascent** | Pixel scanning fallback | `fontBoundingBoxAscent` + cache |
23
+ | **Text width** | `measureText()` per span, uncached | Cached by `(font\|text)` key |
24
+ | **Canvas context** | DOM `<canvas>` in body | `OffscreenCanvas` when available |
25
+ | **Pinch-to-zoom** | Zooms the page | Resizes and reflows text |
26
+ | **Reflow** | Not possible | `enableReflow()` via Pretext |
27
+
28
+ ## Install
29
+
30
+ ```bash
31
+ npm install pretext-pdfjs
32
+ ```
33
+
34
+ ## Usage
35
+
36
+ ### Drop-in replacement
37
+
38
+ ```js
39
+ // Before:
40
+ import { getDocument, TextLayer } from "pdfjs-dist";
41
+
42
+ // After — same API:
43
+ import { getDocument, TextLayer } from "pretext-pdfjs";
44
+ ```
45
+
46
+ ### Pinch-to-zoom PDF reader
47
+
48
+ ```js
49
+ import { createPDFPinchReader } from "pretext-pdfjs/pinch";
50
+
51
+ const reader = createPDFPinchReader(container, {
52
+ mode: "pinchMorph", // or "pinchType" or "scrollMorph"
53
+ });
54
+ await reader.open("document.pdf");
55
+ await reader.showPage(1);
56
+ ```
57
+
58
+ ### Three reading modes
59
+
60
+ - **`pinchType`** — pinch/ctrl+scroll resizes text uniformly, Pretext reflows at new size
61
+ - **`scrollMorph`** — fisheye: center text large and bright, edges small and dim
62
+ - **`pinchMorph`** — both combined
63
+
64
+ ### Measurement metrics
65
+
66
+ ```js
67
+ import { TextLayer } from "pretext-pdfjs";
68
+
69
+ // After rendering:
70
+ console.log(TextLayer.pretextMetrics);
71
+ // { cacheSize: 142, measurements: 89, cacheHits: 53, hitRate: "37.3%" }
72
+ ```
73
+
74
+ ### Text reflow
75
+
76
+ ```js
77
+ import { TextLayer } from "pretext-pdfjs";
78
+
79
+ const textContent = await page.getTextContent();
80
+ const fullText = textContent.items.map(i => i.str).filter(Boolean).join(" ");
81
+
82
+ await TextLayer.enableReflow(container, fullText, {
83
+ width: 600,
84
+ font: '16px "Palatino Linotype", serif',
85
+ lineHeight: 24,
86
+ });
87
+ ```
88
+
89
+ ## Architecture
90
+
91
+ ```
92
+ pretext-pdfjs/
93
+ ├── src/
94
+ │ ├── index.js # Re-exports pdfjs-dist, swaps TextLayer
95
+ │ ├── pretext-text-layer.js # PretextTextLayer (drop-in replacement)
96
+ │ ├── measurement-cache.js # Pretext-style Canvas measurement cache
97
+ │ ├── viewer.js # PretextPDFViewer helper
98
+ │ └── pinch.js # Pinch-type PDF reader integration
99
+ ├── demo.html # Self-contained demo page
100
+ ├── package.json
101
+ └── README.md
102
+ ```
103
+
104
+ **Kept from PDF.js** (via `pdfjs-dist` dependency): core parser, canvas renderer, annotation layer, worker architecture, font loading.
105
+
106
+ **Replaced**: `TextLayer` class — measurement cache, ascent detection, width scaling.
107
+
108
+ **Added**: `pretextMetrics`, `enableReflow()`, pinch/morph/combined reading modes.
109
+
110
+ ## Built on
111
+
112
+ - **[@chenglou/pretext](https://github.com/chenglou/pretext)** — DOM-free text measurement & layout by Cheng Lou
113
+ - **[pinch-type](https://github.com/lucascrespo23/pinch-type)** — Canvas text effects for mobile by Lucas Crespo
114
+ - **[PDF.js](https://github.com/mozilla/pdf.js)** — PDF rendering by Mozilla
115
+
116
+ ## License
117
+
118
+ MIT
package/package.json ADDED
@@ -0,0 +1,42 @@
1
+ {
2
+ "name": "pretext-pdfjs",
3
+ "version": "0.1.0",
4
+ "description": "Pretext-native text layer for PDF.js, built on @chenglou/pretext — zero DOM reflows for text measurement",
5
+ "type": "module",
6
+ "main": "./src/index.js",
7
+ "exports": {
8
+ ".": "./src/index.js",
9
+ "./text-layer": "./src/pretext-text-layer.js",
10
+ "./measurement-cache": "./src/measurement-cache.js",
11
+ "./viewer": "./src/viewer.js",
12
+ "./pinch": "./src/pinch.js"
13
+ },
14
+ "files": [
15
+ "src/",
16
+ "README.md",
17
+ "LICENSE"
18
+ ],
19
+ "license": "MIT",
20
+ "keywords": [
21
+ "pdf",
22
+ "pretext",
23
+ "text-layer",
24
+ "pdfjs",
25
+ "pdf.js",
26
+ "viewer",
27
+ "zero-reflow",
28
+ "canvas",
29
+ "text-measurement"
30
+ ],
31
+ "repository": {
32
+ "type": "git",
33
+ "url": "https://github.com/alpeshvas/pretext-pdfjs"
34
+ },
35
+ "dependencies": {
36
+ "@chenglou/pretext": "^0.0.3",
37
+ "pdfjs-dist": "^4.9.155"
38
+ },
39
+ "engines": {
40
+ "node": ">=18"
41
+ }
42
+ }
package/src/index.js ADDED
@@ -0,0 +1,67 @@
1
+ /**
2
+ * pretext-pdfjs
3
+ *
4
+ * Drop-in replacement for pdfjs-dist that swaps the TextLayer with a
5
+ * Pretext-powered implementation. All other exports pass through from pdfjs-dist.
6
+ *
7
+ * Usage:
8
+ * // Instead of: import { getDocument, TextLayer } from "pdfjs-dist";
9
+ * // Use: import { getDocument, TextLayer } from "pretext-pdfjs";
10
+ */
11
+
12
+ export {
13
+ AbortException,
14
+ AnnotationEditorLayer,
15
+ AnnotationEditorParamsType,
16
+ AnnotationEditorType,
17
+ AnnotationEditorUIManager,
18
+ AnnotationLayer,
19
+ AnnotationMode,
20
+ ColorPicker,
21
+ DOMSVGFactory,
22
+ DrawLayer,
23
+ FeatureTest,
24
+ GlobalWorkerOptions,
25
+ ImageKind,
26
+ InvalidPDFException,
27
+ MissingPDFException,
28
+ OPS,
29
+ OutputScale,
30
+ PDFDataRangeTransport,
31
+ PDFDateString,
32
+ PDFWorker,
33
+ PasswordResponses,
34
+ PermissionFlag,
35
+ PixelsPerInch,
36
+ RenderingCancelledException,
37
+ TouchManager,
38
+ UnexpectedResponseException,
39
+ Util,
40
+ VerbosityLevel,
41
+ XfaLayer,
42
+ build,
43
+ createValidAbsoluteUrl,
44
+ fetchData,
45
+ getDocument,
46
+ getFilenameFromUrl,
47
+ getPdfFilenameFromUrl,
48
+ getXfaPageViewport,
49
+ isDataScheme,
50
+ isPdfFile,
51
+ noContextMenu,
52
+ normalizeUnicode,
53
+ setLayerDimensions,
54
+ shadow,
55
+ stopEvent,
56
+ version,
57
+ } from "pdfjs-dist";
58
+
59
+ // Override TextLayer with Pretext-powered version
60
+ export { PretextTextLayer as TextLayer } from "./pretext-text-layer.js";
61
+
62
+ // Additional Pretext-specific exports
63
+ export {
64
+ PretextTextLayer,
65
+ PretextMeasurementCache,
66
+ pretextCache,
67
+ } from "./pretext-text-layer.js";
@@ -0,0 +1,266 @@
1
+ /**
2
+ * pretext-pdfjs: PretextMeasurementCache
3
+ *
4
+ * Drop-in replacement for PDF.js's text measurement internals.
5
+ * Eliminates DOM reflows by using Canvas measureText() with aggressive
6
+ * caching, following @chenglou/pretext's two-phase architecture:
7
+ *
8
+ * Phase 1 (prepare): measure via Canvas, cache by (font+text) key
9
+ * Phase 2 (layout): pure arithmetic over cached widths
10
+ *
11
+ * What this replaces in PDF.js TextLayer:
12
+ * - #ensureMinFontSizeComputed() — DOM div insertion + getBoundingClientRect (REFLOW)
13
+ * - #getCtx() — canvas creation without OffscreenCanvas optimization
14
+ * - #ensureCtxFont() — font setting without cross-call dedup
15
+ * - #getAscent() — pixel scanning fallback with uncached canvas ops
16
+ * - ctx.measureText() in #layout() — uncached per-span measurement
17
+ */
18
+
19
/** Font size (px) and canvas edge length used when probing font metrics. */
const DEFAULT_FONT_SIZE = 30;

/**
 * Canvas-backed text measurement cache.
 *
 * Widths are cached per (scaled font size, font family, text) and ascent
 * ratios per font family, so each distinct measurement hits the canvas once.
 * The class also remembers which font was last assigned to each context it
 * created, so repeated measurements with the same font skip the assignment.
 */
class PretextMeasurementCache {
  /** @type {Map<string, CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D>} contexts keyed by lang ("" when none) */
  #contexts = new Map();

  /** @type {Map<string, number>} width cache keyed by "scaledSize|fontFamily|text" */
  #widthCache = new Map();

  /** @type {Map<string, number>} ascent ratio cache keyed by fontFamily */
  #ascentCache = new Map();

  /** @type {number|null} lazily computed minimum font size; null until first use */
  #minFontSize = null;

  /** @type {WeakMap<object, {size: number, family: string}>} last font set on each managed context */
  #ctxFontState = new WeakMap();

  /** Number of uncached measurements (widths and ascent ratios). */
  #totalMeasurements = 0;

  /** Number of lookups answered from the width or ascent cache. */
  #cacheHits = 0;

  // ── Context Management ──────────────────────────────────────────────────

  /**
   * Get or create a Canvas 2D context for text measurement.
   *
   * Uses OffscreenCanvas when it exists and no `lang` is given (no DOM
   * insertion). Otherwise creates a DOM <canvas> carrying the `lang`
   * attribute and appends it to `document.body`, for locale-dependent font
   * resolution (Firefox serif/sans-serif issue, bug 1869001).
   *
   * @param {string|null} lang
   * @returns {CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D}
   */
  getContext(lang = null) {
    const key = lang || "";
    const existing = this.#contexts.get(key);
    if (existing) {
      return existing;
    }

    let ctx;
    if (typeof OffscreenCanvas !== "undefined" && !lang) {
      const canvas = new OffscreenCanvas(DEFAULT_FONT_SIZE, DEFAULT_FONT_SIZE);
      ctx = canvas.getContext("2d", { alpha: false });
    } else {
      const canvas = document.createElement("canvas");
      canvas.className = "hiddenCanvasElement";
      canvas.width = DEFAULT_FONT_SIZE;
      canvas.height = DEFAULT_FONT_SIZE;
      if (lang) {
        canvas.lang = lang;
      }
      document.body.append(canvas);
      ctx = canvas.getContext("2d", { alpha: false, willReadFrequently: true });
    }

    this.#contexts.set(key, ctx);
    this.#ctxFontState.set(ctx, { size: 0, family: "" });
    return ctx;
  }

  /**
   * Set `ctx.font` to `${size}px ${family}`, skipping the assignment when the
   * remembered state for this context already matches.
   *
   * Contexts not created by `getContext()` have no remembered state, so the
   * font is assigned on every call for them.
   *
   * @param {CanvasRenderingContext2D|OffscreenCanvasRenderingContext2D} ctx
   * @param {number} size - Font size in px
   * @param {string} family - CSS font family
   */
  ensureCtxFont(ctx, size, family) {
    const state = this.#ctxFontState.get(ctx);
    if (state && state.size === size && state.family === family) {
      return;
    }
    ctx.font = `${size}px ${family}`;
    if (state) {
      state.size = size;
      state.family = family;
    }
  }

  /**
   * Forget the remembered font of a managed context so the next
   * `ensureCtxFont()` call re-assigns it. Needed after anything that resets
   * the context state, such as resizing its canvas.
   */
  #forgetCtxFont(ctx) {
    const state = this.#ctxFontState.get(ctx);
    if (state) {
      state.size = 0;
      state.family = "";
    }
  }

  // ── Width Measurement ───────────────────────────────────────────────────

  /**
   * Measure text width with caching.
   *
   * The cache key is the scaled font size rounded to two decimals, the font
   * family and the text. The context (and therefore its lang) is NOT part of
   * the key: the first measurement for a key wins regardless of which
   * context produced it.
   *
   * @param {CanvasRenderingContext2D} ctx - Canvas context (for uncached path)
   * @param {string} text - Text to measure
   * @param {number} fontSize - Base font size
   * @param {string} fontFamily - CSS font family
   * @param {number} scale - Current scale factor
   * @returns {number} measured (or cached) width
   */
  measureWidth(ctx, text, fontSize, fontFamily, scale) {
    const scaledSize = fontSize * scale;
    const cacheKey = `${scaledSize.toFixed(2)}|${fontFamily}|${text}`;

    const cached = this.#widthCache.get(cacheKey);
    if (cached !== undefined) {
      this.#cacheHits++;
      return cached;
    }

    this.ensureCtxFont(ctx, scaledSize, fontFamily);
    const { width } = ctx.measureText(text);

    this.#widthCache.set(cacheKey, width);
    this.#totalMeasurements++;
    return width;
  }

  // ── Ascent Measurement ──────────────────────────────────────────────────

  /**
   * Get the font ascent ratio, ascent / (ascent + descent).
   *
   * Fallback chain:
   *   1. `fontBoundingBoxAscent` / `fontBoundingBoxDescent` from measureText
   *   2. Pixel scanning of stroked "g" and "A" glyphs
   *   3. Hardcoded 0.8 default
   *
   * The result is cached by `fontFamily` only; `lang` merely selects which
   * context performs the first measurement.
   *
   * @param {string} fontFamily
   * @param {string|null} lang
   * @returns {number} ascent ratio (0..1)
   */
  getAscentRatio(fontFamily, lang = null) {
    const cached = this.#ascentCache.get(fontFamily);
    if (cached !== undefined) {
      this.#cacheHits++;
      return cached;
    }

    const ctx = this.getContext(lang);
    const { canvas } = ctx;

    // The pixel-scanning fallback needs a DEFAULT_FONT_SIZE square. Assigning
    // a canvas dimension resets the whole context state (font included), so
    // only resize when required and drop the remembered font when we do.
    // The canvas is deliberately NOT "restored" afterwards: re-assigning
    // width/height, even to the same value, would reset ctx.font while the
    // remembered font state still claimed it was set, and later width
    // measurements would silently run with the default font.
    if (canvas.width < DEFAULT_FONT_SIZE || canvas.height < DEFAULT_FONT_SIZE) {
      if (canvas.width < DEFAULT_FONT_SIZE) {
        canvas.width = DEFAULT_FONT_SIZE;
      }
      if (canvas.height < DEFAULT_FONT_SIZE) {
        canvas.height = DEFAULT_FONT_SIZE;
      }
      this.#forgetCtxFont(ctx);
    }

    this.ensureCtxFont(ctx, DEFAULT_FONT_SIZE, fontFamily);
    const metrics = ctx.measureText("");

    const ascent = metrics.fontBoundingBoxAscent;
    const descent = Math.abs(metrics.fontBoundingBoxDescent || 0);
    const ratio = ascent
      ? ascent / (ascent + descent)
      : this.#scanAscentRatio(ctx);

    this.#ascentCache.set(fontFamily, ratio);
    this.#totalMeasurements++;
    return ratio;
  }

  /**
   * Pixel-scanning fallback for the ascent ratio: strokes "g" and "A" with the
   * context's current font and looks for the first non-zero red-channel byte
   * from the bottom (descent) and from the top (ascent).
   *
   * @returns {number} measured ratio, or 0.8 when no ascent pixel was found
   */
  #scanAscentRatio(ctx) {
    const size = DEFAULT_FONT_SIZE;

    ctx.strokeStyle = "red";
    ctx.clearRect(0, 0, size, size);
    ctx.strokeText("g", 0, 0);
    let pixels = ctx.getImageData(0, 0, size, size).data;
    let measuredDescent = 0;
    // Walk the red channel backwards, starting at the last pixel.
    for (let i = pixels.length - 1 - 3; i >= 0; i -= 4) {
      if (pixels[i] > 0) {
        measuredDescent = Math.ceil(i / 4 / size);
        break;
      }
    }

    ctx.clearRect(0, 0, size, size);
    ctx.strokeText("A", 0, size);
    pixels = ctx.getImageData(0, 0, size, size).data;
    let measuredAscent = 0;
    for (let i = 0, ii = pixels.length; i < ii; i += 4) {
      if (pixels[i] > 0) {
        measuredAscent = size - Math.floor(i / 4 / size);
        break;
      }
    }

    return measuredAscent
      ? measuredAscent / (measuredAscent + measuredDescent)
      : 0.8;
  }

  // ── Min Font Size ───────────────────────────────────────────────────────

  /**
   * Compute the minimum font size enforced by the browser.
   *
   * Measured through the DOM: a temporary absolutely-positioned <div> with
   * `font-size: 1px` is appended to `document.body`, its
   * `getBoundingClientRect().height` is read (this forces a layout), and the
   * div is removed. The value is cached, so the DOM is touched only on the
   * first call (and on the first call after `cleanup()`).
   *
   * Returns 1 when no `document` is available.
   *
   * @returns {number}
   */
  getMinFontSize() {
    if (this.#minFontSize !== null) {
      return this.#minFontSize;
    }

    if (typeof document === "undefined") {
      this.#minFontSize = 1;
      return this.#minFontSize;
    }

    const div = document.createElement("div");
    div.style.opacity = 0;
    div.style.lineHeight = 1;
    div.style.fontSize = "1px";
    div.style.position = "absolute";
    div.textContent = "X";
    document.body.append(div);
    this.#minFontSize = div.getBoundingClientRect().height;
    div.remove();
    return this.#minFontSize;
  }

  // ── Lifecycle ───────────────────────────────────────────────────────────

  /**
   * Drop all cached values and contexts, remove any DOM canvases this cache
   * appended to the document, and reset the counters.
   */
  cleanup() {
    this.#widthCache.clear();
    this.#ascentCache.clear();
    this.#minFontSize = null;

    // HTMLCanvasElement is not defined outside a DOM environment (e.g. in a
    // worker using OffscreenCanvas); an unguarded instanceof would throw.
    const hasDomCanvas = typeof HTMLCanvasElement !== "undefined";
    for (const ctx of this.#contexts.values()) {
      const { canvas } = ctx;
      if (hasDomCanvas && canvas instanceof HTMLCanvasElement) {
        canvas.remove();
      }
    }
    this.#contexts.clear();

    this.#totalMeasurements = 0;
    this.#cacheHits = 0;
  }

  /**
   * Cache statistics.
   *
   * @returns {{cacheSize: number, measurements: number, cacheHits: number, hitRate: string}}
   *   `cacheSize` counts width plus ascent entries; `hitRate` is a percentage
   *   with one decimal, or "N/A" before any lookup.
   */
  get metrics() {
    const lookups = this.#totalMeasurements + this.#cacheHits;
    const hitRate =
      lookups > 0
        ? `${((this.#cacheHits / lookups) * 100).toFixed(1)}%`
        : "N/A";
    return {
      cacheSize: this.#widthCache.size + this.#ascentCache.size,
      measurements: this.#totalMeasurements,
      cacheHits: this.#cacheHits,
      hitRate,
    };
  }
}
265
+
266
+ export { PretextMeasurementCache };