pretext-pdfjs 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +118 -0
- package/package.json +42 -0
- package/src/index.js +67 -0
- package/src/measurement-cache.js +266 -0
- package/src/pinch.js +498 -0
- package/src/pretext-text-layer.js +484 -0
- package/src/viewer.js +213 -0
|
@@ -0,0 +1,484 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pretext-pdf: PretextTextLayer
|
|
3
|
+
*
|
|
4
|
+
* Drop-in replacement for pdfjs-dist's TextLayer class.
|
|
5
|
+
*
|
|
6
|
+
* Identical public API:
|
|
7
|
+
* new PretextTextLayer({ textContentSource, container, viewport })
|
|
8
|
+
* .render() → Promise<void>
|
|
9
|
+
* .update({ viewport, onBefore }) → void
|
|
10
|
+
* .cancel() → void
|
|
11
|
+
* .textDivs → HTMLElement[]
|
|
12
|
+
* .textContentItemsStr → string[]
|
|
13
|
+
* PretextTextLayer.cleanup() → void
|
|
14
|
+
* PretextTextLayer.fontFamilyMap → Map
|
|
15
|
+
*
|
|
16
|
+
* What changed vs original:
|
|
17
|
+
* - #layout() uses PretextMeasurementCache.measureWidth() (cached)
|
|
18
|
+
* instead of raw ctx.measureText() (uncached)
|
|
19
|
+
* - #getAscent() uses PretextMeasurementCache.getAscentRatio() (cached)
|
|
20
|
+
* instead of per-font pixel scanning
|
|
21
|
+
* - #ensureMinFontSizeComputed() is lazily called and minimizes reflow
|
|
22
|
+
* - All Canvas contexts use OffscreenCanvas when possible
|
|
23
|
+
* - New: static pretextMetrics getter for profiling
|
|
24
|
+
* - New: static enableReflow() for Pretext-powered text reflow
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import { PretextMeasurementCache } from "./measurement-cache.js";
|
|
28
|
+
|
|
29
|
+
// ── Singleton measurement cache ────────────────────────────────────────────
// One module-wide PretextMeasurementCache instance. Every PretextTextLayer in
// this module reads its measurements from it, and it is re-exported at the
// bottom of the file as `pretextCache`.
const cache = new PretextMeasurementCache();
|
|
31
|
+
|
|
32
|
+
// ── Helpers imported from pdfjs-dist at runtime ────────────────────────────
|
|
33
|
+
// Cached pdfjs-dist module; stays null until PretextTextLayer.init() or
// getPdfjs() stores a module here.
let _pdfjs = null;

/**
 * Resolve the pdfjs-dist module, importing it dynamically on first use and
 * reusing the cached module afterwards.
 *
 * @returns {Promise<Object>} The cached pdfjs-dist module.
 */
async function getPdfjs() {
  return (_pdfjs ||= await import("pdfjs-dist"));
}

/**
 * Synchronous accessor for the cached pdfjs-dist module.
 *
 * @returns {Object} The cached pdfjs-dist module.
 * @throws {Error} If no module has been cached yet.
 */
function getPdfjsSync() {
  if (_pdfjs) {
    return _pdfjs;
  }
  throw new Error("Call PretextTextLayer.init() before use");
}
|
|
45
|
+
|
|
46
|
+
// ── Lazy Pretext import ────────────────────────────────────────────────────
|
|
47
|
+
// Tri-state cache for the optional dependency:
// undefined = import not attempted yet, null = import failed, otherwise the module.
let _pretext = undefined;

/**
 * Load @chenglou/pretext at most once. A failed import is remembered as
 * `null`, so later calls return immediately without retrying.
 *
 * @returns {Promise<Object|null>} The module, or null when it cannot be imported.
 */
async function getPretext() {
  if (_pretext === undefined) {
    // The dependency is optional: an import failure is reported as "unavailable".
    _pretext = await import("@chenglou/pretext").catch(() => null);
  }
  return _pretext;
}
|
|
58
|
+
|
|
59
|
+
// ── Constants ──────────────────────────────────────────────────────────────
// Upper bound on text spans per layer: once a layer holds more spans than
// this, #processItems logs a warning and ignores every remaining item.
const MAX_TEXT_DIVS_TO_RENDER = 100000;
|
|
61
|
+
|
|
62
|
+
// ── PretextTextLayer ───────────────────────────────────────────────────────
|
|
63
|
+
|
|
64
|
+
/**
 * Text layer renderer exposing the same public surface as pdfjs-dist's
 * TextLayer (render / update / cancel / textDivs / textContentItemsStr), with
 * width and ascent measurements served by the shared measurement cache.
 */
class PretextTextLayer {
  // Settles once the text stream is fully consumed, fails, or is cancelled.
  #capability = Promise.withResolvers();
  // Element that new spans are appended to; changes while inside marked content.
  #container = null;
  #disableProcessItems = false;
  #fontInspectorEnabled = !!globalThis.FontInspector?.enabled;
  #lang = null;
  #layoutTextParams = null;
  #pageHeight = 0;
  #pageWidth = 0;
  #reader = null;
  #rootContainer = null;
  #rotation = 0;
  #scale = 0;
  #styleCache = Object.create(null);
  #textContentItemsStr = [];
  #textContentSource = null;
  #textDivs = [];
  #textDivProperties = new WeakMap();
  #transform = null;

  static #pendingTextLayers = new Set();
  static #_fontFamilyMap = null;

  /**
   * Initialize the pdfjs-dist dependency. Must be called once before
   * constructing a layer.
   *
   * @param {Object} [pdfjsModule] - pdfjs-dist module; imported automatically if omitted.
   * @returns {Promise<void>}
   */
  static async init(pdfjsModule) {
    if (pdfjsModule) {
      _pdfjs = pdfjsModule;
    } else {
      await getPdfjs();
    }
  }

  /**
   * @param {Object} options
   * @param {ReadableStream|Object} options.textContentSource - Stream of text
   *   content chunks, or a single already-resolved text content object.
   * @param {HTMLElement} options.container
   * @param {Object} options.viewport - pdfjs-dist PageViewport
   * @throws {Error} If init() has not run, or textContentSource is neither a
   *   ReadableStream nor a non-null object.
   */
  constructor({ textContentSource, container, viewport }) {
    const pdfjs = getPdfjsSync();

    if (textContentSource instanceof ReadableStream) {
      this.#textContentSource = textContentSource;
    } else if (textContentSource !== null && typeof textContentSource === "object") {
      // `typeof null` is "object" too, hence the explicit null check above.
      this.#textContentSource = new ReadableStream({
        start(controller) {
          controller.enqueue(textContentSource);
          controller.close();
        },
      });
    } else {
      throw new Error('No "textContentSource" parameter specified.');
    }

    this.#container = this.#rootContainer = container;
    this.#scale = viewport.scale * (globalThis.devicePixelRatio || 1);
    this.#rotation = viewport.rotation;
    this.#layoutTextParams = { div: null, properties: null, ctx: null };

    const { pageWidth, pageHeight, pageX, pageY } = viewport.rawDims;
    // Flips the y axis and shifts by the page origin.
    this.#transform = [1, 0, 0, -1, -pageX, pageY + pageHeight];
    this.#pageWidth = pageWidth;
    this.#pageHeight = pageHeight;

    // Kept from the original constructor, which also called this here; the
    // result is not needed at this point.
    cache.getMinFontSize();

    pdfjs.setLayerDimensions(container, viewport);

    // Release per-render state as soon as rendering settles, however it ends.
    this.#capability.promise
      .finally(() => {
        PretextTextLayer.#pendingTextLayers.delete(this);
        this.#layoutTextParams = null;
        this.#styleCache = null;
      })
      .catch(() => {});
  }

  /**
   * Generic CSS font families mapped to the family string actually applied.
   * Built once on first access. Private state is read through the class name
   * so the getter also works when reached through a subclass.
   *
   * @returns {Map<string, string>}
   */
  static get fontFamilyMap() {
    if (PretextTextLayer.#_fontFamilyMap) {
      return PretextTextLayer.#_fontFamilyMap;
    }
    const ua = typeof navigator !== "undefined" ? navigator.userAgent : "";
    const isWindowsFirefox = ua.includes("Windows") && ua.includes("Firefox");
    PretextTextLayer.#_fontFamilyMap = new Map([
      ["sans-serif", `${isWindowsFirefox ? "Calibri, " : ""}sans-serif`],
      ["monospace", `${isWindowsFirefox ? "Lucida Console, " : ""}monospace`],
    ]);
    return PretextTextLayer.#_fontFamilyMap;
  }

  /**
   * Render the text layer by draining the text content stream.
   *
   * @returns {Promise<void>} Resolves when the stream is exhausted. Rejects if
   *   reading fails, if processing a chunk throws, or when cancel() is called.
   */
  render() {
    const { resolve, reject } = this.#capability;

    const pump = () => {
      const reader = this.#reader;
      if (!reader) {
        // cancel() ran between two chunks; the promise is already rejected.
        return;
      }
      reader
        .read()
        .then(({ value, done }) => {
          if (this.#reader !== reader) {
            return; // cancelled while this read was in flight
          }
          if (done) {
            resolve();
            return;
          }
          this.#lang ??= value.lang;
          Object.assign(this.#styleCache, value.styles);
          this.#processItems(value.items);
          pump();
        })
        // Also catches exceptions thrown while processing a chunk, so the
        // render promise always settles instead of hanging.
        .catch(reject);
    };

    this.#reader = this.#textContentSource.getReader();
    PretextTextLayer.#pendingTextLayers.add(this);
    pump();
    return this.#capability.promise;
  }

  /**
   * Update a previously rendered text layer after a viewport change.
   *
   * @param {Object} options
   * @param {Object} options.viewport
   * @param {Function} [options.onBefore] - Invoked before each kind of change
   *   (rotation, scale) is applied.
   */
  update({ viewport, onBefore = null }) {
    const pdfjs = getPdfjsSync();
    const scale = viewport.scale * (globalThis.devicePixelRatio || 1);
    const rotation = viewport.rotation;

    if (rotation !== this.#rotation) {
      onBefore?.();
      this.#rotation = rotation;
      pdfjs.setLayerDimensions(this.#rootContainer, { rotation });
    }

    if (scale !== this.#scale) {
      onBefore?.();
      this.#scale = scale;
      // Re-run layout for every span so its transform matches the new scale.
      const params = {
        div: null,
        properties: null,
        ctx: cache.getContext(this.#lang),
      };
      for (const div of this.#textDivs) {
        params.properties = this.#textDivProperties.get(div);
        params.div = div;
        this.#layout(params);
      }
    }
  }

  /**
   * Cancel rendering. The promise returned by render() rejects with a
   * pdfjs AbortException.
   */
  cancel() {
    const pdfjs = getPdfjsSync();
    const abortEx = new pdfjs.AbortException("TextLayer task cancelled.");
    this.#reader?.cancel(abortEx).catch(() => {});
    this.#reader = null;
    this.#capability.reject(abortEx);
  }

  /** @type {HTMLElement[]} */
  get textDivs() {
    return this.#textDivs;
  }

  /** @type {string[]} */
  get textContentItemsStr() {
    return this.#textContentItemsStr;
  }

  // ── Internal: process text content stream ────────────────────────────────

  /**
   * Consume one chunk of text content items: marked-content markers open or
   * close wrapper spans, everything else becomes a text span.
   *
   * @param {Object[]} items
   */
  #processItems(items) {
    if (this.#disableProcessItems) {
      return;
    }

    // The measuring context is fetched on the first chunk, once #lang is known.
    if (!this.#layoutTextParams.ctx) {
      this.#layoutTextParams.ctx = cache.getContext(this.#lang);
    }

    for (const item of items) {
      if (this.#textDivs.length > MAX_TEXT_DIVS_TO_RENDER) {
        console.warn("pretext-pdf: too many text items, stopping.");
        this.#disableProcessItems = true;
        return;
      }

      if (item.str === undefined) {
        this.#handleMarkedContent(item);
        continue;
      }

      this.#textContentItemsStr.push(item.str);
      this.#appendText(item);
    }
  }

  /**
   * Open or close a marked-content wrapper span for a non-text item.
   *
   * @param {Object} item
   */
  #handleMarkedContent(item) {
    const { type } = item;
    if (type === "beginMarkedContentProps" || type === "beginMarkedContent") {
      const parent = this.#container;
      const span = document.createElement("span");
      span.classList.add("markedContent");
      // `!= null` also skips a missing id, which would otherwise be written
      // out as the literal string "undefined".
      if (item.id != null) {
        span.setAttribute("id", `${item.id}`);
      }
      parent.append(span);
      this.#container = span;
    } else if (type === "endMarkedContent") {
      // Never climb above the root, even if end markers are unbalanced.
      if (this.#container !== this.#rootContainer) {
        this.#container = this.#container.parentNode;
      }
    }
  }

  // ── Internal: append a single text item ──────────────────────────────────

  /**
   * Create, position and lay out the span for one text item, then attach it
   * (and a trailing <br> for end-of-line items) to the current container.
   *
   * @param {Object} geom - Text item; reads str, transform, fontName, dir,
   *   width, height and hasEOL.
   */
  #appendText(geom) {
    const pdfjs = getPdfjsSync();
    const textDiv = document.createElement("span");
    const textDivProperties = {
      angle: 0,
      canvasWidth: 0,
      hasText: geom.str !== "",
      hasEOL: geom.hasEOL,
      fontSize: 0,
    };
    this.#textDivs.push(textDiv);

    const tx = pdfjs.Util.transform(this.#transform, geom.transform);
    let angle = Math.atan2(tx[1], tx[0]);
    const style = this.#styleCache[geom.fontName];
    if (style.vertical) {
      angle += Math.PI / 2;
    }

    let fontFamily =
      (this.#fontInspectorEnabled && style.fontSubstitution) ||
      style.fontFamily;
    fontFamily = PretextTextLayer.fontFamilyMap.get(fontFamily) || fontFamily;

    const fontHeight = Math.hypot(tx[2], tx[3]);
    // Ascent comes from the measurement cache as a ratio of the font height.
    const fontAscent = fontHeight * cache.getAscentRatio(fontFamily, this.#lang);

    let left;
    let top;
    if (angle === 0) {
      left = tx[4];
      top = tx[5] - fontAscent;
    } else {
      left = tx[4] + fontAscent * Math.sin(angle);
      top = tx[5] - fontAscent * Math.cos(angle);
    }

    const minFontSize = cache.getMinFontSize();
    const scaleFactorStr = "calc(var(--scale-factor)*";
    const divStyle = textDiv.style;

    // Direct children of the root are positioned in percent of the page;
    // spans nested in marked content use scaled pixel offsets instead.
    if (this.#container === this.#rootContainer) {
      divStyle.left = `${((100 * left) / this.#pageWidth).toFixed(2)}%`;
      divStyle.top = `${((100 * top) / this.#pageHeight).toFixed(2)}%`;
    } else {
      divStyle.left = `${scaleFactorStr}${left.toFixed(2)}px)`;
      divStyle.top = `${scaleFactorStr}${top.toFixed(2)}px)`;
    }
    divStyle.fontSize = `${scaleFactorStr}${(minFontSize * fontHeight).toFixed(2)}px)`;
    divStyle.fontFamily = fontFamily;

    textDivProperties.fontSize = fontHeight;
    textDiv.setAttribute("role", "presentation");
    textDiv.textContent = geom.str;
    textDiv.dir = geom.dir;

    if (this.#fontInspectorEnabled) {
      textDiv.dataset.fontName =
        style.fontSubstitutionLoadedName || geom.fontName;
    }
    if (angle !== 0) {
      textDivProperties.angle = angle * (180 / Math.PI);
    }

    // Multi-character strings are always width-fitted; a single non-space
    // glyph only when its x/y scales differ by more than a factor of 1.5.
    let shouldScaleText = false;
    if (geom.str.length > 1) {
      shouldScaleText = true;
    } else if (geom.str !== " " && geom.transform[0] !== geom.transform[3]) {
      const absScaleX = Math.abs(geom.transform[0]);
      const absScaleY = Math.abs(geom.transform[3]);
      if (
        absScaleX !== absScaleY &&
        Math.max(absScaleX, absScaleY) / Math.min(absScaleX, absScaleY) > 1.5
      ) {
        shouldScaleText = true;
      }
    }
    if (shouldScaleText) {
      textDivProperties.canvasWidth = style.vertical ? geom.height : geom.width;
    }
    this.#textDivProperties.set(textDiv, textDivProperties);

    this.#layoutTextParams.div = textDiv;
    this.#layoutTextParams.properties = textDivProperties;
    this.#layout(this.#layoutTextParams);

    if (textDivProperties.hasText) {
      this.#container.append(textDiv);
    }
    if (textDivProperties.hasEOL) {
      const br = document.createElement("br");
      br.setAttribute("role", "presentation");
      this.#container.append(br);
    }
  }

  // ── Internal: layout a single text div ───────────────────────────────────

  /**
   * Compute and apply the CSS transform (rotation, horizontal fit, min-font
   * compensation) for one span.
   *
   * @param {{div: HTMLElement, properties: Object, ctx: Object}} params
   */
  #layout(params) {
    const { div, properties, ctx } = params;
    const { style } = div;

    const minFontSize = cache.getMinFontSize();
    let transform = "";
    if (minFontSize > 1) {
      // Undo the font-size inflation applied in #appendText.
      transform = `scale(${1 / minFontSize})`;
    }

    if (properties.canvasWidth !== 0 && properties.hasText) {
      const { fontFamily } = style;
      const { canvasWidth, fontSize } = properties;

      // Width comes from the measurement cache rather than a direct
      // ctx.measureText() call.
      const width = cache.measureWidth(
        ctx,
        div.textContent,
        fontSize,
        fontFamily,
        this.#scale
      );

      if (width > 0) {
        transform = `scaleX(${(canvasWidth * this.#scale) / width}) ${transform}`;
      }
    }
    if (properties.angle !== 0) {
      transform = `rotate(${properties.angle}deg) ${transform}`;
    }
    if (transform.length > 0) {
      style.transform = transform;
    }
  }

  // ── Static methods ───────────────────────────────────────────────────────

  /**
   * Clean up global resources. Does nothing while any layer is still
   * rendering.
   */
  static cleanup() {
    if (PretextTextLayer.#pendingTextLayers.size > 0) {
      return;
    }
    cache.cleanup();
  }

  /** Pretext measurement cache metrics for profiling. */
  static get pretextMetrics() {
    return cache.metrics;
  }

  /**
   * Reflow text content using @chenglou/pretext: the text is laid out into
   * lines for the given width, and the container's children are replaced by
   * one absolutely positioned div per line.
   *
   * @param {HTMLElement} container - Text layer container
   * @param {string} text - Full text content
   * @param {Object} options
   * @param {number} options.width - Target width in px
   * @param {string} options.font - CSS font spec (e.g. '16px Inter')
   * @param {number} options.lineHeight - Line height in px
   * @returns {Promise<{lineCount: number, height: number, lines: Array}>}
   * @throws {Error} If @chenglou/pretext cannot be imported.
   */
  static async enableReflow(container, text, { width, font, lineHeight }) {
    const pretext = await getPretext();
    if (!pretext) {
      throw new Error(
        "@chenglou/pretext is required for reflow mode. " +
          "Install it: npm install @chenglou/pretext"
      );
    }

    const prepared = pretext.prepareWithSegments(text, font);
    const result = pretext.layoutWithLines(prepared, width, lineHeight);

    container.replaceChildren();
    let y = 0;
    for (const line of result.lines) {
      const lineDiv = document.createElement("div");
      lineDiv.textContent = line.text;
      // Individual properties instead of a cssText template: a stray ";" in
      // `font` cannot smuggle in extra declarations this way.
      const lineStyle = lineDiv.style;
      lineStyle.position = "absolute";
      lineStyle.left = "0";
      lineStyle.top = `${y}px`;
      lineStyle.font = font;
      lineStyle.whiteSpace = "pre";
      lineStyle.color = "transparent";
      lineStyle.cursor = "text";
      container.append(lineDiv);
      y += lineHeight;
    }

    return {
      lineCount: result.lineCount,
      height: result.height,
      lines: result.lines,
    };
  }
}
|
|
483
|
+
|
|
484
|
+
export { PretextTextLayer, PretextMeasurementCache, cache as pretextCache };
|
package/src/viewer.js
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* pretext-pdf/viewer
|
|
3
|
+
*
|
|
4
|
+
* High-level viewer that uses PretextTextLayer for text overlay.
|
|
5
|
+
* Drop-in for apps that need a simple render-a-page API.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* import { PretextPDFViewer } from "pretext-pdf/viewer";
|
|
9
|
+
*
|
|
10
|
+
* const viewer = new PretextPDFViewer(containerElement);
|
|
11
|
+
* await viewer.open("document.pdf");
|
|
12
|
+
* // or: await viewer.open(uint8Array);
|
|
13
|
+
* await viewer.renderPage(1);
|
|
14
|
+
* viewer.setScale(1.5);
|
|
15
|
+
* await viewer.renderPage(1);
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import {
|
|
19
|
+
getDocument,
|
|
20
|
+
GlobalWorkerOptions,
|
|
21
|
+
Util,
|
|
22
|
+
setLayerDimensions,
|
|
23
|
+
} from "pdfjs-dist";
|
|
24
|
+
import { PretextTextLayer } from "./pretext-text-layer.js";
|
|
25
|
+
|
|
26
|
+
/**
 * Minimal single-page PDF viewer: one canvas plus a PretextTextLayer overlay
 * inside a wrapper element appended to the host container.
 */
class PretextPDFViewer {
  /** @type {HTMLElement} */
  #container;

  /** @type {Object|null} pdfjs PDFDocumentProxy */
  #pdfDoc = null;

  /** @type {number} */
  #scale = 1.5;

  /** @type {number} */
  #currentPage = 0;

  /** @type {HTMLCanvasElement} */
  #canvas;

  /** @type {HTMLDivElement} */
  #textLayerDiv;

  /** @type {PretextTextLayer|null} */
  #textLayer = null;

  /** @type {boolean} */
  #initialized = false;

  /** @type {Object|null} In-flight pdfjs render task, if any. */
  #renderTask = null;

  /** @type {number} Bumped whenever in-flight rendering becomes obsolete. */
  #renderToken = 0;

  /**
   * @param {HTMLElement} container - DOM element to render into
   * @param {Object} [options]
   * @param {number} [options.scale=1.5]
   * @param {string} [options.workerSrc] - PDF.js worker URL
   */
  constructor(container, options = {}) {
    this.#container = container;
    this.#scale = options.scale || 1.5;

    if (options.workerSrc) {
      GlobalWorkerOptions.workerSrc = options.workerSrc;
    }

    // Create canvas
    this.#canvas = document.createElement("canvas");
    this.#canvas.style.display = "block";

    // Create text layer overlay
    this.#textLayerDiv = document.createElement("div");
    this.#textLayerDiv.className = "textLayer";
    this.#textLayerDiv.style.cssText = `
      position: absolute;
      left: 0;
      top: 0;
      right: 0;
      bottom: 0;
      overflow: hidden;
      line-height: 1.0;
    `;

    // Wrapper for positioning
    const wrapper = document.createElement("div");
    wrapper.style.position = "relative";
    wrapper.style.display = "inline-block";
    wrapper.append(this.#canvas, this.#textLayerDiv);
    this.#container.append(wrapper);
  }

  /**
   * Destroy a document without letting a teardown failure surface as an
   * unhandled rejection.
   *
   * @param {Object} pdfDoc - pdfjs PDFDocumentProxy
   */
  static #destroyQuietly(pdfDoc) {
    Promise.resolve(pdfDoc.destroy()).catch((err) => {
      console.warn("pretext-pdf: failed to destroy document", err);
    });
  }

  /** Invalidate and cancel whatever rendering is currently in flight. */
  #abortPendingRender() {
    this.#renderToken += 1;
    this.#renderTask?.cancel();
    this.#renderTask = null;
    this.#textLayer?.cancel();
  }

  /**
   * Open a PDF document. A previously opened document is destroyed once the
   * new one has loaded; if loading fails the previous document stays open.
   *
   * @param {string|URL|Uint8Array|ArrayBuffer} source - URL, typed array, or path
   * @returns {Promise<{numPages: number}>}
   */
  async open(source) {
    // Initialize PretextTextLayer's pdfjs dependency
    if (!this.#initialized) {
      const pdfjs = await import("pdfjs-dist");
      await PretextTextLayer.init(pdfjs);
      this.#initialized = true;
    }

    const loadingTask = getDocument(
      source instanceof Uint8Array || source instanceof ArrayBuffer
        ? { data: source }
        : source
    );
    const pdfDoc = await loadingTask.promise;

    // Anything still rendering belongs to the previous document.
    this.#abortPendingRender();
    const previous = this.#pdfDoc;
    this.#pdfDoc = pdfDoc;
    this.#currentPage = 0;
    if (previous) {
      PretextPDFViewer.#destroyQuietly(previous);
    }
    return { numPages: pdfDoc.numPages };
  }

  /**
   * Render a specific page. A call that is overtaken by a newer renderPage(),
   * open() or destroy() rejects instead of drawing over the newer result.
   *
   * @param {number} pageNum - 1-based page number
   * @returns {Promise<{width: number, height: number, textItems: number, pretextMetrics: Object}>}
   * @throws {Error} If no document is loaded.
   * @throws {RangeError} If pageNum is not an integer within 1..numPages.
   */
  async renderPage(pageNum) {
    const pdfDoc = this.#pdfDoc;
    if (!pdfDoc) throw new Error("No document loaded. Call open() first.");
    // Number.isInteger also rejects NaN, which slips through plain `<` / `>`.
    if (!Number.isInteger(pageNum) || pageNum < 1 || pageNum > pdfDoc.numPages) {
      throw new RangeError(`Page ${pageNum} out of range (1-${pdfDoc.numPages})`);
    }

    // Cancel the previous canvas render and text layer, then claim a token.
    this.#abortPendingRender();
    const token = this.#renderToken;
    const assertCurrent = () => {
      if (token !== this.#renderToken) {
        const err = new Error(
          `renderPage(${pageNum}) was superseded by a newer request`
        );
        err.name = "AbortError";
        throw err;
      }
    };

    const page = await pdfDoc.getPage(pageNum);
    assertCurrent();

    const viewport = page.getViewport({ scale: this.#scale });
    const outputScale = globalThis.devicePixelRatio || 1;
    const cssWidth = `${Math.floor(viewport.width)}px`;
    const cssHeight = `${Math.floor(viewport.height)}px`;

    // Size canvas: backing store in device pixels, CSS box in layout pixels.
    this.#canvas.width = Math.floor(viewport.width * outputScale);
    this.#canvas.height = Math.floor(viewport.height * outputScale);
    this.#canvas.style.width = cssWidth;
    this.#canvas.style.height = cssHeight;

    // Size wrapper
    const wrapper = this.#canvas.parentElement;
    wrapper.style.width = cssWidth;
    wrapper.style.height = cssHeight;

    // Render canvas, keeping the task so a later call can cancel it.
    const renderTask = page.render({
      canvasContext: this.#canvas.getContext("2d"),
      viewport,
      transform: outputScale !== 1 ? [outputScale, 0, 0, outputScale, 0, 0] : null,
    });
    this.#renderTask = renderTask;
    try {
      await renderTask.promise;
    } finally {
      if (this.#renderTask === renderTask) {
        this.#renderTask = null;
      }
    }

    // Render Pretext text layer
    this.#textLayerDiv.replaceChildren();

    const textContent = await page.getTextContent({
      includeMarkedContent: true,
      disableNormalization: true,
    });
    assertCurrent();

    // Text spans are sized and positioned with calc(var(--scale-factor)*…),
    // so the variable has to be defined on the layer for them to resolve.
    // NOTE(review): newer pdf.js releases may read --total-scale-factor in
    // setLayerDimensions instead — confirm against the pinned pdfjs-dist.
    this.#textLayerDiv.style.setProperty("--scale-factor", `${viewport.scale}`);

    const textLayer = new PretextTextLayer({
      textContentSource: textContent,
      container: this.#textLayerDiv,
      viewport,
    });
    this.#textLayer = textLayer;

    await textLayer.render();

    this.#currentPage = pageNum;

    return {
      width: viewport.width,
      height: viewport.height,
      textItems: textLayer.textContentItemsStr.length,
      pretextMetrics: PretextTextLayer.pretextMetrics,
    };
  }

  /**
   * Set zoom scale. Takes effect on the next renderPage() call.
   * @param {number} scale
   */
  setScale(scale) {
    this.#scale = scale;
  }

  /** @returns {number} */
  get scale() {
    return this.#scale;
  }

  /** @returns {number} Page count of the open document, or 0 if none is open. */
  get numPages() {
    return this.#pdfDoc?.numPages || 0;
  }

  /** @returns {number} Last successfully rendered page, or 0 if none. */
  get currentPage() {
    return this.#currentPage;
  }

  /** Get Pretext measurement cache metrics. */
  get pretextMetrics() {
    return PretextTextLayer.pretextMetrics;
  }

  /** Clean up resources: cancel rendering, destroy the document, empty the container. */
  destroy() {
    this.#abortPendingRender();
    this.#textLayer = null;
    // A cancelled layer leaves the pending set in a promise callback, and
    // cleanup() is a no-op while that set is non-empty, so run it afterwards.
    queueMicrotask(() => PretextTextLayer.cleanup());

    const pdfDoc = this.#pdfDoc;
    this.#pdfDoc = null;
    this.#currentPage = 0;
    if (pdfDoc) {
      PretextPDFViewer.#destroyQuietly(pdfDoc);
    }
    this.#container.replaceChildren();
  }
}
|
|
212
|
+
|
|
213
|
+
export { PretextPDFViewer };
|