@embedpdf/engines 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/converters.cjs +10 -29
- package/dist/converters.cjs.map +1 -1
- package/dist/converters.d.ts +6 -12
- package/dist/converters.js +11 -29
- package/dist/converters.js.map +1 -1
- package/dist/index.cjs +113 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +18 -6
- package/dist/index.js +114 -20
- package/dist/index.js.map +1 -1
- package/dist/pdfium-direct-engine.cjs +86 -18
- package/dist/pdfium-direct-engine.cjs.map +1 -1
- package/dist/pdfium-direct-engine.d.ts +11 -5
- package/dist/pdfium-direct-engine.js +86 -18
- package/dist/pdfium-direct-engine.js.map +1 -1
- package/dist/pdfium-worker-engine.cjs +28 -7
- package/dist/pdfium-worker-engine.cjs.map +1 -1
- package/dist/pdfium-worker-engine.d.ts +10 -4
- package/dist/pdfium-worker-engine.js +28 -7
- package/dist/pdfium-worker-engine.js.map +1 -1
- package/dist/pdfium.cjs +86 -15
- package/dist/pdfium.cjs.map +1 -1
- package/dist/pdfium.d.ts +10 -4
- package/dist/pdfium.js +87 -16
- package/dist/pdfium.js.map +1 -1
- package/dist/worker.cjs +24 -4
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts +9 -3
- package/dist/worker.js +24 -4
- package/dist/worker.js.map +1 -1
- package/package.json +3 -3
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
var models = require('@embedpdf/models');
|
|
4
3
|
var pdfium = require('@embedpdf/pdfium');
|
|
4
|
+
var models = require('@embedpdf/models');
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* Read string from WASM heap
|
|
@@ -267,7 +267,7 @@ var PdfiumErrorCode;
|
|
|
267
267
|
PdfiumErrorCode[PdfiumErrorCode["XFALoad"] = 7] = "XFALoad";
|
|
268
268
|
PdfiumErrorCode[PdfiumErrorCode["XFALayout"] = 8] = "XFALayout";
|
|
269
269
|
})(PdfiumErrorCode || (PdfiumErrorCode = {}));
|
|
270
|
-
const browserImageDataToBlobConverter = (pdfImageData) => {
|
|
270
|
+
const browserImageDataToBlobConverter = (pdfImageData, imageType = 'image/webp') => {
|
|
271
271
|
// Check if we're in a browser environment
|
|
272
272
|
if (typeof OffscreenCanvas === 'undefined') {
|
|
273
273
|
throw new Error('OffscreenCanvas is not available in this environment. ' +
|
|
@@ -277,7 +277,7 @@ const browserImageDataToBlobConverter = (pdfImageData) => {
|
|
|
277
277
|
const imageData = new ImageData(pdfImageData.data, pdfImageData.width, pdfImageData.height);
|
|
278
278
|
const off = new OffscreenCanvas(imageData.width, imageData.height);
|
|
279
279
|
off.getContext('2d').putImageData(imageData, 0, 0);
|
|
280
|
-
return off.convertToBlob({ type: 'image/webp' });
|
|
280
|
+
return off.convertToBlob({ type: imageType });
|
|
281
281
|
};
|
|
282
282
|
/**
|
|
283
283
|
* Pdf engine that based on pdfium wasm
|
|
@@ -783,7 +783,7 @@ class PdfiumEngine {
|
|
|
783
783
|
*
|
|
784
784
|
* @public
|
|
785
785
|
*/
|
|
786
|
-
renderPage(doc, page, scaleFactor = 1, rotation = models.Rotation.Degree0, dpr = 1, options = { withAnnotations: false }) {
|
|
786
|
+
renderPage(doc, page, scaleFactor = 1, rotation = models.Rotation.Degree0, dpr = 1, options = { withAnnotations: false }, imageType = 'image/webp') {
|
|
787
787
|
const task = new models.Task();
|
|
788
788
|
this.logger.debug(LOG_SOURCE, LOG_CATEGORY, 'renderPage', doc, page, scaleFactor, rotation, dpr, options);
|
|
789
789
|
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, `RenderPage`, 'Begin', `${doc.id}-${page.index}`);
|
|
@@ -800,7 +800,7 @@ class PdfiumEngine {
|
|
|
800
800
|
size: page.size,
|
|
801
801
|
}, scaleFactor, rotation, dpr, options);
|
|
802
802
|
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, `RenderPage`, 'End', `${doc.id}-${page.index}`);
|
|
803
|
-
this.imageDataConverter(imageData).then((blob) => task.resolve(blob));
|
|
803
|
+
this.imageDataConverter(imageData, imageType).then((blob) => task.resolve(blob));
|
|
804
804
|
return task;
|
|
805
805
|
}
|
|
806
806
|
/**
|
|
@@ -808,7 +808,7 @@ class PdfiumEngine {
|
|
|
808
808
|
*
|
|
809
809
|
* @public
|
|
810
810
|
*/
|
|
811
|
-
renderPageRect(doc, page, scaleFactor, rotation, dpr, rect, options) {
|
|
811
|
+
renderPageRect(doc, page, scaleFactor, rotation, dpr, rect, options, imageType = 'image/webp') {
|
|
812
812
|
const task = new models.Task();
|
|
813
813
|
this.logger.debug(LOG_SOURCE, LOG_CATEGORY, 'renderPageRect', doc, page, scaleFactor, rotation, dpr, rect, options);
|
|
814
814
|
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, `RenderPageRect`, 'Begin', `${doc.id}-${page.index}`);
|
|
@@ -822,7 +822,7 @@ class PdfiumEngine {
|
|
|
822
822
|
}
|
|
823
823
|
const imageData = this.renderPageRectToImageData(ctx, page, rect, scaleFactor, rotation, dpr, options);
|
|
824
824
|
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, `RenderPageRect`, 'End', `${doc.id}-${page.index}`);
|
|
825
|
-
this.imageDataConverter(imageData).then((blob) => task.resolve(blob));
|
|
825
|
+
this.imageDataConverter(imageData, imageType).then((blob) => task.resolve(blob));
|
|
826
826
|
return task;
|
|
827
827
|
}
|
|
828
828
|
/**
|
|
@@ -1364,6 +1364,62 @@ class PdfiumEngine {
|
|
|
1364
1364
|
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, `ExtractText`, 'End', doc.id);
|
|
1365
1365
|
return models.PdfTaskHelper.resolve(text);
|
|
1366
1366
|
}
|
|
1367
|
+
/**
|
|
1368
|
+
* {@inheritDoc @embedpdf/models!PdfEngine.getTextSlices}
|
|
1369
|
+
*
|
|
1370
|
+
* @public
|
|
1371
|
+
*/
|
|
1372
|
+
getTextSlices(doc, slices) {
|
|
1373
|
+
this.logger.debug(LOG_SOURCE, LOG_CATEGORY, 'getTextSlices', doc, slices);
|
|
1374
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'Begin', doc.id);
|
|
1375
|
+
/* ⚠︎ 1 — trivial case */
|
|
1376
|
+
if (slices.length === 0) {
|
|
1377
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'End', doc.id);
|
|
1378
|
+
return models.PdfTaskHelper.resolve([]);
|
|
1379
|
+
}
|
|
1380
|
+
/* ⚠︎ 2 — document must be open */
|
|
1381
|
+
const ctx = this.cache.getContext(doc.id);
|
|
1382
|
+
if (!ctx) {
|
|
1383
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'End', doc.id);
|
|
1384
|
+
return models.PdfTaskHelper.reject({
|
|
1385
|
+
code: models.PdfErrorCode.DocNotOpen,
|
|
1386
|
+
message: 'document does not open',
|
|
1387
|
+
});
|
|
1388
|
+
}
|
|
1389
|
+
try {
|
|
1390
|
+
/* keep caller order */
|
|
1391
|
+
const out = new Array(slices.length);
|
|
1392
|
+
/* group → open each page once */
|
|
1393
|
+
const byPage = new Map();
|
|
1394
|
+
slices.forEach((s, i) => {
|
|
1395
|
+
(byPage.get(s.pageIndex) ?? byPage.set(s.pageIndex, []).get(s.pageIndex)).push({
|
|
1396
|
+
slice: s,
|
|
1397
|
+
pos: i,
|
|
1398
|
+
});
|
|
1399
|
+
});
|
|
1400
|
+
for (const [pageIdx, list] of byPage) {
|
|
1401
|
+
const pageCtx = ctx.acquirePage(pageIdx);
|
|
1402
|
+
const textPagePtr = pageCtx.getTextPage();
|
|
1403
|
+
for (const { slice, pos } of list) {
|
|
1404
|
+
const bufPtr = this.malloc(2 * (slice.charCount + 1)); // UTF-16 + NIL
|
|
1405
|
+
this.pdfiumModule.FPDFText_GetText(textPagePtr, slice.charIndex, slice.charCount, bufPtr);
|
|
1406
|
+
out[pos] = models.stripPdfUnwantedMarkers(this.pdfiumModule.pdfium.UTF16ToString(bufPtr));
|
|
1407
|
+
this.free(bufPtr);
|
|
1408
|
+
}
|
|
1409
|
+
pageCtx.release();
|
|
1410
|
+
}
|
|
1411
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'End', doc.id);
|
|
1412
|
+
return models.PdfTaskHelper.resolve(out);
|
|
1413
|
+
}
|
|
1414
|
+
catch (e) {
|
|
1415
|
+
this.logger.error(LOG_SOURCE, LOG_CATEGORY, 'getTextSlices error', e);
|
|
1416
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'End', doc.id);
|
|
1417
|
+
return models.PdfTaskHelper.reject({
|
|
1418
|
+
code: models.PdfErrorCode.Unknown,
|
|
1419
|
+
message: String(e),
|
|
1420
|
+
});
|
|
1421
|
+
}
|
|
1422
|
+
}
|
|
1367
1423
|
/**
|
|
1368
1424
|
* {@inheritDoc @embedpdf/models!PdfEngine.merge}
|
|
1369
1425
|
*
|
|
@@ -1893,14 +1949,12 @@ class PdfiumEngine {
|
|
|
1893
1949
|
const runs = [];
|
|
1894
1950
|
let current = null;
|
|
1895
1951
|
let curObjPtr = null;
|
|
1952
|
+
let bounds = null;
|
|
1896
1953
|
/** ── main loop ──────────────────────────────────────────── */
|
|
1897
1954
|
for (let i = 0; i < glyphs.length; i++) {
|
|
1898
1955
|
const g = glyphs[i];
|
|
1899
1956
|
/* 1 — find the CPDF_TextObject this glyph belongs to */
|
|
1900
1957
|
const objPtr = this.pdfiumModule.FPDFText_GetTextObject(textPagePtr, i);
|
|
1901
|
-
if (g.isEmpty) {
|
|
1902
|
-
continue;
|
|
1903
|
-
}
|
|
1904
1958
|
/* 2 — start a new run when the text object changes */
|
|
1905
1959
|
if (objPtr !== curObjPtr) {
|
|
1906
1960
|
curObjPtr = objPtr;
|
|
@@ -1914,6 +1968,12 @@ class PdfiumEngine {
|
|
|
1914
1968
|
charStart: i,
|
|
1915
1969
|
glyphs: [],
|
|
1916
1970
|
};
|
|
1971
|
+
bounds = {
|
|
1972
|
+
minX: g.origin.x,
|
|
1973
|
+
minY: g.origin.y,
|
|
1974
|
+
maxX: g.origin.x + g.size.width,
|
|
1975
|
+
maxY: g.origin.y + g.size.height,
|
|
1976
|
+
};
|
|
1917
1977
|
runs.push(current);
|
|
1918
1978
|
}
|
|
1919
1979
|
/* 3 — append the slim glyph record */
|
|
@@ -1922,16 +1982,24 @@ class PdfiumEngine {
|
|
|
1922
1982
|
y: g.origin.y,
|
|
1923
1983
|
width: g.size.width,
|
|
1924
1984
|
height: g.size.height,
|
|
1925
|
-
flags: g.isSpace ? 1 : 0,
|
|
1985
|
+
flags: g.isEmpty ? 2 : g.isSpace ? 1 : 0,
|
|
1926
1986
|
});
|
|
1927
1987
|
/* 4 — expand the run's bounding rect */
|
|
1988
|
+
if (g.isEmpty) {
|
|
1989
|
+
continue;
|
|
1990
|
+
}
|
|
1928
1991
|
const right = g.origin.x + g.size.width;
|
|
1929
1992
|
const bottom = g.origin.y + g.size.height;
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1993
|
+
// Update bounds
|
|
1994
|
+
bounds.minX = Math.min(bounds.minX, g.origin.x);
|
|
1995
|
+
bounds.minY = Math.min(bounds.minY, g.origin.y);
|
|
1996
|
+
bounds.maxX = Math.max(bounds.maxX, right);
|
|
1997
|
+
bounds.maxY = Math.max(bounds.maxY, bottom);
|
|
1998
|
+
// Calculate final rect from bounds
|
|
1999
|
+
current.rect.x = bounds.minX;
|
|
2000
|
+
current.rect.y = bounds.minY;
|
|
2001
|
+
current.rect.width = bounds.maxX - bounds.minX;
|
|
2002
|
+
current.rect.height = bounds.maxY - bounds.minY;
|
|
1935
2003
|
}
|
|
1936
2004
|
return runs;
|
|
1937
2005
|
}
|
|
@@ -4026,11 +4094,11 @@ class PdfiumEngine {
|
|
|
4026
4094
|
}
|
|
4027
4095
|
}
|
|
4028
4096
|
|
|
4029
|
-
async function createPdfiumEngine(wasmUrl) {
|
|
4097
|
+
async function createPdfiumEngine(wasmUrl, logger) {
|
|
4030
4098
|
const response = await fetch(wasmUrl);
|
|
4031
4099
|
const wasmBinary = await response.arrayBuffer();
|
|
4032
4100
|
const wasmModule = await pdfium.init({ wasmBinary });
|
|
4033
|
-
return new PdfiumEngine(wasmModule);
|
|
4101
|
+
return new PdfiumEngine(wasmModule, logger);
|
|
4034
4102
|
}
|
|
4035
4103
|
|
|
4036
4104
|
exports.createPdfiumEngine = createPdfiumEngine;
|