@embedpdf/engines 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +103 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +13 -1
- package/dist/index.js +104 -10
- package/dist/index.js.map +1 -1
- package/dist/pdfium-direct-engine.cjs +80 -12
- package/dist/pdfium-direct-engine.cjs.map +1 -1
- package/dist/pdfium-direct-engine.d.ts +8 -2
- package/dist/pdfium-direct-engine.js +80 -12
- package/dist/pdfium-direct-engine.js.map +1 -1
- package/dist/pdfium-worker-engine.cjs +24 -3
- package/dist/pdfium-worker-engine.cjs.map +1 -1
- package/dist/pdfium-worker-engine.d.ts +8 -2
- package/dist/pdfium-worker-engine.js +24 -3
- package/dist/pdfium-worker-engine.js.map +1 -1
- package/dist/pdfium.cjs +80 -9
- package/dist/pdfium.cjs.map +1 -1
- package/dist/pdfium.d.ts +7 -1
- package/dist/pdfium.js +81 -10
- package/dist/pdfium.js.map +1 -1
- package/dist/worker.cjs +20 -0
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts +7 -1
- package/dist/worker.js +20 -0
- package/dist/worker.js.map +1 -1
- package/package.json +3 -3
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
var models = require('@embedpdf/models');
|
|
4
3
|
var pdfium = require('@embedpdf/pdfium');
|
|
4
|
+
var models = require('@embedpdf/models');
|
|
5
5
|
|
|
6
6
|
/**
|
|
7
7
|
* Read string from WASM heap
|
|
@@ -1364,6 +1364,62 @@ class PdfiumEngine {
|
|
|
1364
1364
|
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, `ExtractText`, 'End', doc.id);
|
|
1365
1365
|
return models.PdfTaskHelper.resolve(text);
|
|
1366
1366
|
}
|
|
1367
|
+
/**
|
|
1368
|
+
* {@inheritDoc @embedpdf/models!PdfEngine.getTextSlices}
|
|
1369
|
+
*
|
|
1370
|
+
* @public
|
|
1371
|
+
*/
|
|
1372
|
+
getTextSlices(doc, slices) {
|
|
1373
|
+
this.logger.debug(LOG_SOURCE, LOG_CATEGORY, 'getTextSlices', doc, slices);
|
|
1374
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'Begin', doc.id);
|
|
1375
|
+
/* ⚠︎ 1 — trivial case */
|
|
1376
|
+
if (slices.length === 0) {
|
|
1377
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'End', doc.id);
|
|
1378
|
+
return models.PdfTaskHelper.resolve([]);
|
|
1379
|
+
}
|
|
1380
|
+
/* ⚠︎ 2 — document must be open */
|
|
1381
|
+
const ctx = this.cache.getContext(doc.id);
|
|
1382
|
+
if (!ctx) {
|
|
1383
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'End', doc.id);
|
|
1384
|
+
return models.PdfTaskHelper.reject({
|
|
1385
|
+
code: models.PdfErrorCode.DocNotOpen,
|
|
1386
|
+
message: 'document does not open',
|
|
1387
|
+
});
|
|
1388
|
+
}
|
|
1389
|
+
try {
|
|
1390
|
+
/* keep caller order */
|
|
1391
|
+
const out = new Array(slices.length);
|
|
1392
|
+
/* group → open each page once */
|
|
1393
|
+
const byPage = new Map();
|
|
1394
|
+
slices.forEach((s, i) => {
|
|
1395
|
+
(byPage.get(s.pageIndex) ?? byPage.set(s.pageIndex, []).get(s.pageIndex)).push({
|
|
1396
|
+
slice: s,
|
|
1397
|
+
pos: i,
|
|
1398
|
+
});
|
|
1399
|
+
});
|
|
1400
|
+
for (const [pageIdx, list] of byPage) {
|
|
1401
|
+
const pageCtx = ctx.acquirePage(pageIdx);
|
|
1402
|
+
const textPagePtr = pageCtx.getTextPage();
|
|
1403
|
+
for (const { slice, pos } of list) {
|
|
1404
|
+
const bufPtr = this.malloc(2 * (slice.charCount + 1)); // UTF-16 + NIL
|
|
1405
|
+
this.pdfiumModule.FPDFText_GetText(textPagePtr, slice.charIndex, slice.charCount, bufPtr);
|
|
1406
|
+
out[pos] = models.stripPdfUnwantedMarkers(this.pdfiumModule.pdfium.UTF16ToString(bufPtr));
|
|
1407
|
+
this.free(bufPtr);
|
|
1408
|
+
}
|
|
1409
|
+
pageCtx.release();
|
|
1410
|
+
}
|
|
1411
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'End', doc.id);
|
|
1412
|
+
return models.PdfTaskHelper.resolve(out);
|
|
1413
|
+
}
|
|
1414
|
+
catch (e) {
|
|
1415
|
+
this.logger.error(LOG_SOURCE, LOG_CATEGORY, 'getTextSlices error', e);
|
|
1416
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'End', doc.id);
|
|
1417
|
+
return models.PdfTaskHelper.reject({
|
|
1418
|
+
code: models.PdfErrorCode.Unknown,
|
|
1419
|
+
message: String(e),
|
|
1420
|
+
});
|
|
1421
|
+
}
|
|
1422
|
+
}
|
|
1367
1423
|
/**
|
|
1368
1424
|
* {@inheritDoc @embedpdf/models!PdfEngine.merge}
|
|
1369
1425
|
*
|
|
@@ -1893,14 +1949,12 @@ class PdfiumEngine {
|
|
|
1893
1949
|
const runs = [];
|
|
1894
1950
|
let current = null;
|
|
1895
1951
|
let curObjPtr = null;
|
|
1952
|
+
let bounds = null;
|
|
1896
1953
|
/** ── main loop ──────────────────────────────────────────── */
|
|
1897
1954
|
for (let i = 0; i < glyphs.length; i++) {
|
|
1898
1955
|
const g = glyphs[i];
|
|
1899
1956
|
/* 1 — find the CPDF_TextObject this glyph belongs to */
|
|
1900
1957
|
const objPtr = this.pdfiumModule.FPDFText_GetTextObject(textPagePtr, i);
|
|
1901
|
-
if (g.isEmpty) {
|
|
1902
|
-
continue;
|
|
1903
|
-
}
|
|
1904
1958
|
/* 2 — start a new run when the text object changes */
|
|
1905
1959
|
if (objPtr !== curObjPtr) {
|
|
1906
1960
|
curObjPtr = objPtr;
|
|
@@ -1914,6 +1968,12 @@ class PdfiumEngine {
|
|
|
1914
1968
|
charStart: i,
|
|
1915
1969
|
glyphs: [],
|
|
1916
1970
|
};
|
|
1971
|
+
bounds = {
|
|
1972
|
+
minX: g.origin.x,
|
|
1973
|
+
minY: g.origin.y,
|
|
1974
|
+
maxX: g.origin.x + g.size.width,
|
|
1975
|
+
maxY: g.origin.y + g.size.height,
|
|
1976
|
+
};
|
|
1917
1977
|
runs.push(current);
|
|
1918
1978
|
}
|
|
1919
1979
|
/* 3 — append the slim glyph record */
|
|
@@ -1922,16 +1982,24 @@ class PdfiumEngine {
|
|
|
1922
1982
|
y: g.origin.y,
|
|
1923
1983
|
width: g.size.width,
|
|
1924
1984
|
height: g.size.height,
|
|
1925
|
-
flags: g.isSpace ? 1 : 0,
|
|
1985
|
+
flags: g.isEmpty ? 2 : g.isSpace ? 1 : 0,
|
|
1926
1986
|
});
|
|
1927
1987
|
/* 4 — expand the run's bounding rect */
|
|
1988
|
+
if (g.isEmpty) {
|
|
1989
|
+
continue;
|
|
1990
|
+
}
|
|
1928
1991
|
const right = g.origin.x + g.size.width;
|
|
1929
1992
|
const bottom = g.origin.y + g.size.height;
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1993
|
+
// Update bounds
|
|
1994
|
+
bounds.minX = Math.min(bounds.minX, g.origin.x);
|
|
1995
|
+
bounds.minY = Math.min(bounds.minY, g.origin.y);
|
|
1996
|
+
bounds.maxX = Math.max(bounds.maxX, right);
|
|
1997
|
+
bounds.maxY = Math.max(bounds.maxY, bottom);
|
|
1998
|
+
// Calculate final rect from bounds
|
|
1999
|
+
current.rect.x = bounds.minX;
|
|
2000
|
+
current.rect.y = bounds.minY;
|
|
2001
|
+
current.rect.width = bounds.maxX - bounds.minX;
|
|
2002
|
+
current.rect.height = bounds.maxY - bounds.minY;
|
|
1935
2003
|
}
|
|
1936
2004
|
return runs;
|
|
1937
2005
|
}
|
|
@@ -4026,11 +4094,11 @@ class PdfiumEngine {
|
|
|
4026
4094
|
}
|
|
4027
4095
|
}
|
|
4028
4096
|
|
|
4029
|
-
async function createPdfiumEngine(wasmUrl) {
|
|
4097
|
+
/**
 * Download the PDFium WebAssembly binary and create an engine around it.
 *
 * @param wasmUrl - URL handed to `fetch` to retrieve the wasm binary
 * @param logger - optional logger forwarded as the second argument of the
 *   `PdfiumEngine` constructor
 * @returns promise resolving to a new `PdfiumEngine` instance
 * @throws Error when the HTTP response status is not in the 2xx range
 */
async function createPdfiumEngine(wasmUrl, logger) {
    const response = await fetch(wasmUrl);
    // fetch() only rejects on network failure; an HTTP error page would
    // otherwise be passed to pdfium.init as if it were the wasm binary.
    if (!response.ok) {
        throw new Error(`Failed to fetch PDFium wasm from ${wasmUrl}: ${response.status} ${response.statusText}`);
    }
    const wasmBinary = await response.arrayBuffer();
    const wasmModule = await pdfium.init({ wasmBinary });
    return new PdfiumEngine(wasmModule, logger);
}
|
|
4035
4103
|
|
|
4036
4104
|
exports.createPdfiumEngine = createPdfiumEngine;
|