@embedpdf/engines 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +103 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +13 -1
- package/dist/index.js +104 -10
- package/dist/index.js.map +1 -1
- package/dist/pdfium-direct-engine.cjs +80 -12
- package/dist/pdfium-direct-engine.cjs.map +1 -1
- package/dist/pdfium-direct-engine.d.ts +8 -2
- package/dist/pdfium-direct-engine.js +80 -12
- package/dist/pdfium-direct-engine.js.map +1 -1
- package/dist/pdfium-worker-engine.cjs +24 -3
- package/dist/pdfium-worker-engine.cjs.map +1 -1
- package/dist/pdfium-worker-engine.d.ts +8 -2
- package/dist/pdfium-worker-engine.js +24 -3
- package/dist/pdfium-worker-engine.js.map +1 -1
- package/dist/pdfium.cjs +80 -9
- package/dist/pdfium.cjs.map +1 -1
- package/dist/pdfium.d.ts +7 -1
- package/dist/pdfium.js +81 -10
- package/dist/pdfium.js.map +1 -1
- package/dist/worker.cjs +20 -0
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts +7 -1
- package/dist/worker.js +20 -0
- package/dist/worker.js.map +1 -1
- package/package.json +3 -3
package/dist/index.cjs
CHANGED
|
@@ -1364,6 +1364,62 @@ class PdfiumEngine {
|
|
|
1364
1364
|
this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, `ExtractText`, 'End', doc.id);
|
|
1365
1365
|
return models.PdfTaskHelper.resolve(text);
|
|
1366
1366
|
}
|
|
1367
|
+
/**
|
|
1368
|
+
* {@inheritDoc @embedpdf/models!PdfEngine.getTextSlices}
|
|
1369
|
+
*
|
|
1370
|
+
* @public
|
|
1371
|
+
*/
|
|
1372
|
+
getTextSlices(doc, slices) {
|
|
1373
|
+
this.logger.debug(LOG_SOURCE$2, LOG_CATEGORY$2, 'getTextSlices', doc, slices);
|
|
1374
|
+
this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'Begin', doc.id);
|
|
1375
|
+
/* ⚠︎ 1 — trivial case */
|
|
1376
|
+
if (slices.length === 0) {
|
|
1377
|
+
this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'End', doc.id);
|
|
1378
|
+
return models.PdfTaskHelper.resolve([]);
|
|
1379
|
+
}
|
|
1380
|
+
/* ⚠︎ 2 — document must be open */
|
|
1381
|
+
const ctx = this.cache.getContext(doc.id);
|
|
1382
|
+
if (!ctx) {
|
|
1383
|
+
this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'End', doc.id);
|
|
1384
|
+
return models.PdfTaskHelper.reject({
|
|
1385
|
+
code: models.PdfErrorCode.DocNotOpen,
|
|
1386
|
+
message: 'document does not open',
|
|
1387
|
+
});
|
|
1388
|
+
}
|
|
1389
|
+
try {
|
|
1390
|
+
/* keep caller order */
|
|
1391
|
+
const out = new Array(slices.length);
|
|
1392
|
+
/* group → open each page once */
|
|
1393
|
+
const byPage = new Map();
|
|
1394
|
+
slices.forEach((s, i) => {
|
|
1395
|
+
(byPage.get(s.pageIndex) ?? byPage.set(s.pageIndex, []).get(s.pageIndex)).push({
|
|
1396
|
+
slice: s,
|
|
1397
|
+
pos: i,
|
|
1398
|
+
});
|
|
1399
|
+
});
|
|
1400
|
+
for (const [pageIdx, list] of byPage) {
|
|
1401
|
+
const pageCtx = ctx.acquirePage(pageIdx);
|
|
1402
|
+
const textPagePtr = pageCtx.getTextPage();
|
|
1403
|
+
for (const { slice, pos } of list) {
|
|
1404
|
+
const bufPtr = this.malloc(2 * (slice.charCount + 1)); // UTF-16 + NIL
|
|
1405
|
+
this.pdfiumModule.FPDFText_GetText(textPagePtr, slice.charIndex, slice.charCount, bufPtr);
|
|
1406
|
+
out[pos] = models.stripPdfUnwantedMarkers(this.pdfiumModule.pdfium.UTF16ToString(bufPtr));
|
|
1407
|
+
this.free(bufPtr);
|
|
1408
|
+
}
|
|
1409
|
+
pageCtx.release();
|
|
1410
|
+
}
|
|
1411
|
+
this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'End', doc.id);
|
|
1412
|
+
return models.PdfTaskHelper.resolve(out);
|
|
1413
|
+
}
|
|
1414
|
+
catch (e) {
|
|
1415
|
+
this.logger.error(LOG_SOURCE$2, LOG_CATEGORY$2, 'getTextSlices error', e);
|
|
1416
|
+
this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'End', doc.id);
|
|
1417
|
+
return models.PdfTaskHelper.reject({
|
|
1418
|
+
code: models.PdfErrorCode.Unknown,
|
|
1419
|
+
message: String(e),
|
|
1420
|
+
});
|
|
1421
|
+
}
|
|
1422
|
+
}
|
|
1367
1423
|
/**
|
|
1368
1424
|
* {@inheritDoc @embedpdf/models!PdfEngine.merge}
|
|
1369
1425
|
*
|
|
@@ -1893,14 +1949,12 @@ class PdfiumEngine {
|
|
|
1893
1949
|
const runs = [];
|
|
1894
1950
|
let current = null;
|
|
1895
1951
|
let curObjPtr = null;
|
|
1952
|
+
let bounds = null;
|
|
1896
1953
|
/** ── main loop ──────────────────────────────────────────── */
|
|
1897
1954
|
for (let i = 0; i < glyphs.length; i++) {
|
|
1898
1955
|
const g = glyphs[i];
|
|
1899
1956
|
/* 1 — find the CPDF_TextObject this glyph belongs to */
|
|
1900
1957
|
const objPtr = this.pdfiumModule.FPDFText_GetTextObject(textPagePtr, i);
|
|
1901
|
-
if (g.isEmpty) {
|
|
1902
|
-
continue;
|
|
1903
|
-
}
|
|
1904
1958
|
/* 2 — start a new run when the text object changes */
|
|
1905
1959
|
if (objPtr !== curObjPtr) {
|
|
1906
1960
|
curObjPtr = objPtr;
|
|
@@ -1914,6 +1968,12 @@ class PdfiumEngine {
|
|
|
1914
1968
|
charStart: i,
|
|
1915
1969
|
glyphs: [],
|
|
1916
1970
|
};
|
|
1971
|
+
bounds = {
|
|
1972
|
+
minX: g.origin.x,
|
|
1973
|
+
minY: g.origin.y,
|
|
1974
|
+
maxX: g.origin.x + g.size.width,
|
|
1975
|
+
maxY: g.origin.y + g.size.height,
|
|
1976
|
+
};
|
|
1917
1977
|
runs.push(current);
|
|
1918
1978
|
}
|
|
1919
1979
|
/* 3 — append the slim glyph record */
|
|
@@ -1922,16 +1982,24 @@ class PdfiumEngine {
|
|
|
1922
1982
|
y: g.origin.y,
|
|
1923
1983
|
width: g.size.width,
|
|
1924
1984
|
height: g.size.height,
|
|
1925
|
-
flags: g.isSpace ? 1 : 0,
|
|
1985
|
+
flags: g.isEmpty ? 2 : g.isSpace ? 1 : 0,
|
|
1926
1986
|
});
|
|
1927
1987
|
/* 4 — expand the run's bounding rect */
|
|
1988
|
+
if (g.isEmpty) {
|
|
1989
|
+
continue;
|
|
1990
|
+
}
|
|
1928
1991
|
const right = g.origin.x + g.size.width;
|
|
1929
1992
|
const bottom = g.origin.y + g.size.height;
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1933
|
-
|
|
1934
|
-
|
|
1993
|
+
// Update bounds
|
|
1994
|
+
bounds.minX = Math.min(bounds.minX, g.origin.x);
|
|
1995
|
+
bounds.minY = Math.min(bounds.minY, g.origin.y);
|
|
1996
|
+
bounds.maxX = Math.max(bounds.maxX, right);
|
|
1997
|
+
bounds.maxY = Math.max(bounds.maxY, bottom);
|
|
1998
|
+
// Calculate final rect from bounds
|
|
1999
|
+
current.rect.x = bounds.minX;
|
|
2000
|
+
current.rect.y = bounds.minY;
|
|
2001
|
+
current.rect.width = bounds.maxX - bounds.minX;
|
|
2002
|
+
current.rect.height = bounds.maxY - bounds.minY;
|
|
1935
2003
|
}
|
|
1936
2004
|
return runs;
|
|
1937
2005
|
}
|
|
@@ -4177,6 +4245,9 @@ class EngineRunner {
|
|
|
4177
4245
|
case 'extractText':
|
|
4178
4246
|
task = this.engine[name](...args);
|
|
4179
4247
|
break;
|
|
4248
|
+
case 'getTextSlices':
|
|
4249
|
+
task = this.engine[name](...args);
|
|
4250
|
+
break;
|
|
4180
4251
|
case 'getPageGlyphs':
|
|
4181
4252
|
task = this.engine[name](...args);
|
|
4182
4253
|
break;
|
|
@@ -4937,6 +5008,26 @@ class WebWorkerEngine {
|
|
|
4937
5008
|
this.proxy(task, request);
|
|
4938
5009
|
return task;
|
|
4939
5010
|
}
|
|
5011
|
+
/**
|
|
5012
|
+
* {@inheritDoc @embedpdf/models!PdfEngine.getTextSlices}
|
|
5013
|
+
*
|
|
5014
|
+
* @public
|
|
5015
|
+
*/
|
|
5016
|
+
getTextSlices(doc, slices) {
|
|
5017
|
+
this.logger.debug(LOG_SOURCE, LOG_CATEGORY, 'getTextSlices', doc, slices);
|
|
5018
|
+
const requestId = this.generateRequestId(doc.id);
|
|
5019
|
+
const task = new WorkerTask(this.worker, requestId);
|
|
5020
|
+
const request = {
|
|
5021
|
+
id: requestId,
|
|
5022
|
+
type: 'ExecuteRequest',
|
|
5023
|
+
data: {
|
|
5024
|
+
name: 'getTextSlices',
|
|
5025
|
+
args: [doc, slices],
|
|
5026
|
+
},
|
|
5027
|
+
};
|
|
5028
|
+
this.proxy(task, request);
|
|
5029
|
+
return task;
|
|
5030
|
+
}
|
|
4940
5031
|
/**
|
|
4941
5032
|
* {@inheritDoc @embedpdf/models!PdfEngine.getPageGlyphs}
|
|
4942
5033
|
*
|
|
@@ -5301,6 +5392,9 @@ function createMockPdfEngine(partialEngine) {
|
|
|
5301
5392
|
extractText: (pdf, pageIndexes) => {
|
|
5302
5393
|
return models.PdfTaskHelper.resolve('');
|
|
5303
5394
|
},
|
|
5395
|
+
getTextSlices: (doc, slices) => {
|
|
5396
|
+
return models.PdfTaskHelper.resolve([]);
|
|
5397
|
+
},
|
|
5304
5398
|
getPageGlyphs: (doc, page) => {
|
|
5305
5399
|
return models.PdfTaskHelper.resolve([]);
|
|
5306
5400
|
},
|