@embedpdf/engines 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +103 -9
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +13 -1
- package/dist/index.js +104 -10
- package/dist/index.js.map +1 -1
- package/dist/pdfium-direct-engine.cjs +80 -12
- package/dist/pdfium-direct-engine.cjs.map +1 -1
- package/dist/pdfium-direct-engine.d.ts +8 -2
- package/dist/pdfium-direct-engine.js +80 -12
- package/dist/pdfium-direct-engine.js.map +1 -1
- package/dist/pdfium-worker-engine.cjs +24 -3
- package/dist/pdfium-worker-engine.cjs.map +1 -1
- package/dist/pdfium-worker-engine.d.ts +8 -2
- package/dist/pdfium-worker-engine.js +24 -3
- package/dist/pdfium-worker-engine.js.map +1 -1
- package/dist/pdfium.cjs +80 -9
- package/dist/pdfium.cjs.map +1 -1
- package/dist/pdfium.d.ts +7 -1
- package/dist/pdfium.js +81 -10
- package/dist/pdfium.js.map +1 -1
- package/dist/worker.cjs +20 -0
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts +7 -1
- package/dist/worker.js +20 -0
- package/dist/worker.js.map +1 -1
- package/package.json +3 -3
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { SearchTarget, PdfImage, ImageConversionTypes, PdfEngine, Logger, Task, PdfErrorReason, PdfFileUrl, PdfUrlOptions, PdfDocumentObject, PdfFile, PdfFileLoader, PdfSignatureObject, PdfBookmarkObject, PdfPageObject, Rotation, PdfRenderOptions, PdfTask, Rect, PdfAnnotationObject, PdfAnnotationTransformation, PdfTextRectObject, PdfAttachmentObject, PdfWidgetAnnoObject, FormFieldValue, PdfPageFlattenFlag, PdfPageFlattenResult, PdfInkListObject, PdfStampAnnoObjectContents, Position, PdfPageGeometry, PdfGlyphObject, MatchFlag, SearchAllPagesResult, PdfEngineMethodName, PdfEngineMethodArgs, TaskReturn, PdfEngineMethodReturnType, PdfMetadataObject, PdfBookmarksObject } from '@embedpdf/models';
|
|
1
|
+
import { SearchTarget, PdfImage, ImageConversionTypes, PdfEngine, Logger, Task, PdfErrorReason, PdfFileUrl, PdfUrlOptions, PdfDocumentObject, PdfFile, PdfFileLoader, PdfSignatureObject, PdfBookmarkObject, PdfPageObject, Rotation, PdfRenderOptions, PdfTask, Rect, PdfAnnotationObject, PdfAnnotationTransformation, PdfTextRectObject, PdfAttachmentObject, PdfWidgetAnnoObject, FormFieldValue, PdfPageFlattenFlag, PdfPageFlattenResult, PageTextSlice, PdfInkListObject, PdfStampAnnoObjectContents, Position, PdfPageGeometry, PdfGlyphObject, MatchFlag, SearchAllPagesResult, PdfEngineMethodName, PdfEngineMethodArgs, TaskReturn, PdfEngineMethodReturnType, PdfMetadataObject, PdfBookmarksObject } from '@embedpdf/models';
|
|
2
2
|
import { WrappedPdfiumModule, PdfiumRuntimeMethods, PdfiumModule } from '@embedpdf/pdfium';
|
|
3
3
|
|
|
4
4
|
/**
|
|
@@ -269,6 +269,12 @@ declare class PdfiumEngine<T = Blob> implements PdfEngine<T> {
|
|
|
269
269
|
* @public
|
|
270
270
|
*/
|
|
271
271
|
extractText(doc: PdfDocumentObject, pageIndexes: number[]): Task<any, PdfErrorReason> | Task<string, PdfErrorReason>;
|
|
272
|
+
/**
|
|
273
|
+
* {@inheritDoc @embedpdf/models!PdfEngine.getTextSlices}
|
|
274
|
+
*
|
|
275
|
+
* @public
|
|
276
|
+
*/
|
|
277
|
+
getTextSlices(doc: PdfDocumentObject, slices: PageTextSlice[]): PdfTask<string[]>;
|
|
272
278
|
/**
|
|
273
279
|
* {@inheritDoc @embedpdf/models!PdfEngine.merge}
|
|
274
280
|
*
|
|
@@ -1431,6 +1437,12 @@ declare class WebWorkerEngine implements PdfEngine {
|
|
|
1431
1437
|
* @public
|
|
1432
1438
|
*/
|
|
1433
1439
|
extractText(doc: PdfDocumentObject, pageIndexes: number[]): WorkerTask<string>;
|
|
1440
|
+
/**
|
|
1441
|
+
* {@inheritDoc @embedpdf/models!PdfEngine.getTextSlices}
|
|
1442
|
+
*
|
|
1443
|
+
* @public
|
|
1444
|
+
*/
|
|
1445
|
+
getTextSlices(doc: PdfDocumentObject, slices: PageTextSlice[]): WorkerTask<string[]>;
|
|
1434
1446
|
/**
|
|
1435
1447
|
* {@inheritDoc @embedpdf/models!PdfEngine.getPageGlyphs}
|
|
1436
1448
|
*
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { NoopLogger, PdfTaskHelper, PdfErrorCode, Task, Rotation, PdfAnnotationSubtype, PdfPageObjectType, PdfAnnotationObjectStatus, quadToRect, PDF_FORM_FIELD_TYPE, toIntRect, transformRect, toIntSize, transformSize, PdfActionType, PdfZoomMode, AppearanceMode, MatchFlag, swap, PdfPageFlattenResult } from '@embedpdf/models';
|
|
1
|
+
import { NoopLogger, PdfTaskHelper, PdfErrorCode, Task, Rotation, PdfAnnotationSubtype, stripPdfUnwantedMarkers, PdfPageObjectType, PdfAnnotationObjectStatus, quadToRect, PDF_FORM_FIELD_TYPE, toIntRect, transformRect, toIntSize, transformSize, PdfActionType, PdfZoomMode, AppearanceMode, MatchFlag, swap, PdfPageFlattenResult } from '@embedpdf/models';
|
|
2
2
|
import { init } from '@embedpdf/pdfium';
|
|
3
3
|
|
|
4
4
|
/**
|
|
@@ -1362,6 +1362,62 @@ class PdfiumEngine {
|
|
|
1362
1362
|
this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, `ExtractText`, 'End', doc.id);
|
|
1363
1363
|
return PdfTaskHelper.resolve(text);
|
|
1364
1364
|
}
|
|
1365
|
+
/**
|
|
1366
|
+
* {@inheritDoc @embedpdf/models!PdfEngine.getTextSlices}
|
|
1367
|
+
*
|
|
1368
|
+
* @public
|
|
1369
|
+
*/
|
|
1370
|
+
getTextSlices(doc, slices) {
|
|
1371
|
+
this.logger.debug(LOG_SOURCE$2, LOG_CATEGORY$2, 'getTextSlices', doc, slices);
|
|
1372
|
+
this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'Begin', doc.id);
|
|
1373
|
+
/* ⚠︎ 1 — trivial case */
|
|
1374
|
+
if (slices.length === 0) {
|
|
1375
|
+
this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'End', doc.id);
|
|
1376
|
+
return PdfTaskHelper.resolve([]);
|
|
1377
|
+
}
|
|
1378
|
+
/* ⚠︎ 2 — document must be open */
|
|
1379
|
+
const ctx = this.cache.getContext(doc.id);
|
|
1380
|
+
if (!ctx) {
|
|
1381
|
+
this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'End', doc.id);
|
|
1382
|
+
return PdfTaskHelper.reject({
|
|
1383
|
+
code: PdfErrorCode.DocNotOpen,
|
|
1384
|
+
message: 'document does not open',
|
|
1385
|
+
});
|
|
1386
|
+
}
|
|
1387
|
+
try {
|
|
1388
|
+
/* keep caller order */
|
|
1389
|
+
const out = new Array(slices.length);
|
|
1390
|
+
/* group → open each page once */
|
|
1391
|
+
const byPage = new Map();
|
|
1392
|
+
slices.forEach((s, i) => {
|
|
1393
|
+
(byPage.get(s.pageIndex) ?? byPage.set(s.pageIndex, []).get(s.pageIndex)).push({
|
|
1394
|
+
slice: s,
|
|
1395
|
+
pos: i,
|
|
1396
|
+
});
|
|
1397
|
+
});
|
|
1398
|
+
for (const [pageIdx, list] of byPage) {
|
|
1399
|
+
const pageCtx = ctx.acquirePage(pageIdx);
|
|
1400
|
+
const textPagePtr = pageCtx.getTextPage();
|
|
1401
|
+
for (const { slice, pos } of list) {
|
|
1402
|
+
const bufPtr = this.malloc(2 * (slice.charCount + 1)); // UTF-16 + NIL
|
|
1403
|
+
this.pdfiumModule.FPDFText_GetText(textPagePtr, slice.charIndex, slice.charCount, bufPtr);
|
|
1404
|
+
out[pos] = stripPdfUnwantedMarkers(this.pdfiumModule.pdfium.UTF16ToString(bufPtr));
|
|
1405
|
+
this.free(bufPtr);
|
|
1406
|
+
}
|
|
1407
|
+
pageCtx.release();
|
|
1408
|
+
}
|
|
1409
|
+
this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'End', doc.id);
|
|
1410
|
+
return PdfTaskHelper.resolve(out);
|
|
1411
|
+
}
|
|
1412
|
+
catch (e) {
|
|
1413
|
+
this.logger.error(LOG_SOURCE$2, LOG_CATEGORY$2, 'getTextSlices error', e);
|
|
1414
|
+
this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'End', doc.id);
|
|
1415
|
+
return PdfTaskHelper.reject({
|
|
1416
|
+
code: PdfErrorCode.Unknown,
|
|
1417
|
+
message: String(e),
|
|
1418
|
+
});
|
|
1419
|
+
}
|
|
1420
|
+
}
|
|
1365
1421
|
/**
|
|
1366
1422
|
* {@inheritDoc @embedpdf/models!PdfEngine.merge}
|
|
1367
1423
|
*
|
|
@@ -1891,14 +1947,12 @@ class PdfiumEngine {
|
|
|
1891
1947
|
const runs = [];
|
|
1892
1948
|
let current = null;
|
|
1893
1949
|
let curObjPtr = null;
|
|
1950
|
+
let bounds = null;
|
|
1894
1951
|
/** ── main loop ──────────────────────────────────────────── */
|
|
1895
1952
|
for (let i = 0; i < glyphs.length; i++) {
|
|
1896
1953
|
const g = glyphs[i];
|
|
1897
1954
|
/* 1 — find the CPDF_TextObject this glyph belongs to */
|
|
1898
1955
|
const objPtr = this.pdfiumModule.FPDFText_GetTextObject(textPagePtr, i);
|
|
1899
|
-
if (g.isEmpty) {
|
|
1900
|
-
continue;
|
|
1901
|
-
}
|
|
1902
1956
|
/* 2 — start a new run when the text object changes */
|
|
1903
1957
|
if (objPtr !== curObjPtr) {
|
|
1904
1958
|
curObjPtr = objPtr;
|
|
@@ -1912,6 +1966,12 @@ class PdfiumEngine {
|
|
|
1912
1966
|
charStart: i,
|
|
1913
1967
|
glyphs: [],
|
|
1914
1968
|
};
|
|
1969
|
+
bounds = {
|
|
1970
|
+
minX: g.origin.x,
|
|
1971
|
+
minY: g.origin.y,
|
|
1972
|
+
maxX: g.origin.x + g.size.width,
|
|
1973
|
+
maxY: g.origin.y + g.size.height,
|
|
1974
|
+
};
|
|
1915
1975
|
runs.push(current);
|
|
1916
1976
|
}
|
|
1917
1977
|
/* 3 — append the slim glyph record */
|
|
@@ -1920,16 +1980,24 @@ class PdfiumEngine {
|
|
|
1920
1980
|
y: g.origin.y,
|
|
1921
1981
|
width: g.size.width,
|
|
1922
1982
|
height: g.size.height,
|
|
1923
|
-
flags: g.isSpace ? 1 : 0,
|
|
1983
|
+
flags: g.isEmpty ? 2 : g.isSpace ? 1 : 0,
|
|
1924
1984
|
});
|
|
1925
1985
|
/* 4 — expand the run's bounding rect */
|
|
1986
|
+
if (g.isEmpty) {
|
|
1987
|
+
continue;
|
|
1988
|
+
}
|
|
1926
1989
|
const right = g.origin.x + g.size.width;
|
|
1927
1990
|
const bottom = g.origin.y + g.size.height;
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1991
|
+
// Update bounds
|
|
1992
|
+
bounds.minX = Math.min(bounds.minX, g.origin.x);
|
|
1993
|
+
bounds.minY = Math.min(bounds.minY, g.origin.y);
|
|
1994
|
+
bounds.maxX = Math.max(bounds.maxX, right);
|
|
1995
|
+
bounds.maxY = Math.max(bounds.maxY, bottom);
|
|
1996
|
+
// Calculate final rect from bounds
|
|
1997
|
+
current.rect.x = bounds.minX;
|
|
1998
|
+
current.rect.y = bounds.minY;
|
|
1999
|
+
current.rect.width = bounds.maxX - bounds.minX;
|
|
2000
|
+
current.rect.height = bounds.maxY - bounds.minY;
|
|
1933
2001
|
}
|
|
1934
2002
|
return runs;
|
|
1935
2003
|
}
|
|
@@ -4175,6 +4243,9 @@ class EngineRunner {
|
|
|
4175
4243
|
case 'extractText':
|
|
4176
4244
|
task = this.engine[name](...args);
|
|
4177
4245
|
break;
|
|
4246
|
+
case 'getTextSlices':
|
|
4247
|
+
task = this.engine[name](...args);
|
|
4248
|
+
break;
|
|
4178
4249
|
case 'getPageGlyphs':
|
|
4179
4250
|
task = this.engine[name](...args);
|
|
4180
4251
|
break;
|
|
@@ -4935,6 +5006,26 @@ class WebWorkerEngine {
|
|
|
4935
5006
|
this.proxy(task, request);
|
|
4936
5007
|
return task;
|
|
4937
5008
|
}
|
|
5009
|
+
/**
|
|
5010
|
+
* {@inheritDoc @embedpdf/models!PdfEngine.getTextSlices}
|
|
5011
|
+
*
|
|
5012
|
+
* @public
|
|
5013
|
+
*/
|
|
5014
|
+
getTextSlices(doc, slices) {
|
|
5015
|
+
this.logger.debug(LOG_SOURCE, LOG_CATEGORY, 'getTextSlices', doc, slices);
|
|
5016
|
+
const requestId = this.generateRequestId(doc.id);
|
|
5017
|
+
const task = new WorkerTask(this.worker, requestId);
|
|
5018
|
+
const request = {
|
|
5019
|
+
id: requestId,
|
|
5020
|
+
type: 'ExecuteRequest',
|
|
5021
|
+
data: {
|
|
5022
|
+
name: 'getTextSlices',
|
|
5023
|
+
args: [doc, slices],
|
|
5024
|
+
},
|
|
5025
|
+
};
|
|
5026
|
+
this.proxy(task, request);
|
|
5027
|
+
return task;
|
|
5028
|
+
}
|
|
4938
5029
|
/**
|
|
4939
5030
|
* {@inheritDoc @embedpdf/models!PdfEngine.getPageGlyphs}
|
|
4940
5031
|
*
|
|
@@ -5299,6 +5390,9 @@ function createMockPdfEngine(partialEngine) {
|
|
|
5299
5390
|
extractText: (pdf, pageIndexes) => {
|
|
5300
5391
|
return PdfTaskHelper.resolve('');
|
|
5301
5392
|
},
|
|
5393
|
+
getTextSlices: (doc, slices) => {
|
|
5394
|
+
return PdfTaskHelper.resolve([]);
|
|
5395
|
+
},
|
|
5302
5396
|
getPageGlyphs: (doc, page) => {
|
|
5303
5397
|
return PdfTaskHelper.resolve([]);
|
|
5304
5398
|
},
|