@embedpdf/engines 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/converters.cjs +10 -29
- package/dist/converters.cjs.map +1 -1
- package/dist/converters.d.ts +6 -12
- package/dist/converters.js +11 -29
- package/dist/converters.js.map +1 -1
- package/dist/index.cjs +113 -19
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +18 -6
- package/dist/index.js +114 -20
- package/dist/index.js.map +1 -1
- package/dist/pdfium-direct-engine.cjs +86 -18
- package/dist/pdfium-direct-engine.cjs.map +1 -1
- package/dist/pdfium-direct-engine.d.ts +11 -5
- package/dist/pdfium-direct-engine.js +86 -18
- package/dist/pdfium-direct-engine.js.map +1 -1
- package/dist/pdfium-worker-engine.cjs +28 -7
- package/dist/pdfium-worker-engine.cjs.map +1 -1
- package/dist/pdfium-worker-engine.d.ts +10 -4
- package/dist/pdfium-worker-engine.js +28 -7
- package/dist/pdfium-worker-engine.js.map +1 -1
- package/dist/pdfium.cjs +86 -15
- package/dist/pdfium.cjs.map +1 -1
- package/dist/pdfium.d.ts +10 -4
- package/dist/pdfium.js +87 -16
- package/dist/pdfium.js.map +1 -1
- package/dist/worker.cjs +24 -4
- package/dist/worker.cjs.map +1 -1
- package/dist/worker.d.ts +9 -3
- package/dist/worker.js +24 -4
- package/dist/worker.js.map +1 -1
- package/package.json +3 -3
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { PdfEngine, Logger, Task, PdfErrorReason, PdfFileUrl, PdfUrlOptions, PdfDocumentObject, PdfFile, PdfFileLoader, PdfSignatureObject, PdfBookmarkObject, PdfPageObject, Rotation, PdfRenderOptions, PdfTask, Rect, PdfAnnotationObject, PdfAnnotationTransformation, PdfTextRectObject, PdfAttachmentObject, PdfWidgetAnnoObject, FormFieldValue, PdfPageFlattenFlag, PdfPageFlattenResult, PdfInkListObject, PdfStampAnnoObjectContents, Position, PdfPageGeometry, PdfGlyphObject, MatchFlag, SearchAllPagesResult, PdfImage } from '@embedpdf/models';
|
|
1
|
+
import { PdfEngine, Logger, Task, PdfErrorReason, PdfFileUrl, PdfUrlOptions, PdfDocumentObject, PdfFile, PdfFileLoader, PdfSignatureObject, PdfBookmarkObject, PdfPageObject, Rotation, PdfRenderOptions, ImageConversionTypes, PdfTask, Rect, PdfAnnotationObject, PdfAnnotationTransformation, PdfTextRectObject, PdfAttachmentObject, PdfWidgetAnnoObject, FormFieldValue, PdfPageFlattenFlag, PdfPageFlattenResult, PageTextSlice, PdfInkListObject, PdfStampAnnoObjectContents, Position, PdfPageGeometry, PdfGlyphObject, MatchFlag, SearchAllPagesResult, PdfImage } from '@embedpdf/models';
|
|
2
2
|
import { WrappedPdfiumModule } from '@embedpdf/pdfium';
|
|
3
3
|
|
|
4
4
|
/**
|
|
@@ -6,7 +6,7 @@ import { WrappedPdfiumModule } from '@embedpdf/pdfium';
|
|
|
6
6
|
* In browser: uses OffscreenCanvas
|
|
7
7
|
* In Node.js: can use Sharp or other image processing libraries
|
|
8
8
|
*/
|
|
9
|
-
type ImageDataConverter<T = Blob> = (imageData: PdfImage) => Promise<T>;
|
|
9
|
+
type ImageDataConverter<T = Blob> = (imageData: PdfImage, imageType?: ImageConversionTypes) => Promise<T>;
|
|
10
10
|
/**
|
|
11
11
|
* Pdf engine that based on pdfium wasm
|
|
12
12
|
*/
|
|
@@ -127,13 +127,13 @@ declare class PdfiumEngine<T = Blob> implements PdfEngine<T> {
|
|
|
127
127
|
*
|
|
128
128
|
* @public
|
|
129
129
|
*/
|
|
130
|
-
renderPage(doc: PdfDocumentObject, page: PdfPageObject, scaleFactor?: number, rotation?: Rotation, dpr?: number, options?: PdfRenderOptions): PdfTask<T>;
|
|
130
|
+
renderPage(doc: PdfDocumentObject, page: PdfPageObject, scaleFactor?: number, rotation?: Rotation, dpr?: number, options?: PdfRenderOptions, imageType?: ImageConversionTypes): PdfTask<T>;
|
|
131
131
|
/**
|
|
132
132
|
* {@inheritDoc @embedpdf/models!PdfEngine.renderPageRect}
|
|
133
133
|
*
|
|
134
134
|
* @public
|
|
135
135
|
*/
|
|
136
|
-
renderPageRect(doc: PdfDocumentObject, page: PdfPageObject, scaleFactor: number, rotation: Rotation, dpr: number, rect: Rect, options: PdfRenderOptions): PdfTask<T>;
|
|
136
|
+
renderPageRect(doc: PdfDocumentObject, page: PdfPageObject, scaleFactor: number, rotation: Rotation, dpr: number, rect: Rect, options: PdfRenderOptions, imageType?: ImageConversionTypes): PdfTask<T>;
|
|
137
137
|
/**
|
|
138
138
|
* {@inheritDoc @embedpdf/models!PdfEngine.getAllAnnotations}
|
|
139
139
|
*
|
|
@@ -213,6 +213,12 @@ declare class PdfiumEngine<T = Blob> implements PdfEngine<T> {
|
|
|
213
213
|
* @public
|
|
214
214
|
*/
|
|
215
215
|
extractText(doc: PdfDocumentObject, pageIndexes: number[]): Task<any, PdfErrorReason> | Task<string, PdfErrorReason>;
|
|
216
|
+
/**
|
|
217
|
+
* {@inheritDoc @embedpdf/models!PdfEngine.getTextSlices}
|
|
218
|
+
*
|
|
219
|
+
* @public
|
|
220
|
+
*/
|
|
221
|
+
getTextSlices(doc: PdfDocumentObject, slices: PageTextSlice[]): PdfTask<string[]>;
|
|
216
222
|
/**
|
|
217
223
|
* {@inheritDoc @embedpdf/models!PdfEngine.merge}
|
|
218
224
|
*
|
|
@@ -986,6 +992,6 @@ declare class PdfiumEngine<T = Blob> implements PdfEngine<T> {
|
|
|
986
992
|
private searchAllInPage;
|
|
987
993
|
}
|
|
988
994
|
|
|
989
|
-
declare function createPdfiumEngine(wasmUrl: string): Promise<PdfiumEngine>;
|
|
995
|
+
declare function createPdfiumEngine(wasmUrl: string, logger?: Logger): Promise<PdfiumEngine>;
|
|
990
996
|
|
|
991
997
|
export { createPdfiumEngine };
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { NoopLogger, PdfTaskHelper, PdfErrorCode, Task, Rotation, PdfAnnotationSubtype, PdfPageObjectType, PdfAnnotationObjectStatus, quadToRect, PDF_FORM_FIELD_TYPE, toIntRect, transformRect, toIntSize, transformSize, PdfActionType, PdfZoomMode, AppearanceMode, MatchFlag } from '@embedpdf/models';
|
|
2
1
|
import { init } from '@embedpdf/pdfium';
|
|
2
|
+
import { NoopLogger, PdfTaskHelper, PdfErrorCode, Task, Rotation, PdfAnnotationSubtype, stripPdfUnwantedMarkers, PdfPageObjectType, PdfAnnotationObjectStatus, quadToRect, PDF_FORM_FIELD_TYPE, toIntRect, transformRect, toIntSize, transformSize, PdfActionType, PdfZoomMode, AppearanceMode, MatchFlag } from '@embedpdf/models';
|
|
3
3
|
|
|
4
4
|
/**
|
|
5
5
|
* Read string from WASM heap
|
|
@@ -265,7 +265,7 @@ var PdfiumErrorCode;
|
|
|
265
265
|
PdfiumErrorCode[PdfiumErrorCode["XFALoad"] = 7] = "XFALoad";
|
|
266
266
|
PdfiumErrorCode[PdfiumErrorCode["XFALayout"] = 8] = "XFALayout";
|
|
267
267
|
})(PdfiumErrorCode || (PdfiumErrorCode = {}));
|
|
268
|
-
const browserImageDataToBlobConverter = (pdfImageData) => {
|
|
268
|
+
const browserImageDataToBlobConverter = (pdfImageData, imageType = 'image/webp') => {
|
|
269
269
|
// Check if we're in a browser environment
|
|
270
270
|
if (typeof OffscreenCanvas === 'undefined') {
|
|
271
271
|
throw new Error('OffscreenCanvas is not available in this environment. ' +
|
|
@@ -275,7 +275,7 @@ const browserImageDataToBlobConverter = (pdfImageData) => {
|
|
|
275
275
|
const imageData = new ImageData(pdfImageData.data, pdfImageData.width, pdfImageData.height);
|
|
276
276
|
const off = new OffscreenCanvas(imageData.width, imageData.height);
|
|
277
277
|
off.getContext('2d').putImageData(imageData, 0, 0);
|
|
278
|
-
return off.convertToBlob({ type:
|
|
278
|
+
return off.convertToBlob({ type: imageType });
|
|
279
279
|
};
|
|
280
280
|
/**
|
|
281
281
|
* Pdf engine that based on pdfium wasm
|
|
@@ -781,7 +781,7 @@ class PdfiumEngine {
|
|
|
781
781
|
*
|
|
782
782
|
* @public
|
|
783
783
|
*/
|
|
784
|
-
renderPage(doc, page, scaleFactor = 1, rotation = Rotation.Degree0, dpr = 1, options = { withAnnotations: false }) {
|
|
784
|
+
renderPage(doc, page, scaleFactor = 1, rotation = Rotation.Degree0, dpr = 1, options = { withAnnotations: false }, imageType = 'image/webp') {
|
|
785
785
|
const task = new Task();
|
|
786
786
|
this.logger.debug(LOG_SOURCE, LOG_CATEGORY, 'renderPage', doc, page, scaleFactor, rotation, dpr, options);
|
|
787
787
|
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, `RenderPage`, 'Begin', `${doc.id}-${page.index}`);
|
|
@@ -798,7 +798,7 @@ class PdfiumEngine {
|
|
|
798
798
|
size: page.size,
|
|
799
799
|
}, scaleFactor, rotation, dpr, options);
|
|
800
800
|
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, `RenderPage`, 'End', `${doc.id}-${page.index}`);
|
|
801
|
-
this.imageDataConverter(imageData).then((blob) => task.resolve(blob));
|
|
801
|
+
this.imageDataConverter(imageData, imageType).then((blob) => task.resolve(blob));
|
|
802
802
|
return task;
|
|
803
803
|
}
|
|
804
804
|
/**
|
|
@@ -806,7 +806,7 @@ class PdfiumEngine {
|
|
|
806
806
|
*
|
|
807
807
|
* @public
|
|
808
808
|
*/
|
|
809
|
-
renderPageRect(doc, page, scaleFactor, rotation, dpr, rect, options) {
|
|
809
|
+
renderPageRect(doc, page, scaleFactor, rotation, dpr, rect, options, imageType = 'image/webp') {
|
|
810
810
|
const task = new Task();
|
|
811
811
|
this.logger.debug(LOG_SOURCE, LOG_CATEGORY, 'renderPageRect', doc, page, scaleFactor, rotation, dpr, rect, options);
|
|
812
812
|
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, `RenderPageRect`, 'Begin', `${doc.id}-${page.index}`);
|
|
@@ -820,7 +820,7 @@ class PdfiumEngine {
|
|
|
820
820
|
}
|
|
821
821
|
const imageData = this.renderPageRectToImageData(ctx, page, rect, scaleFactor, rotation, dpr, options);
|
|
822
822
|
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, `RenderPageRect`, 'End', `${doc.id}-${page.index}`);
|
|
823
|
-
this.imageDataConverter(imageData).then((blob) => task.resolve(blob));
|
|
823
|
+
this.imageDataConverter(imageData, imageType).then((blob) => task.resolve(blob));
|
|
824
824
|
return task;
|
|
825
825
|
}
|
|
826
826
|
/**
|
|
@@ -1362,6 +1362,62 @@ class PdfiumEngine {
|
|
|
1362
1362
|
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, `ExtractText`, 'End', doc.id);
|
|
1363
1363
|
return PdfTaskHelper.resolve(text);
|
|
1364
1364
|
}
|
|
1365
|
+
/**
|
|
1366
|
+
* {@inheritDoc @embedpdf/models!PdfEngine.getTextSlices}
|
|
1367
|
+
*
|
|
1368
|
+
* @public
|
|
1369
|
+
*/
|
|
1370
|
+
getTextSlices(doc, slices) {
|
|
1371
|
+
this.logger.debug(LOG_SOURCE, LOG_CATEGORY, 'getTextSlices', doc, slices);
|
|
1372
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'Begin', doc.id);
|
|
1373
|
+
/* ⚠︎ 1 — trivial case */
|
|
1374
|
+
if (slices.length === 0) {
|
|
1375
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'End', doc.id);
|
|
1376
|
+
return PdfTaskHelper.resolve([]);
|
|
1377
|
+
}
|
|
1378
|
+
/* ⚠︎ 2 — document must be open */
|
|
1379
|
+
const ctx = this.cache.getContext(doc.id);
|
|
1380
|
+
if (!ctx) {
|
|
1381
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'End', doc.id);
|
|
1382
|
+
return PdfTaskHelper.reject({
|
|
1383
|
+
code: PdfErrorCode.DocNotOpen,
|
|
1384
|
+
message: 'document does not open',
|
|
1385
|
+
});
|
|
1386
|
+
}
|
|
1387
|
+
try {
|
|
1388
|
+
/* keep caller order */
|
|
1389
|
+
const out = new Array(slices.length);
|
|
1390
|
+
/* group → open each page once */
|
|
1391
|
+
const byPage = new Map();
|
|
1392
|
+
slices.forEach((s, i) => {
|
|
1393
|
+
(byPage.get(s.pageIndex) ?? byPage.set(s.pageIndex, []).get(s.pageIndex)).push({
|
|
1394
|
+
slice: s,
|
|
1395
|
+
pos: i,
|
|
1396
|
+
});
|
|
1397
|
+
});
|
|
1398
|
+
for (const [pageIdx, list] of byPage) {
|
|
1399
|
+
const pageCtx = ctx.acquirePage(pageIdx);
|
|
1400
|
+
const textPagePtr = pageCtx.getTextPage();
|
|
1401
|
+
for (const { slice, pos } of list) {
|
|
1402
|
+
const bufPtr = this.malloc(2 * (slice.charCount + 1)); // UTF-16 + NIL
|
|
1403
|
+
this.pdfiumModule.FPDFText_GetText(textPagePtr, slice.charIndex, slice.charCount, bufPtr);
|
|
1404
|
+
out[pos] = stripPdfUnwantedMarkers(this.pdfiumModule.pdfium.UTF16ToString(bufPtr));
|
|
1405
|
+
this.free(bufPtr);
|
|
1406
|
+
}
|
|
1407
|
+
pageCtx.release();
|
|
1408
|
+
}
|
|
1409
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'End', doc.id);
|
|
1410
|
+
return PdfTaskHelper.resolve(out);
|
|
1411
|
+
}
|
|
1412
|
+
catch (e) {
|
|
1413
|
+
this.logger.error(LOG_SOURCE, LOG_CATEGORY, 'getTextSlices error', e);
|
|
1414
|
+
this.logger.perf(LOG_SOURCE, LOG_CATEGORY, 'GetTextSlices', 'End', doc.id);
|
|
1415
|
+
return PdfTaskHelper.reject({
|
|
1416
|
+
code: PdfErrorCode.Unknown,
|
|
1417
|
+
message: String(e),
|
|
1418
|
+
});
|
|
1419
|
+
}
|
|
1420
|
+
}
|
|
1365
1421
|
/**
|
|
1366
1422
|
* {@inheritDoc @embedpdf/models!PdfEngine.merge}
|
|
1367
1423
|
*
|
|
@@ -1891,14 +1947,12 @@ class PdfiumEngine {
|
|
|
1891
1947
|
const runs = [];
|
|
1892
1948
|
let current = null;
|
|
1893
1949
|
let curObjPtr = null;
|
|
1950
|
+
let bounds = null;
|
|
1894
1951
|
/** ── main loop ──────────────────────────────────────────── */
|
|
1895
1952
|
for (let i = 0; i < glyphs.length; i++) {
|
|
1896
1953
|
const g = glyphs[i];
|
|
1897
1954
|
/* 1 — find the CPDF_TextObject this glyph belongs to */
|
|
1898
1955
|
const objPtr = this.pdfiumModule.FPDFText_GetTextObject(textPagePtr, i);
|
|
1899
|
-
if (g.isEmpty) {
|
|
1900
|
-
continue;
|
|
1901
|
-
}
|
|
1902
1956
|
/* 2 — start a new run when the text object changes */
|
|
1903
1957
|
if (objPtr !== curObjPtr) {
|
|
1904
1958
|
curObjPtr = objPtr;
|
|
@@ -1912,6 +1966,12 @@ class PdfiumEngine {
|
|
|
1912
1966
|
charStart: i,
|
|
1913
1967
|
glyphs: [],
|
|
1914
1968
|
};
|
|
1969
|
+
bounds = {
|
|
1970
|
+
minX: g.origin.x,
|
|
1971
|
+
minY: g.origin.y,
|
|
1972
|
+
maxX: g.origin.x + g.size.width,
|
|
1973
|
+
maxY: g.origin.y + g.size.height,
|
|
1974
|
+
};
|
|
1915
1975
|
runs.push(current);
|
|
1916
1976
|
}
|
|
1917
1977
|
/* 3 — append the slim glyph record */
|
|
@@ -1920,16 +1980,24 @@ class PdfiumEngine {
|
|
|
1920
1980
|
y: g.origin.y,
|
|
1921
1981
|
width: g.size.width,
|
|
1922
1982
|
height: g.size.height,
|
|
1923
|
-
flags: g.isSpace ? 1 : 0,
|
|
1983
|
+
flags: g.isEmpty ? 2 : g.isSpace ? 1 : 0,
|
|
1924
1984
|
});
|
|
1925
1985
|
/* 4 — expand the run's bounding rect */
|
|
1986
|
+
if (g.isEmpty) {
|
|
1987
|
+
continue;
|
|
1988
|
+
}
|
|
1926
1989
|
const right = g.origin.x + g.size.width;
|
|
1927
1990
|
const bottom = g.origin.y + g.size.height;
|
|
1928
|
-
|
|
1929
|
-
|
|
1930
|
-
|
|
1931
|
-
|
|
1932
|
-
|
|
1991
|
+
// Update bounds
|
|
1992
|
+
bounds.minX = Math.min(bounds.minX, g.origin.x);
|
|
1993
|
+
bounds.minY = Math.min(bounds.minY, g.origin.y);
|
|
1994
|
+
bounds.maxX = Math.max(bounds.maxX, right);
|
|
1995
|
+
bounds.maxY = Math.max(bounds.maxY, bottom);
|
|
1996
|
+
// Calculate final rect from bounds
|
|
1997
|
+
current.rect.x = bounds.minX;
|
|
1998
|
+
current.rect.y = bounds.minY;
|
|
1999
|
+
current.rect.width = bounds.maxX - bounds.minX;
|
|
2000
|
+
current.rect.height = bounds.maxY - bounds.minY;
|
|
1933
2001
|
}
|
|
1934
2002
|
return runs;
|
|
1935
2003
|
}
|
|
@@ -4024,11 +4092,11 @@ class PdfiumEngine {
|
|
|
4024
4092
|
}
|
|
4025
4093
|
}
|
|
4026
4094
|
|
|
4027
|
-
async function createPdfiumEngine(wasmUrl) {
|
|
4095
|
+
/**
 * Downloads the PDFium WebAssembly binary and creates an engine backed by it.
 *
 * @param wasmUrl - URL passed to `fetch` to download the wasm binary
 * @param logger - optional logger forwarded to the `PdfiumEngine` constructor
 * @returns promise resolved with the new `PdfiumEngine`
 * @throws Error when the download finishes with a non-success HTTP status
 */
async function createPdfiumEngine(wasmUrl, logger) {
    const response = await fetch(wasmUrl);
    // fetch() resolves even for HTTP error statuses such as 404; fail here with
    // a clear message instead of handing an error page to the wasm initializer.
    if (!response.ok) {
        throw new Error(`Failed to fetch pdfium wasm from ${wasmUrl}: ${response.status} ${response.statusText}`);
    }
    const wasmBinary = await response.arrayBuffer();
    const wasmModule = await init({ wasmBinary });
    return new PdfiumEngine(wasmModule, logger);
}
|
|
4033
4101
|
|
|
4034
4102
|
export { createPdfiumEngine };
|