@embedpdf/engines 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1364,6 +1364,62 @@ class PdfiumEngine {
1364
1364
  this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, `ExtractText`, 'End', doc.id);
1365
1365
  return models.PdfTaskHelper.resolve(text);
1366
1366
  }
1367
+ /**
1368
+ * {@inheritDoc @embedpdf/models!PdfEngine.getTextSlices}
1369
+ *
1370
+ * @public
1371
+ */
1372
+ getTextSlices(doc, slices) {
1373
+ this.logger.debug(LOG_SOURCE$2, LOG_CATEGORY$2, 'getTextSlices', doc, slices);
1374
+ this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'Begin', doc.id);
1375
+ /* ⚠︎ 1 — trivial case */
1376
+ if (slices.length === 0) {
1377
+ this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'End', doc.id);
1378
+ return models.PdfTaskHelper.resolve([]);
1379
+ }
1380
+ /* ⚠︎ 2 — document must be open */
1381
+ const ctx = this.cache.getContext(doc.id);
1382
+ if (!ctx) {
1383
+ this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'End', doc.id);
1384
+ return models.PdfTaskHelper.reject({
1385
+ code: models.PdfErrorCode.DocNotOpen,
1386
+ message: 'document does not open',
1387
+ });
1388
+ }
1389
+ try {
1390
+ /* keep caller order */
1391
+ const out = new Array(slices.length);
1392
+ /* group → open each page once */
1393
+ const byPage = new Map();
1394
+ slices.forEach((s, i) => {
1395
+ (byPage.get(s.pageIndex) ?? byPage.set(s.pageIndex, []).get(s.pageIndex)).push({
1396
+ slice: s,
1397
+ pos: i,
1398
+ });
1399
+ });
1400
+ for (const [pageIdx, list] of byPage) {
1401
+ const pageCtx = ctx.acquirePage(pageIdx);
1402
+ const textPagePtr = pageCtx.getTextPage();
1403
+ for (const { slice, pos } of list) {
1404
+ const bufPtr = this.malloc(2 * (slice.charCount + 1)); // UTF-16 + NIL
1405
+ this.pdfiumModule.FPDFText_GetText(textPagePtr, slice.charIndex, slice.charCount, bufPtr);
1406
+ out[pos] = models.stripPdfUnwantedMarkers(this.pdfiumModule.pdfium.UTF16ToString(bufPtr));
1407
+ this.free(bufPtr);
1408
+ }
1409
+ pageCtx.release();
1410
+ }
1411
+ this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'End', doc.id);
1412
+ return models.PdfTaskHelper.resolve(out);
1413
+ }
1414
+ catch (e) {
1415
+ this.logger.error(LOG_SOURCE$2, LOG_CATEGORY$2, 'getTextSlices error', e);
1416
+ this.logger.perf(LOG_SOURCE$2, LOG_CATEGORY$2, 'GetTextSlices', 'End', doc.id);
1417
+ return models.PdfTaskHelper.reject({
1418
+ code: models.PdfErrorCode.Unknown,
1419
+ message: String(e),
1420
+ });
1421
+ }
1422
+ }
1367
1423
  /**
1368
1424
  * {@inheritDoc @embedpdf/models!PdfEngine.merge}
1369
1425
  *
@@ -1893,14 +1949,12 @@ class PdfiumEngine {
1893
1949
  const runs = [];
1894
1950
  let current = null;
1895
1951
  let curObjPtr = null;
1952
+ let bounds = null;
1896
1953
  /** ── main loop ──────────────────────────────────────────── */
1897
1954
  for (let i = 0; i < glyphs.length; i++) {
1898
1955
  const g = glyphs[i];
1899
1956
  /* 1 — find the CPDF_TextObject this glyph belongs to */
1900
1957
  const objPtr = this.pdfiumModule.FPDFText_GetTextObject(textPagePtr, i);
1901
- if (g.isEmpty) {
1902
- continue;
1903
- }
1904
1958
  /* 2 — start a new run when the text object changes */
1905
1959
  if (objPtr !== curObjPtr) {
1906
1960
  curObjPtr = objPtr;
@@ -1914,6 +1968,12 @@ class PdfiumEngine {
1914
1968
  charStart: i,
1915
1969
  glyphs: [],
1916
1970
  };
1971
+ bounds = {
1972
+ minX: g.origin.x,
1973
+ minY: g.origin.y,
1974
+ maxX: g.origin.x + g.size.width,
1975
+ maxY: g.origin.y + g.size.height,
1976
+ };
1917
1977
  runs.push(current);
1918
1978
  }
1919
1979
  /* 3 — append the slim glyph record */
@@ -1922,16 +1982,24 @@ class PdfiumEngine {
1922
1982
  y: g.origin.y,
1923
1983
  width: g.size.width,
1924
1984
  height: g.size.height,
1925
- flags: g.isSpace ? 1 : 0,
1985
+ flags: g.isEmpty ? 2 : g.isSpace ? 1 : 0,
1926
1986
  });
1927
1987
  /* 4 — expand the run's bounding rect */
1988
+ if (g.isEmpty) {
1989
+ continue;
1990
+ }
1928
1991
  const right = g.origin.x + g.size.width;
1929
1992
  const bottom = g.origin.y + g.size.height;
1930
- current.rect.width =
1931
- Math.max(current.rect.x + current.rect.width, right) - current.rect.x;
1932
- current.rect.y = Math.min(current.rect.y, g.origin.y);
1933
- current.rect.height =
1934
- Math.max(current.rect.y + current.rect.height, bottom) - current.rect.y;
1993
+ // Update bounds
1994
+ bounds.minX = Math.min(bounds.minX, g.origin.x);
1995
+ bounds.minY = Math.min(bounds.minY, g.origin.y);
1996
+ bounds.maxX = Math.max(bounds.maxX, right);
1997
+ bounds.maxY = Math.max(bounds.maxY, bottom);
1998
+ // Calculate final rect from bounds
1999
+ current.rect.x = bounds.minX;
2000
+ current.rect.y = bounds.minY;
2001
+ current.rect.width = bounds.maxX - bounds.minX;
2002
+ current.rect.height = bounds.maxY - bounds.minY;
1935
2003
  }
1936
2004
  return runs;
1937
2005
  }
@@ -4177,6 +4245,9 @@ class EngineRunner {
4177
4245
  case 'extractText':
4178
4246
  task = this.engine[name](...args);
4179
4247
  break;
4248
+ case 'getTextSlices':
4249
+ task = this.engine[name](...args);
4250
+ break;
4180
4251
  case 'getPageGlyphs':
4181
4252
  task = this.engine[name](...args);
4182
4253
  break;
@@ -4937,6 +5008,26 @@ class WebWorkerEngine {
4937
5008
  this.proxy(task, request);
4938
5009
  return task;
4939
5010
  }
5011
+ /**
5012
+ * {@inheritDoc @embedpdf/models!PdfEngine.getTextSlices}
5013
+ *
5014
+ * @public
5015
+ */
5016
+ getTextSlices(doc, slices) {
5017
+ this.logger.debug(LOG_SOURCE, LOG_CATEGORY, 'getTextSlices', doc, slices);
5018
+ const requestId = this.generateRequestId(doc.id);
5019
+ const task = new WorkerTask(this.worker, requestId);
5020
+ const request = {
5021
+ id: requestId,
5022
+ type: 'ExecuteRequest',
5023
+ data: {
5024
+ name: 'getTextSlices',
5025
+ args: [doc, slices],
5026
+ },
5027
+ };
5028
+ this.proxy(task, request);
5029
+ return task;
5030
+ }
4940
5031
  /**
4941
5032
  * {@inheritDoc @embedpdf/models!PdfEngine.getPageGlyphs}
4942
5033
  *
@@ -5301,6 +5392,9 @@ function createMockPdfEngine(partialEngine) {
5301
5392
  extractText: (pdf, pageIndexes) => {
5302
5393
  return models.PdfTaskHelper.resolve('');
5303
5394
  },
5395
+ getTextSlices: (doc, slices) => {
5396
+ return models.PdfTaskHelper.resolve([]);
5397
+ },
5304
5398
  getPageGlyphs: (doc, page) => {
5305
5399
  return models.PdfTaskHelper.resolve([]);
5306
5400
  },