npm - scribe.js-ocr - Versions diffs - 0.7.4 → 0.9.0 - Mend

scribe.js-ocr 0.7.4 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

package/build-deno-compile.sh +30 -0
package/cli/cli.js +46 -18
package/cli/detectPDFType.js +1 -2
package/cli/extract.js +14 -7
package/cli/main.js +39 -39
package/cli/require.js +1 -1
package/cli/scribe.js +12 -11
package/fonts/Dingbats.woff +0 -0
package/fonts/all/URWGothicBook-Bold.woff +0 -0
package/fonts/all/URWGothicBook-BoldItalic.woff +0 -0
package/fonts/all/URWGothicBook-Italic.woff +0 -0
package/fonts/all/URWGothicBook-Regular.woff +0 -0
package/fonts/latin/URWGothicBook-Bold.woff +0 -0
package/fonts/latin/URWGothicBook-BoldItalic.woff +0 -0
package/fonts/latin/URWGothicBook-Italic.woff +0 -0
package/fonts/latin/URWGothicBook-Regular.woff +0 -0
package/js/canvasAdapter.js +4 -1
package/js/clear.js +7 -8
package/js/containers/app.js +2 -0
package/js/containers/dataContainer.js +1 -4
package/js/containers/fontContainer.js +59 -44
package/js/containers/imageContainer.js +13 -35
package/js/coordinates.js +3 -3
package/js/debug.js +2 -2
package/js/export/export.js +103 -18
package/js/export/exportDebugCsv.js +4 -3
package/js/export/pdf/writePdf.js +389 -0
package/js/export/{writePdfFonts.js → pdf/writePdfFonts.js} +16 -12
package/js/export/pdf/writePdfImages.js +218 -0
package/js/export/{writePdf.js → pdf/writePdfText.js} +28 -315
package/js/export/writeDocx.js +12 -5
package/js/export/writeHocr.js +11 -10
package/js/export/writeHtml.js +208 -48
package/js/export/writeTabular.js +31 -20
package/js/export/writeText.js +12 -10
package/js/fontContainerMain.js +101 -50
package/js/fontEval.js +18 -14
package/js/fontStatistics.js +90 -90
package/js/generalWorkerMain.js +52 -6
package/js/global.d.ts +178 -6
package/js/import/convertDocTextract.js +447 -0
package/js/import/convertPageAbbyy.js +10 -4
package/js/import/convertPageBlocks.js +4 -4
package/js/import/convertPageGoogleVision.js +204 -0
package/js/import/convertPageHocr.js +3 -3
package/js/import/convertPageShared.js +1 -0
package/js/import/convertPageStext.js +18 -10
package/js/import/convertPageText.js +289 -0
package/js/import/import.js +133 -125
package/js/import/importOCR.js +98 -46
package/js/import/nodeAdapter.js +2 -2
package/js/modifyOCR.js +6 -5
package/js/nudge.js +3 -3
package/js/objects/{fontMetricsObjects.js → charMetricsObjects.js} +12 -12
package/js/objects/imageObjects.js +3 -2
package/js/objects/layoutObjects.js +37 -0
package/js/objects/ocrObjects.js +51 -3
package/js/recognizeConvert.js +74 -23
package/js/utils/fontUtils.js +32 -1
package/js/utils/imageUtils.js +99 -0
package/js/utils/miscUtils.js +158 -9
package/js/utils/reflowPars.js +4 -0
package/js/worker/compareOCRModule.js +20 -18
package/js/worker/generalWorker.js +12 -6
package/js/worker/optimizeFontModule.js +19 -19
package/mupdf/libmupdf.js +3 -3
package/mupdf/libmupdf.wasm +0 -0
package/mupdf/mupdf-async.js +1 -1
package/mupdf/mupdf-worker.js +9 -4
package/package.json +7 -4
package/scribe.js +5 -5
package/tess/tesseract.esm.min.js +1 -1
package/tess/tesseract.min.js +1 -1
package/tess/worker.min.js +1 -1

package/js/export/pdf/writePdf.js ADDED Viewed

@@ -0,0 +1,389 @@
+import { FontCont } from '../../containers/fontContainer.js';
+import { createEmbeddedFontType0, createEmbeddedFontType1 } from './writePdfFonts.js';
+import { createEmbeddedImages, createImageResourceDict, drawImageCommands } from './writePdfImages.js';
+import { opt } from '../../containers/app.js';
+import { ocrPageToPDFStream } from './writePdfText.js';
+import { getDistinctCharsFont, subsetFont } from '../../utils/fontUtils.js';
+// Creates 3 PDF objects necessary to embed font.
+// These are (1) the font dictionary, (2) the font descriptor, and (3) the font file,
+// which will be located at objects firstObjIndex, firstObjIndex + 1, and firstObjIndex + 2 (respectively).
+/**
+ * Create a PDF from an array of ocrPage objects.
+ *
+ * Object numbering in the generated file: object 1 is the catalog and object 2 is the page tree.
+ * Font objects start at object 3, with 6 object numbers reserved for every registered font
+ * (the 3-object Type 1 branch is currently unreachable because `isStandardFont` is hard-coded to `false`).
+ * Image XObjects (if any) follow the fonts, and the per-page objects come last.
+ * Fonts for which no font objects are created keep their reserved numbers as free entries in the xref table.
+ *
+ * @param {Object} params
+ * @param {Array<OcrPage>} params.ocrArr - OCR data, indexed by page number.
+ * @param {PageMetrics[]} params.pageMetricsArr - Page metrics, indexed by page number.
+ *    The `angle` and `dims` properties are read for every exported page.
+ * @param {number} [params.minpage=0] - Index of the first page to export (inclusive).
+ * @param {number} [params.maxpage=-1] - Index of the last page to export (inclusive).
+ *    The default (-1) is replaced with `ocrArr.length - 1`.
+ * @param {("ebook"|"eval"|"proof"|"invis")} [params.textMode="ebook"] - Passed through to `ocrPageToPDFStream` for every page.
+ * @param {boolean} [params.rotateText=false] - Passed through to `ocrPageToPDFStream` for every page.
+ * @param {boolean} [params.rotateBackground=false] - Passed through to `ocrPageToPDFStream` for every page.
+ *    When images are included, the image is also drawn rotated by the page angle if the absolute angle exceeds 0.05.
+ * @param {dims} [params.dimsLimit] - Output page dimensions.
+ *    When `width` is less than 1 (as in the default), each page uses its own dimensions from `pageMetricsArr`.
+ * @param {number} [params.confThreshHigh=85] - Passed through to `ocrPageToPDFStream` for every page.
+ * @param {number} [params.confThreshMed=75] - Passed through to `ocrPageToPDFStream` for every page.
+ * @param {number} [params.proofOpacity=0.8] - Written as the `/ca` value of the `/GSO1` graphics state of every page that has content.
+ * @param {Array<ImageWrapper>} [params.images=[]] - Array of images to include in PDF
+ * @param {boolean} [params.includeImages=false] - Whether to include images in the PDF
+ * @returns {Promise<string>} The complete PDF file as a string.
+ * @throws {Error} If `FontCont.raw` is falsy, or if the resolved `maxpage` is negative.
+ *
+ * A valid PDF will be created if an empty array is provided for `ocrArr`, as long as `maxpage` is set manually.
+ *
+ * NOTE(review): a used font whose PostScript name is `NotoSansSC-Regular` is skipped in the loop that creates font objects,
+ * so this function writes nothing into the object numbers reserved for it. Confirm that this is intended.
+ *
+ * NOTE(review): xref offsets are computed as `pdfOut.length + 2`. The `+ 2` presumably compensates for the two non-ASCII
+ * characters in the header comment each taking one extra byte once encoded. Confirm against the encoding used when the string is written.
+ */
+export async function writePdf({
+  ocrArr,
+  pageMetricsArr,
+  minpage = 0,
+  maxpage = -1,
+  textMode = 'ebook',
+  rotateText = false,
+  rotateBackground = false,
+  dimsLimit = { width: -1, height: -1 },
+  confThreshHigh = 85,
+  confThreshMed = 75,
+  proofOpacity = 0.8,
+  images = [],
+  includeImages = false,
+}) {
+  if (!FontCont.raw) throw new Error('No fonts loaded.');
+  if (maxpage === -1) {
+    maxpage = ocrArr.length - 1;
+  }
+  // This can happen if (1) `ocrArr` is length 0 and (2) `maxpage` is left as the default (-1).
+  if (maxpage < 0) throw new Error('PDF with negative page count requested.');
+  let fontI = 0;
+  let objectI = 3;
+  /** @type {Object<string, PdfFontFamily>} */
+  const pdfFonts = {};
+  /** @type {{familyKey: string, key: string}[]} */
+  const pdfFontRefs = [];
+  /** @type {string[][]} */
+  const pdfFontObjStrArr = [];
+  /** @type {Set<PdfFontInfo>} */
+  const pdfFontsUsed = new Set();
+  /**
+   * Registers every style of a font family in `pdfFonts` and reserves PDF object numbers for it.
+   * No font objects are created here; a `null` placeholder is pushed to `pdfFontObjStrArr` for each style.
+   * @param {string} familyKey
+   * @param {FontContainerFamily} familyObj
+   */
+  const addFontFamilyRef = async (familyKey, familyObj) => {
+    pdfFonts[familyKey] = {};
+    for (const [key, value] of Object.entries(familyObj)) {
+      // This should include both (1) if this is a standard 14 font and (2) if characters outside of the Windows-1252 range are used.
+      // If the latter is true, then a composite font is needed, even if the font is a standard 14 font.
+      // TODO: We currently have no mechanism for resolving name conflicts between fonts in the base and overlay document.
+      // As a workaround, we use the names `/FO[n]` rather than the more common `/F[n]`.
+      // However, this likely will cause issues if this application is used to create visible text, and then the resulting PDF is uploaded.
+      // This would move the fonts from the overlay document to the base document, and the names would conflict.
+      const isStandardFont = false;
+      if (isStandardFont) {
+        pdfFonts[familyKey][key] = {
+          type: 1, index: fontI, name: `/FO${String(fontI)}`, objN: objectI, opentype: value.opentype,
+        };
+        pdfFontRefs.push({ familyKey, key });
+        pdfFontObjStrArr.push(null);
+        objectI += 3;
+      } else {
+        pdfFonts[familyKey][key] = {
+          type: 0, index: fontI, name: `/FO${String(fontI)}`, objN: objectI, opentype: value.opentype,
+        };
+        pdfFontRefs.push({ familyKey, key });
+        pdfFontObjStrArr.push(null);
+        objectI += 6;
+      }
+      fontI++;
+    }
+  };
+  // Create reference to all fonts.
+  // Only the fonts that are actually used will be included in the final PDF.
+  for (const familyKeyI of Object.keys(FontCont.raw)) {
+    const useOpt = FontCont.useOptFamily(familyKeyI);
+    const familyObjI = {
+      normal: useOpt && FontCont.opt?.[familyKeyI]?.normal ? FontCont.opt[familyKeyI].normal : FontCont.raw[familyKeyI].normal,
+      italic: useOpt && FontCont.opt?.[familyKeyI]?.italic ? FontCont.opt[familyKeyI].italic : FontCont.raw[familyKeyI].italic,
+      bold: useOpt && FontCont.opt?.[familyKeyI]?.bold ? FontCont.opt[familyKeyI].bold : FontCont.raw[familyKeyI].bold,
+      boldItalic: useOpt && FontCont.opt?.[familyKeyI]?.boldItalic ? FontCont.opt[familyKeyI].boldItalic : FontCont.raw[familyKeyI].boldItalic,
+    };
+    await addFontFamilyRef(familyKeyI, familyObjI);
+  }
+  if (FontCont.doc) {
+    for (const familyKeyI of Object.keys(FontCont.doc)) {
+      await addFontFamilyRef(familyKeyI, FontCont.doc[familyKeyI]);
+    }
+  }
+  if (FontCont.supp.chi_sim) {
+    const charArr = getDistinctCharsFont(ocrArr, FontCont.supp.chi_sim.family);
+    if (charArr.length > 0) {
+      const fontExport = await subsetFont(FontCont.supp.chi_sim.opentype, charArr);
+      pdfFonts.NotoSansSC = {};
+      pdfFonts.NotoSansSC.normal = {
+        type: 0, index: fontI, name: `/FO${String(fontI)}`, objN: objectI, opentype: fontExport,
+      };
+      pdfFontRefs.push({ familyKey: 'NotoSansSC', key: 'normal' });
+      pdfFontObjStrArr.push(null);
+      objectI += 6;
+      fontI++;
+    }
+  }
+  // Add images [WIP]
+  /** @type {Array<string>} */
+  const pdfImageObjStrArr = [];
+  const imageObjIndices = [];
+  if (includeImages && images && images.length > 0) {
+    const imageObjects = createEmbeddedImages(images, objectI);
+    for (let i = 0; i < imageObjects.length; i++) {
+      pdfImageObjStrArr.push(imageObjects[i]);
+      imageObjIndices.push(objectI + i);
+    }
+    objectI += imageObjects.length;
+  }
+  /** @type {Array<string>} */
+  const pdfPageObjStrArr = [];
+  // Add pages
+  const pageIndexArr = [];
+  for (let i = minpage; i <= maxpage; i++) {
+    const angle = pageMetricsArr[i].angle || 0;
+    const { dims } = pageMetricsArr[i];
+    // eslint-disable-next-line no-await-in-loop
+    const { pdfObj, pdfFontsUsed: pdfFontsUsedI } = (await ocrPageToPDF({
+      pageObj: ocrArr[i],
+      inputDims: dims,
+      outputDims: dimsLimit,
+      firstObjIndex: objectI,
+      parentIndex: 2,
+      proofOpacity,
+      pdfFonts,
+      textMode,
+      angle,
+      rotateText,
+      rotateBackground,
+      confThreshHigh,
+      confThreshMed,
+      imageObjIndices,
+      includeImages,
+    }));
+    for (const font of pdfFontsUsedI) {
+      pdfFontsUsed.add(font);
+    }
+    for (let j = 0; j < pdfObj.length; j++) {
+      pdfPageObjStrArr.push(pdfObj[j]);
+    }
+    // This assumes the "page" is always the first object returned by `ocrPageToPDF`.
+    pageIndexArr.push(objectI);
+    objectI += pdfObj.length;
+    opt.progressHandler({ n: i, type: 'export', info: { } });
+  }
+  // Create font objects for fonts that are used
+  for (const pdfFont of pdfFontsUsed) {
+    if (pdfFont.opentype?.names?.postScriptName?.en === 'NotoSansSC-Regular') continue;
+    const isStandardFont = false;
+    if (isStandardFont) {
+      pdfFontObjStrArr[pdfFont.index] = createEmbeddedFontType1(pdfFont.opentype, pdfFont.objN);
+    } else {
+      pdfFontObjStrArr[pdfFont.index] = createEmbeddedFontType0({ font: pdfFont.opentype, firstObjIndex: pdfFont.objN });
+    }
+  }
+  /** @type {Array<string>} */
+  const pdfObjStrArr = [];
+  let pdfOut = '%PDF-1.7\n%µ¶n\n';
+  pdfObjStrArr.push('1 0 obj\n<</Type /Catalog\n/Pages 2 0 R>>\nendobj\n\n');
+  let pagesObjStr = '2 0 obj\n<</Type /Pages\n/Kids [';
+  for (let i = 0; i < (maxpage - minpage + 1); i++) {
+    pagesObjStr += `${String(pageIndexArr[i])} 0 R\n`;
+  }
+  pagesObjStr += `]\n/Count ${String(maxpage - minpage + 1)}>>\nendobj\n\n`;
+  pdfObjStrArr.push(pagesObjStr);
+  /** @type {{type: string, offset: number}[]} */
+  const xrefArr = [];
+  for (let i = 0; i < pdfObjStrArr.length; i++) {
+    xrefArr.push({ type: 'obj', offset: pdfOut.length + 2 });
+    pdfOut += pdfObjStrArr[i];
+  }
+  for (let i = 0; i < pdfFontRefs.length; i++) {
+    if (pdfFontObjStrArr[i]) {
+      for (let j = 0; j < pdfFontObjStrArr[i].length; j++) {
+        xrefArr.push({ type: 'obj', offset: pdfOut.length + 2 });
+        pdfOut += pdfFontObjStrArr[i][j];
+      }
+    } else {
+      xrefArr.push({ type: 'free', offset: 0 });
+      xrefArr.push({ type: 'free', offset: 0 });
+      xrefArr.push({ type: 'free', offset: 0 });
+      xrefArr.push({ type: 'free', offset: 0 });
+      xrefArr.push({ type: 'free', offset: 0 });
+      xrefArr.push({ type: 'free', offset: 0 });
+    }
+  }
+  for (let i = 0; i < pdfImageObjStrArr.length; i++) {
+    xrefArr.push({ type: 'obj', offset: pdfOut.length + 2 });
+    pdfOut += pdfImageObjStrArr[i];
+  }
+  for (let i = 0; i < pdfPageObjStrArr.length; i++) {
+    xrefArr.push({ type: 'obj', offset: pdfOut.length + 2 });
+    pdfOut += pdfPageObjStrArr[i];
+  }
+  // The 0th object always exists, and contains no meaningful data.
+  const objCount = pdfObjStrArr.length + pdfFontRefs.length * 6 + pdfImageObjStrArr.length + pdfPageObjStrArr.length + 1;
+  const xrefOffset = pdfOut.length + 2;
+  let xrefStr = `xref\n0 ${objCount}\n`;
+  xrefStr += '0000000000 65535 f\n';
+  for (let i = 0; i < xrefArr.length; i++) {
+    if (xrefArr[i].type === 'obj') {
+      xrefStr += `${String(xrefArr[i].offset).padStart(10, '0')} 00000 n\n`;
+    } else {
+      xrefStr += '0000000000 65535 f\n';
+    }
+  }
+  xrefStr += `trailer
+  <<  /Root 1 0 R
+      /Size ${objCount}
+  >>
+startxref
+${xrefOffset}
+%%EOF`;
+  pdfOut += xrefStr;
+  return pdfOut;
+}
+/**
+ * Generates PDF objects for a single page of OCR data.
+ * Generally returns 3 object strings in `pdfObj`, in this order: the page object (object number `firstObjIndex`),
+ * the resource dictionary (`firstObjIndex + 1`), and the content stream (`firstObjIndex + 2`).
+ * If there is neither text content nor image content, only the page object is returned, with an empty `/Resources` dictionary.
+ * @param {Object} params - Parameters object
+ * @param {OcrPage} params.pageObj - OCR data for the page. A falsy value or a page with zero lines counts as "no text content".
+ * @param {dims} params.inputDims - Dimensions used for the page when `outputDims.width` is less than 1.
+ * @param {dims} params.outputDims - Dimensions written to the page `/MediaBox` (unless replaced by `inputDims`, see above).
+ * @param {number} params.firstObjIndex - Object number assigned to the page object.
+ * @param {number} params.parentIndex - Object number written as the page's `/Parent`.
+ * @param {number} params.proofOpacity - Written as the `/ca` value of the `/GSO1` graphics state.
+ * @param {Object<string, PdfFontFamily>} params.pdfFonts - Passed through to `ocrPageToPDFStream`.
+ * @param {("ebook"|"eval"|"proof"|"invis")} params.textMode - Passed through to `ocrPageToPDFStream`.
+ * @param {number} params.angle - Page angle. Passed through to `ocrPageToPDFStream`, and used as the image rotation
+ *    when `rotateBackground` is true and the absolute angle exceeds 0.05.
+ * @param {boolean} [params.rotateText=false]
+ * @param {boolean} [params.rotateBackground=false]
+ * @param {number} [params.confThreshHigh=85]
+ * @param {number} [params.confThreshMed=75]
+ * @param {Array<number>} [params.imageObjIndices=[]] - Array of image object indices
+ * @param {boolean} [params.includeImages=false] - Whether to include images
+ * @returns {Promise<{pdfObj: string[], pdfFontsUsed: Set<PdfFontInfo>}>} The object strings for the page,
+ *    and the set of fonts referenced by the page (empty when only the page object is returned).
+ *
+ * NOTE(review): the image draw command always references `/Im0`, while the resource dictionary lists every entry
+ * of `imageObjIndices`. Presumably per-page image selection is still pending; confirm with the caller.
+ */
+async function ocrPageToPDF({
+  pageObj,
+  inputDims,
+  outputDims,
+  firstObjIndex,
+  parentIndex,
+  proofOpacity,
+  pdfFonts,
+  textMode,
+  angle,
+  rotateText = false,
+  rotateBackground = false,
+  confThreshHigh = 85,
+  confThreshMed = 75,
+  imageObjIndices = [],
+  includeImages = false,
+}) {
+  if (outputDims.width < 1) {
+    outputDims = inputDims;
+  }
+  const noTextContent = !pageObj || pageObj.lines.length === 0;
+  const noImageContent = !includeImages || imageObjIndices.length === 0;
+  const pageIndex = firstObjIndex;
+  let pageObjStr = `${String(pageIndex)} 0 obj\n<</Type/Page/MediaBox[0 0 ${String(outputDims.width)} ${String(outputDims.height)}]`;
+  if (noTextContent && noImageContent) {
+    pageObjStr += '/Resources<<>>';
+    pageObjStr += `/Parent ${parentIndex} 0 R>>\nendobj\n\n`;
+    return { pdfObj: [pageObjStr], pdfFontsUsed: /** @type {Set<PdfFontInfo>} */ (new Set()) };
+  }
+  pageObjStr += `/Contents ${String(firstObjIndex + 2)} 0 R`;
+  let imageContentObjStr = '';
+  if (includeImages && imageObjIndices.length > 0) {
+    if (imageObjIndices.length > 0) {
+      let rotation = 0;
+      if (rotateBackground && Math.abs(angle ?? 0) > 0.05) {
+        rotation = angle;
+      }
+      imageContentObjStr += drawImageCommands(0, 0, 0, outputDims.width, outputDims.height, rotation);
+    }
+  }
+  const { textContentObjStr, pdfFontsUsed } = await ocrPageToPDFStream(pageObj, outputDims, pdfFonts, textMode, angle,
+    rotateText, rotateBackground, confThreshHigh, confThreshMed);
+  let pdfFontsStr = '';
+  for (const font of pdfFontsUsed) {
+    pdfFontsStr += `${String(font.name)} ${String(font.objN)} 0 R\n`;
+  }
+  let resourceDictObjStr = `${String(firstObjIndex + 1)} 0 obj\n<<`;
+  resourceDictObjStr += `/Font<<${pdfFontsStr}>>`;
+  if (includeImages && imageObjIndices.length > 0) {
+    const imageResourceStr = createImageResourceDict(imageObjIndices);
+    resourceDictObjStr += imageResourceStr;
+  }
+  // Use `GSO` prefix to avoid conflicts with other graphics states, which are normally named `/GS[n]` by convention.
+  resourceDictObjStr += '/ExtGState<<';
+  resourceDictObjStr += '/GSO0 <</ca 0.0>>';
+  resourceDictObjStr += `/GSO1 <</ca ${proofOpacity}>>`;
+  resourceDictObjStr += '>>';
+  resourceDictObjStr += '>>\nendobj\n\n';
+  const pageResourceStr = `/Resources ${String(firstObjIndex + 1)} 0 R`;
+  pageObjStr += `${pageResourceStr}/Parent ${parentIndex} 0 R>>\nendobj\n\n`;
+  const pageContentObjStr = `${String(firstObjIndex + 2)} 0 obj\n<</Length ${String(imageContentObjStr.length + textContentObjStr.length)} >>\nstream\n${imageContentObjStr}${textContentObjStr}\nendstream\nendobj\n\n`;
+  return {
+    pdfObj: [pageObjStr, resourceDictObjStr, pageContentObjStr], pdfFontsUsed,
+  };
+}

package/js/export/{writePdfFonts.js → pdf/writePdfFonts.js} RENAMED Viewed

@@ -1,8 +1,8 @@
 // Function for converting from bufferArray to hex (string)
 // Taken from https://stackoverflow.com/questions/40031688/javascript-arraybuffer-to-hex
-import { win1252Chars } from '../../fonts/encoding.js';
-import { determineSansSerif } from '../utils/miscUtils.js';
+import { win1252Chars } from '../../../fonts/encoding.js';
+import { determineSansSerif } from '../../utils/miscUtils.js';
 /** @type {Array<string>} */
 const byteToHex = [];
@@ -15,17 +15,18 @@ for (let n = 0; n <= 0xff; ++n) {
 /**
  * Converts an ArrayBuffer to a hexadecimal string.
  *
- * @param {ArrayBuffer} arrayBuffer - The ArrayBuffer to be converted.
+ * @param {ArrayBufferLike} arrayBuffer - The ArrayBuffer to be converted.
  * @returns {string} The hexadecimal representation of the ArrayBuffer.
  */
 export function hex(arrayBuffer) {
   const buff = new Uint8Array(arrayBuffer);
-  /** @type {Array<string>} */
-  const hexOctets = []; // new Array(buff.length) is even faster (preallocates necessary array size), then use hexOctets[i] instead of .push()
-  for (let i = 0; i < buff.length; ++i) hexOctets.push(byteToHex[buff[i]]);
+  let hexOctets = '';
+  for (let i = 0; i < buff.length; ++i) {
+    if (i % 32 === 0 && i !== 0) hexOctets += '\n';
+    hexOctets += byteToHex[buff[i]];
+  }
-  return hexOctets.join('');
+  return hexOctets;
 }
 /**
@@ -248,16 +249,19 @@ export function createEmbeddedFontType1(font, firstObjIndex, italic = false, isS
  * Converts a Opentype.js font object into an array of strings for adding to a PDF.
  * The font is represented as a composite "Type 0" font.
  *
- * @param {opentype.Font} font - Opentype.js font object
- * @param {number} firstObjIndex - Index for the first PDF object
- * @param {boolean} [italic=false] - Whether the font is italic.
+ * @param {Object} options - Configuration object
+ * @param {opentype.Font} options.font - Opentype.js font object
+ * @param {number} options.firstObjIndex - Index for the first PDF object
+ * @param {boolean} [options.italic=false] - Whether the font is italic.
  *
  * This function does not produce "toUnicode" or "Widths" objects,
  * so any PDF it creates directly will lack usable copy/paste.
  * However, both of these objects will be created from the embedded file
  * when the result is run through mupdf.
  */
-export function createEmbeddedFontType0(font, firstObjIndex, italic = false) {
+export function createEmbeddedFontType0({
+  font, firstObjIndex, italic = false,
+}) {
   // Start 1st object: Font Dictionary
   let fontDictObjStr = `${String(firstObjIndex)} 0 obj\n<</Type/Font/Subtype/Type0`;

package/js/export/pdf/writePdfImages.js ADDED Viewed

@@ -0,0 +1,218 @@
+/* eslint-disable no-bitwise */
+import { imageUtils } from '../../objects/imageObjects.js';
+import { base64ToBytes, getPngIHDRInfo } from '../../utils/imageUtils.js';
+import { hex } from './writePdfFonts.js';
+/**
+ * Extracts the concatenated data from all IDAT chunks of a PNG file.
+ * Chunks are walked in order until an IEND chunk is reached or the input ends.
+ * Each chunk's 4-byte CRC is skipped, not checked.
+ * @param {Uint8Array} pngBytes - The raw bytes of the PNG file.
+ * @returns {Uint8Array} The concatenated zlib-compressed image data.
+ *    If no IDAT chunk is found, a warning is logged and `pngBytes` itself is returned unchanged.
+ * @throws {Error} If the first 8 bytes do not match the PNG signature.
+ */
+function extractPngIdatData(pngBytes) {
+  // PNG signature
+  const signature = [137, 80, 78, 71, 13, 10, 26, 10];
+  for (let i = 0; i < 8; i++) {
+    if (pngBytes[i] !== signature[i]) {
+      throw new Error('Invalid PNG file signature');
+    }
+  }
+  let offset = 8;
+  const idatChunks = [];
+  while (offset < pngBytes.length) {
+    // Read chunk length directly from bytes (big-endian)
+    const length = (pngBytes[offset] << 24)
+                   | (pngBytes[offset + 1] << 16)
+                   | (pngBytes[offset + 2] << 8)
+                   | pngBytes[offset + 3];
+    offset += 4;
+    const type = String.fromCharCode(
+      pngBytes[offset],
+      pngBytes[offset + 1],
+      pngBytes[offset + 2],
+      pngBytes[offset + 3],
+    );
+    offset += 4;
+    if (type === 'IDAT') {
+      idatChunks.push(pngBytes.subarray(offset, offset + length));
+    } else if (type === 'IEND') {
+      break;
+    }
+    offset += length + 4; // Skip data and CRC
+  }
+  if (idatChunks.length === 0) {
+    console.warn('No IDAT chunks found in PNG image.');
+    return pngBytes; // Fallback if no IDAT chunks are found
+  }
+  const totalLength = idatChunks.reduce((acc, chunk) => acc + chunk.length, 0);
+  const concatenated = new Uint8Array(totalLength);
+  let currentOffset = 0;
+  for (const chunk of idatChunks) {
+    concatenated.set(chunk, currentOffset);
+    currentOffset += chunk.length;
+  }
+  return concatenated;
+}
+/**
+ * Creates a PDF image XObject for a .jpeg image.
+ * The JPEG bytes are embedded unchanged, hex-encoded, with the filter chain `/ASCIIHexDecode /DCTDecode`.
+ * `/Length` is the length of the hex string, not of the original bytes.
+ * `/ColorSpace` is always written as `/DeviceRGB` and `/BitsPerComponent` as 8.
+ * NOTE(review): presumably grayscale or CMYK JPEGs are not expected here; confirm with callers.
+ * @param {number} objIndex - PDF object index
+ * @param {ArrayBufferLike} imageData - Raw image data
+ * @param {number} width - Image width
+ * @param {number} height - Image height
+ * @returns {string} PDF XObject string
+ */
+const createImageXObjectJpeg = (objIndex, imageData, width, height) => {
+  const imageBytes = new Uint8Array(imageData);
+  let objStr = `${String(objIndex)} 0 obj\n`;
+  objStr += '<</Type /XObject\n';
+  objStr += '/Subtype /Image\n';
+  // For JPEG, we can use the raw JPEG data directly
+  const imageHexStr = hex(imageBytes.buffer);
+  objStr += `/Width ${String(width)}\n`;
+  objStr += `/Height ${String(height)}\n`;
+  objStr += '/ColorSpace /DeviceRGB\n';
+  objStr += '/BitsPerComponent 8\n';
+  objStr += '/Filter [ /ASCIIHexDecode /DCTDecode ]\n';
+  objStr += `/Length ${String(imageHexStr.length)}\n`;
+  objStr += '>>\nstream\n';
+  objStr += `${imageHexStr}\n`;
+  objStr += 'endstream\nendobj\n\n';
+  return objStr;
+};
+/**
+ * Creates a PDF image XObject for a .png image.
+ * The concatenated IDAT data is embedded hex-encoded, with the filter chain `/ASCIIHexDecode /FlateDecode`
+ * and `/Predictor 15` decode parameters. Width, height and bit depth are taken from the PNG header
+ * as reported by `getPngIHDRInfo`.
+ * Color type mapping: 0 -> 1 color, `/DeviceGray`; 2 -> 3 colors, `/DeviceRGB`;
+ * 4 -> 2 colors, `/DeviceGray`; 6 -> 4 colors, `/DeviceRGB`.
+ * Any other color type (including palette, type 3) keeps the defaults of 3 colors and `/DeviceRGB`.
+ * NOTE(review): for color types 4 and 6 the `/Colors` value counts the alpha channel while `/ColorSpace` does not.
+ * Confirm how PDF consumers are expected to handle images with alpha.
+ * @param {number} objIndex - PDF object index
+ * @param {ArrayBufferLike} imageData - Raw image data
+ * @returns {string} PDF XObject string
+ */
+const createImageXObjectPng = (objIndex, imageData) => {
+  const imageBytes = new Uint8Array(imageData);
+  let objStr = `${String(objIndex)} 0 obj\n`;
+  objStr += '<</Type /XObject\n';
+  objStr += '/Subtype /Image\n';
+  // For PNG, extract IDAT data and get header info
+  const imageDataOutput = extractPngIdatData(imageBytes);
+  const imageHexStr = hex(imageDataOutput.buffer);
+  const idhr = getPngIHDRInfo(imageBytes);
+  const predictor = 15;
+  let colors = 3;
+  let colorSpace = '/DeviceRGB';
+  // Determine color space and number of color components based on PNG color type
+  // Missing palette support (colorType 3)
+  if (idhr.colorType === 0) {
+    colors = 1;
+    colorSpace = '/DeviceGray';
+  } else if (idhr.colorType === 2) {
+    colors = 3;
+    colorSpace = '/DeviceRGB';
+  } else if (idhr.colorType === 4) {
+    colors = 2;
+    colorSpace = '/DeviceGray';
+  } else if (idhr.colorType === 6) {
+    colors = 4;
+    colorSpace = '/DeviceRGB';
+  }
+  objStr += '/DecodeParms [ null <<';
+  objStr += `/Predictor ${predictor} `;
+  objStr += `/Colors ${colors} `;
+  objStr += `/Columns ${String(idhr.width)} `;
+  objStr += ' >> ]\n';
+  objStr += `/Width ${String(idhr.width)}\n`;
+  objStr += `/Height ${String(idhr.height)}\n`;
+  objStr += `/ColorSpace ${colorSpace}\n`;
+  objStr += `/BitsPerComponent ${idhr.bitDepth}\n`;
+  objStr += '/Filter [ /ASCIIHexDecode /FlateDecode ]\n';
+  objStr += `/Length ${String(imageHexStr.length)}\n`;
+  objStr += '>>\nstream\n';
+  objStr += `${imageHexStr}\n`;
+  objStr += 'endstream\nendobj\n\n';
+  return objStr;
+};
+/**
+ * Creates PDF objects for multiple images
+ * The image at position `i` of `images` is given object index `firstObjIndex + i`.
+ * Images whose `format` is `'jpeg'` are written as JPEG XObjects; every other format is handled as PNG.
+ * Image bytes are decoded from the base64 `src` of each image.
+ * @param {ImageWrapper[]} images - Array of image data
+ * @param {number} firstObjIndex - Starting object index
+ * @returns {string[]} One PDF XObject string per image, in the order of `images`.
+ */
+export function createEmbeddedImages(images, firstObjIndex) {
+  /** @type {string[]} */
+  const imageObjArr = [];
+  images.forEach((image, index) => {
+    const objIndex = firstObjIndex + index;
+    const dims = imageUtils.getDims(image);
+    const imageBytes = base64ToBytes(image.src);
+    let objParts;
+    if (image.format === 'jpeg') {
+      objParts = createImageXObjectJpeg(objIndex, imageBytes.buffer, dims.width, dims.height);
+    } else {
+      objParts = createImageXObjectPng(objIndex, imageBytes.buffer);
+    }
+    imageObjArr.push(objParts);
+  });
+  return imageObjArr;
+}
+/**
+ * Creates a resource dictionary entry for images
+ * Each image is named `/Im[i]`, where `i` is its position in `imageObjIndices`, and points at the object with that index.
+ * @param {Array<number>} imageObjIndices - Array of image object indices
+ * @returns {string} Resource dictionary XObject entries (`/XObject<<...>>`), or an empty string when the array is empty.
+ */
+export function createImageResourceDict(imageObjIndices) {
+  if (imageObjIndices.length === 0) return '';
+  let resourceStr = '/XObject<<';
+  imageObjIndices.forEach((objIndex, i) => {
+    resourceStr += `/Im${String(i)} ${String(objIndex)} 0 R\n`;
+  });
+  resourceStr += '>>';
+  return resourceStr;
+}
+/**
+ * Generates PDF drawing commands to place an image on a page with optional rotation
+ * The commands save the graphics state (`q`), set a transformation matrix (`cm`), paint `/Im[imageIndex]` (`Do`),
+ * and restore the graphics state (`Q`).
+ * The matrix scales the unit image square to `width` x `height`, rotates it by `rotation`, and translates it so that
+ * the center of the image lands on the center of the box defined by `x`, `y`, `width` and `height`.
+ * NOTE(review): the matrix values are interpolated with default number-to-string conversion. Confirm that values small
+ * enough to be written in exponent notation cannot occur, or are tolerated by PDF consumers.
+ * @param {number} imageIndex - Index of the image (for /Im naming)
+ * @param {number} x - X position
+ * @param {number} y - Y position
+ * @param {number} width - Display width
+ * @param {number} height - Display height
+ * @param {number} rotation - Rotation angle in degrees (default: 0)
+ * @returns {string} PDF drawing commands
+ */
+export function drawImageCommands(imageIndex, x, y, width, height, rotation = 0) {
+  const angle = (rotation * Math.PI) / 180;
+  const centerX = x + width / 2;
+  const centerY = y + height / 2;
+  const cos = Math.cos(angle);
+  const sin = Math.sin(angle);
+  const a = width * cos;
+  const b = width * sin;
+  const c = -height * sin;
+  const d = height * cos;
+  const e = centerX - (width * cos - height * sin) / 2;
+  const f = centerY - (width * sin + height * cos) / 2;
+  return `q\n${a} ${b} ${c} ${d} ${e} ${f} cm\n/Im${imageIndex} Do\nQ\n`;
+}