@heripo/pdf-parser 0.1.12 → 0.1.13

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -1780,7 +1780,8 @@ var VlmTextCorrector = class {
1780
1780
  },
1781
1781
  {
1782
1782
  type: "image",
1783
- image: `data:image/png;base64,${imageBase64}`
1783
+ image: imageBase64,
1784
+ mediaType: "image/png"
1784
1785
  }
1785
1786
  ]
1786
1787
  }
@@ -1992,7 +1993,7 @@ var VlmTextCorrector = class {
1992
1993
  */
1993
1994
  readPageImage(outputDir, pageNo) {
1994
1995
  const imagePath = (0, import_node_path4.join)(outputDir, "pages", `page_${pageNo - 1}.png`);
1995
- return (0, import_node_fs4.readFileSync)(imagePath).toString("base64");
1996
+ return new Uint8Array((0, import_node_fs4.readFileSync)(imagePath));
1996
1997
  }
1997
1998
  /**
1998
1999
  * Apply VLM corrections to the DoclingDocument.
@@ -2261,7 +2262,7 @@ var OcrStrategySampler = class {
2261
2262
  this.logger.debug(
2262
2263
  `[OcrStrategySampler] Analyzing page ${pageNo} for Korean-Hanja mix and language...`
2263
2264
  );
2264
- const base64Image = (0, import_node_fs5.readFileSync)(pageFile).toString("base64");
2265
+ const imageData = new Uint8Array((0, import_node_fs5.readFileSync)(pageFile));
2265
2266
  const messages = [
2266
2267
  {
2267
2268
  role: "user",
@@ -2269,7 +2270,8 @@ var OcrStrategySampler = class {
2269
2270
  { type: "text", text: KOREAN_HANJA_MIX_PROMPT },
2270
2271
  {
2271
2272
  type: "image",
2272
- image: `data:image/png;base64,${base64Image}`
2273
+ image: imageData,
2274
+ mediaType: "image/png"
2273
2275
  }
2274
2276
  ]
2275
2277
  }