@heripo/pdf-parser 0.1.12 → 0.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1756,7 +1756,8 @@ var VlmTextCorrector = class {
1756
1756
  },
1757
1757
  {
1758
1758
  type: "image",
1759
- image: `data:image/png;base64,${imageBase64}`
1759
+ image: imageBase64,
1760
+ mediaType: "image/png"
1760
1761
  }
1761
1762
  ]
1762
1763
  }
@@ -1968,7 +1969,7 @@ var VlmTextCorrector = class {
1968
1969
  */
1969
1970
  readPageImage(outputDir, pageNo) {
1970
1971
  const imagePath = join4(outputDir, "pages", `page_${pageNo - 1}.png`);
1971
- return readFileSync(imagePath).toString("base64");
1972
+ return new Uint8Array(readFileSync(imagePath));
1972
1973
  }
1973
1974
  /**
1974
1975
  * Apply VLM corrections to the DoclingDocument.
@@ -2237,7 +2238,7 @@ var OcrStrategySampler = class {
2237
2238
  this.logger.debug(
2238
2239
  `[OcrStrategySampler] Analyzing page ${pageNo} for Korean-Hanja mix and language...`
2239
2240
  );
2240
- const base64Image = readFileSync2(pageFile).toString("base64");
2241
+ const imageData = new Uint8Array(readFileSync2(pageFile));
2241
2242
  const messages = [
2242
2243
  {
2243
2244
  role: "user",
@@ -2245,7 +2246,8 @@ var OcrStrategySampler = class {
2245
2246
  { type: "text", text: KOREAN_HANJA_MIX_PROMPT },
2246
2247
  {
2247
2248
  type: "image",
2248
- image: `data:image/png;base64,${base64Image}`
2249
+ image: imageData,
2250
+ mediaType: "image/png"
2249
2251
  }
2250
2252
  ]
2251
2253
  }