open-research 1.0.1 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,6 +5,7 @@
5
5
  <h3 align="center">The research-native CLI agent.</h3>
6
6
 
7
7
  <p align="center">
8
+ <a href="https://open-research.info">open-research.info</a> &nbsp;·&nbsp;
8
9
  <a href="https://www.npmjs.com/package/open-research"><img src="https://img.shields.io/npm/v/open-research.svg" alt="npm" /></a>
9
10
  <a href="https://github.com/gangj277/open-research/blob/main/LICENSE"><img src="https://img.shields.io/npm/l/open-research.svg" alt="license" /></a>
10
11
  </p>
@@ -227,10 +227,14 @@ async function executeFetchUrl(args, signal) {
227
227
 
228
228
  // src/lib/fs/pdf.ts
229
229
  import fs from "fs/promises";
230
+ import path from "path";
231
+ import { createRequire } from "module";
232
+ var require2 = createRequire(import.meta.url);
233
+ var standardFontDataUrl = path.join(path.dirname(require2.resolve("pdfjs-dist/package.json")), "standard_fonts") + "/";
230
234
  async function extractPdfText(filePath, options) {
231
235
  const pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs");
232
236
  const buffer = await fs.readFile(filePath);
233
- const document = await pdfjs.getDocument({ data: new Uint8Array(buffer) }).promise;
237
+ const document = await pdfjs.getDocument({ data: new Uint8Array(buffer), standardFontDataUrl }).promise;
234
238
  const totalPages = document.numPages;
235
239
  const start = Math.max(1, options?.startPage ?? 1);
236
240
  const end = Math.min(totalPages, options?.endPage ?? totalPages);
@@ -245,7 +249,7 @@ async function extractPdfText(filePath, options) {
245
249
  }
246
250
  async function extractPdfTextFromBuffer(buffer, options) {
247
251
  const pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs");
248
- const document = await pdfjs.getDocument({ data: buffer }).promise;
252
+ const document = await pdfjs.getDocument({ data: buffer, standardFontDataUrl }).promise;
249
253
  const totalPages = document.numPages;
250
254
  const end = Math.min(totalPages, options?.maxPages ?? 20);
251
255
  const pages = [];