open-research 1.0.1 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
<h3 align="center">The research-native CLI agent.</h3>
|
|
6
6
|
|
|
7
7
|
<p align="center">
|
|
8
|
+
<a href="https://open-research.info">open-research.info</a> ·
|
|
8
9
|
<a href="https://www.npmjs.com/package/open-research"><img src="https://img.shields.io/npm/v/open-research.svg" alt="npm" /></a>
|
|
9
10
|
<a href="https://github.com/gangj277/open-research/blob/main/LICENSE"><img src="https://img.shields.io/npm/l/open-research.svg" alt="license" /></a>
|
|
10
11
|
</p>
|
|
@@ -227,10 +227,14 @@ async function executeFetchUrl(args, signal) {
|
|
|
227
227
|
|
|
228
228
|
// src/lib/fs/pdf.ts
|
|
229
229
|
import fs from "fs/promises";
|
|
230
|
+
import path from "path";
|
|
231
|
+
import { createRequire } from "module";
|
|
232
|
+
var require2 = createRequire(import.meta.url);
|
|
233
|
+
var standardFontDataUrl = path.join(path.dirname(require2.resolve("pdfjs-dist/package.json")), "standard_fonts") + "/";
|
|
230
234
|
async function extractPdfText(filePath, options) {
|
|
231
235
|
const pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs");
|
|
232
236
|
const buffer = await fs.readFile(filePath);
|
|
233
|
-
const document = await pdfjs.getDocument({ data: new Uint8Array(buffer) }).promise;
|
|
237
|
+
const document = await pdfjs.getDocument({ data: new Uint8Array(buffer), standardFontDataUrl }).promise;
|
|
234
238
|
const totalPages = document.numPages;
|
|
235
239
|
const start = Math.max(1, options?.startPage ?? 1);
|
|
236
240
|
const end = Math.min(totalPages, options?.endPage ?? totalPages);
|
|
@@ -245,7 +249,7 @@ async function extractPdfText(filePath, options) {
|
|
|
245
249
|
}
|
|
246
250
|
async function extractPdfTextFromBuffer(buffer, options) {
|
|
247
251
|
const pdfjs = await import("pdfjs-dist/legacy/build/pdf.mjs");
|
|
248
|
-
const document = await pdfjs.getDocument({ data: buffer }).promise;
|
|
252
|
+
const document = await pdfjs.getDocument({ data: buffer, standardFontDataUrl }).promise;
|
|
249
253
|
const totalPages = document.numPages;
|
|
250
254
|
const end = Math.min(totalPages, options?.maxPages ?? 20);
|
|
251
255
|
const pages = [];
|