paper-manager 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,9 +17,7 @@
17
17
  "properties": {
18
18
  "provider": {
19
19
  "type": "string",
20
- "enum": [
21
- "openai"
22
- ]
20
+ "enum": ["openai"]
23
21
  },
24
22
  "model": {
25
23
  "type": "string"
@@ -37,11 +35,7 @@
37
35
  "maximum": 9007199254740991
38
36
  }
39
37
  },
40
- "required": [
41
- "provider",
42
- "model",
43
- "apiKey"
44
- ],
38
+ "required": ["provider", "model", "apiKey"],
45
39
  "additionalProperties": false
46
40
  }
47
41
  },
@@ -50,8 +44,6 @@
50
44
  "minLength": 1
51
45
  }
52
46
  },
53
- "required": [
54
- "embeddingModels"
55
- ],
47
+ "required": ["embeddingModels"],
56
48
  "additionalProperties": false
57
49
  }
@@ -1,2 +1,2 @@
1
- import type { Document } from "@langchain/core/documents";
1
+ import { Document } from "@langchain/core/documents";
2
2
  export declare function extractPdfContent(pdfPath: string): Promise<Document[]>;
@@ -1,6 +1,18 @@
1
- import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf";
1
+ import { readFile } from "node:fs/promises";
2
+ import { Document } from "@langchain/core/documents";
3
+ import { PDFParse } from "pdf-parse";
2
4
  export async function extractPdfContent(pdfPath) {
3
- const loader = new PDFLoader(pdfPath, { splitPages: true });
4
- return loader.load();
5
+ const data = await readFile(pdfPath);
6
+ const parser = new PDFParse({ data });
7
+ const result = await parser.getText();
8
+ await parser.destroy();
9
+ return result.pages.map((page) => new Document({
10
+ pageContent: page.text,
11
+ metadata: {
12
+ source: pdfPath,
13
+ pdf: { totalPages: result.total },
14
+ loc: { pageNumber: page.num },
15
+ },
16
+ }));
5
17
  }
6
18
  //# sourceMappingURL=extractor.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/pdf/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,8CAA8C,CAAC;AAGzE,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,OAAe;IACrD,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,OAAO,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;IAC5D,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;AACvB,CAAC"}
1
+ {"version":3,"file":"extractor.js","sourceRoot":"","sources":["../../src/pdf/extractor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAE5C,OAAO,EAAE,QAAQ,EAAE,MAAM,2BAA2B,CAAC;AACrD,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC,MAAM,CAAC,KAAK,UAAU,iBAAiB,CAAC,OAAe;IACrD,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,CAAC;IACrC,MAAM,MAAM,GAAG,IAAI,QAAQ,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC;IACtC,MAAM,MAAM,GAAG,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC;IACtC,MAAM,MAAM,CAAC,OAAO,EAAE,CAAC;IAEvB,OAAO,MAAM,CAAC,KAAK,CAAC,GAAG,CACrB,CAAC,IAAI,EAAE,EAAE,CACP,IAAI,QAAQ,CAAC;QACX,WAAW,EAAE,IAAI,CAAC,IAAI;QACtB,QAAQ,EAAE;YACR,MAAM,EAAE,OAAO;YACf,GAAG,EAAE,EAAE,UAAU,EAAE,MAAM,CAAC,KAAK,EAAE;YACjC,GAAG,EAAE,EAAE,UAAU,EAAE,IAAI,CAAC,GAAG,EAAE;SAC9B;KACF,CAAC,CACL,CAAC;AACJ,CAAC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "paper-manager",
3
- "version": "0.2.3",
3
+ "version": "0.2.5",
4
4
  "description": "A paper management system.",
5
5
  "keywords": [],
6
6
  "homepage": "https://github.com/EurFelux/paper-manager",
@@ -31,6 +31,7 @@
31
31
  "commander": "^14.0.3",
32
32
  "faiss-node": "^0.5.1",
33
33
  "langchain": "^1.2.28",
34
+ "pdf-parse": "^2.4.5",
34
35
  "zod": "^4.3.6"
35
36
  },
36
37
  "devDependencies": {
@@ -48,7 +49,7 @@
48
49
  },
49
50
  "scripts": {
50
51
  "build": "tsc",
51
- "generate:schema": "tsx scripts/generate-schema.ts",
52
+ "generate:schema": "tsx scripts/generate-schema.ts && oxfmt config.schema.json",
52
53
  "release": "tsx scripts/release.ts",
53
54
  "test": "echo \"Error: no test specified\" && exit 1",
54
55
  "typecheck": "tsc --noEmit",