paper-manager 0.9.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/dep.js +48 -0
- package/dist/commands/literature.js +59 -2
- package/dist/extractor/markdown.js +88 -0
- package/dist/index.js +2 -0
- package/package.json +4 -1
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import chalk from "chalk";
|
|
2
|
+
import { Command } from "commander";
|
|
3
|
+
import { checkOpendataLoaderStatus } from "../extractor/markdown.js";
|
|
4
|
+
import { log } from "../logger.js";
|
|
5
|
+
// Names accepted by `dep check <dep>`; any other name is reported as unknown.
const KNOWN_DEPS = new Set(["opendataloader"]);
|
|
6
|
+
/**
 * Build the `dep` command group, which currently exposes a single
 * `check <dep>` subcommand.
 *
 * @returns {Command} The configured `dep` command, ready to be added to the program.
 */
export function createDepCommand() {
    // Handler for `dep check <dep>`: reject unknown names, then dispatch.
    const runCheck = async (depName) => {
        const isKnown = KNOWN_DEPS.has(depName);
        if (!isKnown) {
            log.error(`Unknown dependency: ${depName}`);
            log.step(`Available: ${[...KNOWN_DEPS].join(", ")}`);
            process.exit(1);
        }
        switch (depName) {
            case "opendataloader":
                await checkOpendataLoader();
                break;
            default:
                break;
        }
    };

    const depCommand = new Command("dep").description("Manage external dependencies");
    depCommand
        .command("check <dep>")
        .description("Check if an external dependency is available")
        .action((depName) => runCheck(depName));
    return depCommand;
}
|
|
23
|
+
/**
 * Print a readiness report for opendataloader-pdf: one line for the
 * `@opendataloader/pdf` package, one for the Java runtime, then either a
 * success message or an error followed by install hints for whatever is missing.
 *
 * When the dependency is not usable, `process.exitCode` is set to 1 so that
 * scripts can gate on `dep check`, matching the non-zero status already used
 * for an unknown dependency name. `exitCode` is assigned rather than calling
 * `process.exit()` so the process still ends on its own after output is written.
 *
 * @returns {Promise<void>}
 */
async function checkOpendataLoader() {
    log.info("Checking opendataloader-pdf...");
    const status = await checkOpendataLoaderStatus();

    // Green check mark for a satisfied requirement, red cross otherwise.
    const icon = (ok) => (ok ? chalk.green("✔") : chalk.red("✖"));
    const javaDetail = status.javaAvailable ? status.javaVersion : "not found";

    log.plain(` ${icon(status.packageInstalled)} @opendataloader/pdf package`);
    log.plain(` ${icon(status.javaAvailable)} Java runtime (${javaDetail})`);
    log.newline();

    if (status.packageInstalled && status.javaAvailable) {
        log.success("opendataloader-pdf is ready.");
        return;
    }

    log.error("opendataloader-pdf is not available.");
    if (!status.packageInstalled) {
        log.step("Install: pnpm add @opendataloader/pdf");
    }
    if (!status.javaAvailable) {
        log.step("Install Java 11+: https://adoptium.net/");
    }
    // A failed check must not look like success to the calling shell.
    process.exitCode = 1;
}
|
|
@@ -9,6 +9,7 @@ import * as projectLit from "../db/project/literatures.js";
|
|
|
9
9
|
import * as userKb from "../db/user/knowledge-bases.js";
|
|
10
10
|
import * as userLit from "../db/user/literatures.js";
|
|
11
11
|
import { extractContent, extractPdfMetadata } from "../extractor/index.js";
|
|
12
|
+
import { convertPdfToMarkdown, isOpendataLoaderAvailable } from "../extractor/markdown.js";
|
|
12
13
|
import { log } from "../logger.js";
|
|
13
14
|
import { splitDocuments } from "../text-splitter.js";
|
|
14
15
|
import { addDocuments, createVectorStore } from "../vector-store/index.js";
|
|
@@ -94,6 +95,14 @@ export function createLiteratureCommand() {
|
|
|
94
95
|
const ext = path.extname(litPath);
|
|
95
96
|
fs.mkdirSync(filesDir, { recursive: true });
|
|
96
97
|
fs.copyFileSync(absolutePath, path.join(filesDir, `${literature.id}${ext}`));
|
|
98
|
+
// Convert PDF to Markdown if opendataloader is available
|
|
99
|
+
if (isPdf && (await isOpendataLoaderAvailable())) {
|
|
100
|
+
const markdown = await convertPdfToMarkdown(absolutePath);
|
|
101
|
+
if (markdown) {
|
|
102
|
+
fs.writeFileSync(path.join(filesDir, `${literature.id}.md`), markdown, "utf-8");
|
|
103
|
+
log.step("Converted to Markdown via opendataloader-pdf.");
|
|
104
|
+
}
|
|
105
|
+
}
|
|
97
106
|
// Split text and add to vector store
|
|
98
107
|
log.info("Splitting text...");
|
|
99
108
|
const splitDocs = splitDocuments(docs, { chunkSize: 1000, chunkOverlap: 200 });
|
|
@@ -129,6 +138,40 @@ export function createLiteratureCommand() {
|
|
|
129
138
|
if (literature.keywords.length > 0)
|
|
130
139
|
log.label("Keywords:", literature.keywords.join(", "));
|
|
131
140
|
});
|
|
141
|
+
// ─── lit convert ────────────────────────────────────────────
|
|
142
|
+
lit
|
|
143
|
+
.command("convert <id>")
|
|
144
|
+
.description("Convert an existing literature PDF to Markdown via opendataloader-pdf")
|
|
145
|
+
.action(async (id) => {
|
|
146
|
+
const found = findLiteratureWithScope(id);
|
|
147
|
+
if (!found) {
|
|
148
|
+
log.error(`Literature not found: ${id}`);
|
|
149
|
+
process.exit(1);
|
|
150
|
+
}
|
|
151
|
+
const filesDir = getFilesDir(getBaseDir(found.scope));
|
|
152
|
+
const pdfFile = findLiteratureFiles(filesDir, id).find((f) => f.endsWith(".pdf"));
|
|
153
|
+
if (!pdfFile) {
|
|
154
|
+
log.error(`No PDF file found for literature: ${id}`);
|
|
155
|
+
process.exit(1);
|
|
156
|
+
}
|
|
157
|
+
const mdPath = path.join(filesDir, `${id}.md`);
|
|
158
|
+
if (fs.existsSync(mdPath)) {
|
|
159
|
+
log.error("Markdown file already exists. Delete it first to reconvert.");
|
|
160
|
+
process.exit(1);
|
|
161
|
+
}
|
|
162
|
+
if (!(await isOpendataLoaderAvailable())) {
|
|
163
|
+
log.error("opendataloader-pdf is not available. Run `paper dep check opendataloader` for details.");
|
|
164
|
+
process.exit(1);
|
|
165
|
+
}
|
|
166
|
+
log.info("Converting PDF to Markdown...");
|
|
167
|
+
const markdown = await convertPdfToMarkdown(path.join(filesDir, pdfFile));
|
|
168
|
+
if (!markdown) {
|
|
169
|
+
log.error("Conversion failed.");
|
|
170
|
+
process.exit(1);
|
|
171
|
+
}
|
|
172
|
+
fs.writeFileSync(mdPath, markdown, "utf-8");
|
|
173
|
+
log.success(`Markdown saved: ${id}.md`);
|
|
174
|
+
});
|
|
132
175
|
// ─── lit remove ────────────────────────────────────────────
|
|
133
176
|
lit
|
|
134
177
|
.command("remove <knowledge-base-id> <id>")
|
|
@@ -226,11 +269,14 @@ export function createLiteratureCommand() {
|
|
|
226
269
|
log.plain(JSON.stringify(literatures, null, 2));
|
|
227
270
|
return;
|
|
228
271
|
}
|
|
272
|
+
const filesDir = getFilesDir(getBaseDir(resolved.scope));
|
|
229
273
|
for (const l of literatures) {
|
|
230
274
|
log.header(l.id);
|
|
231
275
|
log.label("Title:", l.title);
|
|
232
276
|
if (l.author)
|
|
233
277
|
log.label("Author:", l.author);
|
|
278
|
+
const files = findLiteratureFiles(filesDir, l.id);
|
|
279
|
+
log.label("Files:", files.length > 0 ? files.join(", ") : "(none)");
|
|
234
280
|
log.label("Created:", l.createdAt.toISOString());
|
|
235
281
|
log.newline();
|
|
236
282
|
}
|
|
@@ -313,7 +359,8 @@ export function createLiteratureCommand() {
|
|
|
313
359
|
log.plain(JSON.stringify(literature, null, 2));
|
|
314
360
|
return;
|
|
315
361
|
}
|
|
316
|
-
|
|
362
|
+
const filesDir = getFilesDir(getBaseDir(resolved.scope));
|
|
363
|
+
printLiterature(literature, filesDir);
|
|
317
364
|
});
|
|
318
365
|
// ─── lit note ──────────────────────────────────────────────
|
|
319
366
|
const note = lit.command("note").description("Manage literature notes");
|
|
@@ -386,7 +433,15 @@ function findLiteratureWithScope(id) {
|
|
|
386
433
|
return { literature: uLit, scope: "user" };
|
|
387
434
|
return null;
|
|
388
435
|
}
|
|
389
|
-
function printLiterature(lit) {
|
|
436
|
+
/**
 * List the stored files that belong to one literature entry.
 *
 * A file belongs to the entry when its name starts with `<id>.`, i.e. the id
 * followed directly by an extension separator.
 *
 * @param {string} filesDir - Directory that holds the literature files.
 * @param {string} id - Literature id used as the file-name stem.
 * @returns {string[]} Matching file names in sorted order; empty when the
 *   directory does not exist or nothing matches.
 */
function findLiteratureFiles(filesDir, id) {
    if (!fs.existsSync(filesDir)) {
        return [];
    }
    const matches = [];
    for (const entry of fs.readdirSync(filesDir)) {
        if (entry.startsWith(`${id}.`)) {
            matches.push(entry);
        }
    }
    matches.sort();
    return matches;
}
|
|
444
|
+
function printLiterature(lit, filesDir) {
|
|
390
445
|
log.header(lit.id);
|
|
391
446
|
log.label("Title:", lit.title);
|
|
392
447
|
if (lit.titleTranslation)
|
|
@@ -405,6 +460,8 @@ function printLiterature(lit) {
|
|
|
405
460
|
log.label("DOI:", lit.doi);
|
|
406
461
|
if (lit.knowledgeBaseId)
|
|
407
462
|
log.label("Knowledge Base:", lit.knowledgeBaseId);
|
|
463
|
+
const files = findLiteratureFiles(filesDir, lit.id);
|
|
464
|
+
log.label("Files:", files.length > 0 ? files.join(", ") : "(none)");
|
|
408
465
|
log.label("Created:", lit.createdAt.toISOString());
|
|
409
466
|
log.label("Updated:", lit.updatedAt.toISOString());
|
|
410
467
|
const noteEntries = Object.entries(lit.notes);
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import { execFile } from "node:child_process";
|
|
2
|
+
import { mkdirSync, mkdtempSync, readdirSync, readFileSync, rmSync } from "node:fs";
|
|
3
|
+
import { tmpdir } from "node:os";
|
|
4
|
+
import * as path from "node:path";
|
|
5
|
+
/**
 * Report whether opendataloader-pdf can be used (package installed and a
 * Java runtime present).
 *
 * Detection runs only while no result is cached; once a result has been
 * stored, later calls return it without probing again.
 *
 * @returns {Promise<boolean>} True when both requirements are satisfied.
 */
export async function isOpendataLoaderAvailable() {
    if (cachedAvailability === undefined) {
        cachedAvailability = await detectAvailability();
    }
    return cachedAvailability;
}
|
|
15
|
+
/**
 * Convert a PDF file to Markdown using opendataloader-pdf.
 *
 * The converter is pointed at a freshly created, uniquely named temporary
 * directory; the first `.md` file found there afterwards is read back and the
 * directory is always removed before returning.
 *
 * @param {string} pdfPath - Path of the PDF file to convert.
 * @returns {Promise<string | null>} The Markdown content on success, or null
 *   when the temp directory cannot be created, the package cannot be loaded,
 *   conversion throws, or no `.md` file is produced.
 */
export async function convertPdfToMarkdown(pdfPath) {
    let outDir;
    try {
        // mkdtempSync appends random characters to the prefix, so two
        // conversions started in the same millisecond can never share one
        // directory (and then read or delete each other's output).
        outDir = mkdtempSync(path.join(tmpdir(), "odl-"));
        const { convert } = await import("@opendataloader/pdf");
        await convert([pdfPath], {
            outputDir: outDir,
            format: "markdown",
            quiet: true,
        });
        const mdFile = readdirSync(outDir).find((f) => f.endsWith(".md"));
        if (!mdFile) {
            return null;
        }
        return readFileSync(path.join(outDir, mdFile), "utf-8");
    }
    catch {
        // Best-effort by contract: every failure is reported as null.
        return null;
    }
    finally {
        // outDir stays undefined when directory creation itself failed.
        if (outDir !== undefined) {
            rmSync(outDir, { recursive: true, force: true });
        }
    }
}
|
|
41
|
+
// ─── Internal ────────────────────────────────────────────
|
|
42
|
+
// Holds the stored availability result; undefined until one has been stored.
let cachedAvailability;
/**
 * Probe for the package and for Java concurrently.
 *
 * @returns {Promise<boolean>} True only when both probes succeed.
 */
async function detectAvailability() {
    const probes = [checkPackage(), checkJava()];
    const [packagePresent, javaPresent] = await Promise.all(probes);
    return packagePresent && javaPresent;
}
|
|
47
|
+
/**
 * Check whether `@opendataloader/pdf` can be imported.
 *
 * @returns {Promise<boolean>} True when the dynamic import succeeds, false
 *   when it fails for any reason.
 */
async function checkPackage() {
    return import("@opendataloader/pdf").then(
        () => true,
        () => false,
    );
}
|
|
56
|
+
// execFile is safe — arguments are passed as an array, no shell interpolation.
/**
 * Check whether `java -version` can be executed successfully.
 *
 * @returns {Promise<boolean>} True when the command completes without error.
 */
async function checkJava() {
    const succeeded = await new Promise((settle) => {
        execFile("java", ["-version"], (failure) => settle(!failure));
    });
    return succeeded;
}
|
|
64
|
+
/**
 * Detailed availability check for the `dep check` command.
 *
 * Runs the package probe and the Java version probe concurrently.
 *
 * @returns {Promise<{packageInstalled: boolean, javaAvailable: boolean, javaVersion: (string|null)}>}
 *   `javaAvailable` is true exactly when the version probe resolved to a
 *   non-null value; `javaVersion` is that resolved value.
 */
export async function checkOpendataLoaderStatus() {
    const probes = [checkPackage(), getJavaVersion()];
    const [packageInstalled, javaVersion] = await Promise.all(probes);
    const javaAvailable = javaVersion !== null;
    return { packageInstalled, javaAvailable, javaVersion };
}
|
|
75
|
+
// execFile is safe — arguments are passed as an array, no shell interpolation.
/**
 * Run `java -version` and extract the quoted version string from its output.
 *
 * @returns {Promise<string | null>} The version string (e.g. "17.0.9") when
 *   it can be parsed, "unknown" when Java ran successfully but printed a
 *   banner in an unrecognized format, or null when Java could not be run.
 */
function getJavaVersion() {
    return new Promise((resolve) => {
        execFile("java", ["-version"], (error, _stdout, stderr) => {
            if (error) {
                resolve(null);
                return;
            }
            // Java prints version to stderr
            const match = /version\s+"([^"]+)"/.exec(stderr);
            // The command succeeded, so Java is present even if the banner
            // could not be parsed; null is reserved for "could not be run".
            resolve(match?.[1] ?? "unknown");
        });
    });
}
|
package/dist/index.js
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
import { createRequire } from "node:module";
|
|
3
3
|
import { Command } from "commander";
|
|
4
4
|
import { createConfigCommand } from "./commands/config.js";
|
|
5
|
+
import { createDepCommand } from "./commands/dep.js";
|
|
5
6
|
import { createKnowledgeBaseCommand } from "./commands/knowledge-base.js";
|
|
6
7
|
import { createLiteratureCommand } from "./commands/literature.js";
|
|
7
8
|
import { createUtilCommand } from "./commands/util.js";
|
|
@@ -14,6 +15,7 @@ program.hook("preAction", () => {
|
|
|
14
15
|
startup();
|
|
15
16
|
});
|
|
16
17
|
program.addCommand(createConfigCommand());
|
|
18
|
+
program.addCommand(createDepCommand());
|
|
17
19
|
program.addCommand(createKnowledgeBaseCommand());
|
|
18
20
|
program.addCommand(createLiteratureCommand());
|
|
19
21
|
program.addCommand(createUtilCommand());
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "paper-manager",
|
|
3
|
-
"version": "0.9.0",
|
|
3
|
+
"version": "0.10.0",
|
|
4
4
|
"description": "A paper management system.",
|
|
5
5
|
"keywords": [],
|
|
6
6
|
"homepage": "https://github.com/EurFelux/paper-manager",
|
|
@@ -46,6 +46,9 @@
|
|
|
46
46
|
"typescript": "^5.9.3",
|
|
47
47
|
"vitest": "^4.0.18"
|
|
48
48
|
},
|
|
49
|
+
"optionalDependencies": {
|
|
50
|
+
"@opendataloader/pdf": "^2.2.1"
|
|
51
|
+
},
|
|
49
52
|
"engines": {
|
|
50
53
|
"node": ">=24"
|
|
51
54
|
},
|