paper-manager 0.9.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+ import chalk from "chalk";
2
+ import { Command } from "commander";
3
+ import { checkOpendataLoaderStatus } from "../extractor/markdown.js";
4
+ import { log } from "../logger.js";
5
/** Names accepted by `dep check [dep]`. */
const KNOWN_DEPS = new Set(["opendataloader"]);

/**
 * Build the `dep` command group for managing external dependencies.
 *
 * Registers a single `check [dep]` subcommand. With no argument it checks
 * opendataloader; with an unrecognised name it logs the available names and
 * exits with status 1.
 *
 * @returns {Command} The configured `dep` command.
 */
export function createDepCommand() {
    const dep = new Command("dep").description("Manage external dependencies");

    const runCheck = async (depName) => {
        // No argument given: check all known dependencies
        if (depName == null) {
            await checkOpendataLoader();
            return;
        }
        const isKnown = KNOWN_DEPS.has(depName);
        if (!isKnown) {
            log.error(`Unknown dependency: ${depName}`);
            log.step(`Available: ${Array.from(KNOWN_DEPS).join(", ")}`);
            process.exit(1);
        }
        if (depName === "opendataloader") {
            await checkOpendataLoader();
        }
    };

    dep.command("check [dep]")
        .description("Check if an external dependency is available")
        .action(runCheck);

    return dep;
}
29
/**
 * Print a human-readable report on the opendataloader-pdf prerequisites:
 * one line for the npm package, one for the Java runtime, then either a
 * success message or an error followed by install hints for whatever is missing.
 */
async function checkOpendataLoader() {
    log.info("Checking opendataloader-pdf...");
    const { packageInstalled, javaAvailable, javaVersion } = await checkOpendataLoaderStatus();

    // Green tick for a satisfied prerequisite, red cross otherwise.
    const mark = (ok) => (ok ? chalk.green("✔") : chalk.red("✖"));
    const javaDetail = javaAvailable ? javaVersion : "not found";

    log.plain(` ${mark(packageInstalled)} @opendataloader/pdf package`);
    log.plain(` ${mark(javaAvailable)} Java runtime (${javaDetail})`);
    log.newline();

    if (packageInstalled && javaAvailable) {
        log.success("opendataloader-pdf is ready.");
        return;
    }

    log.error("opendataloader-pdf is not available.");
    if (!packageInstalled) {
        log.step("Install: pnpm add @opendataloader/pdf");
    }
    if (!javaAvailable) {
        log.step("Install Java 11+: https://adoptium.net/");
    }
}
@@ -9,6 +9,7 @@ import * as projectLit from "../db/project/literatures.js";
9
9
  import * as userKb from "../db/user/knowledge-bases.js";
10
10
  import * as userLit from "../db/user/literatures.js";
11
11
  import { extractContent, extractPdfMetadata } from "../extractor/index.js";
12
+ import { convertPdfToMarkdown, isOpendataLoaderAvailable } from "../extractor/markdown.js";
12
13
  import { log } from "../logger.js";
13
14
  import { splitDocuments } from "../text-splitter.js";
14
15
  import { addDocuments, createVectorStore } from "../vector-store/index.js";
@@ -94,6 +95,14 @@ export function createLiteratureCommand() {
94
95
  const ext = path.extname(litPath);
95
96
  fs.mkdirSync(filesDir, { recursive: true });
96
97
  fs.copyFileSync(absolutePath, path.join(filesDir, `${literature.id}${ext}`));
98
+ // Convert PDF to Markdown if opendataloader is available
99
+ if (isPdf && (await isOpendataLoaderAvailable())) {
100
+ const markdown = await convertPdfToMarkdown(absolutePath);
101
+ if (markdown) {
102
+ fs.writeFileSync(path.join(filesDir, `${literature.id}.md`), markdown, "utf-8");
103
+ log.step("Converted to Markdown via opendataloader-pdf.");
104
+ }
105
+ }
97
106
  // Split text and add to vector store
98
107
  log.info("Splitting text...");
99
108
  const splitDocs = splitDocuments(docs, { chunkSize: 1000, chunkOverlap: 200 });
@@ -129,6 +138,40 @@ export function createLiteratureCommand() {
129
138
  if (literature.keywords.length > 0)
130
139
  log.label("Keywords:", literature.keywords.join(", "));
131
140
  });
141
+ // ─── lit convert ────────────────────────────────────────────
142
+ lit
143
+ .command("convert <id>")
144
+ .description("Convert an existing literature PDF to Markdown via opendataloader-pdf")
145
+ .action(async (id) => {
146
+ const found = findLiteratureWithScope(id);
147
+ if (!found) {
148
+ log.error(`Literature not found: ${id}`);
149
+ process.exit(1);
150
+ }
151
+ const filesDir = getFilesDir(getBaseDir(found.scope));
152
+ const pdfFile = findLiteratureFiles(filesDir, id).find((f) => f.endsWith(".pdf"));
153
+ if (!pdfFile) {
154
+ log.error(`No PDF file found for literature: ${id}`);
155
+ process.exit(1);
156
+ }
157
+ const mdPath = path.join(filesDir, `${id}.md`);
158
+ if (fs.existsSync(mdPath)) {
159
+ log.error("Markdown file already exists. Delete it first to reconvert.");
160
+ process.exit(1);
161
+ }
162
+ if (!(await isOpendataLoaderAvailable())) {
163
+ log.error("opendataloader-pdf is not available. Run `paper dep check opendataloader` for details.");
164
+ process.exit(1);
165
+ }
166
+ log.info("Converting PDF to Markdown...");
167
+ const markdown = await convertPdfToMarkdown(path.join(filesDir, pdfFile));
168
+ if (!markdown) {
169
+ log.error("Conversion failed.");
170
+ process.exit(1);
171
+ }
172
+ fs.writeFileSync(mdPath, markdown, "utf-8");
173
+ log.success(`Markdown saved: ${id}.md`);
174
+ });
132
175
  // ─── lit remove ────────────────────────────────────────────
133
176
  lit
134
177
  .command("remove <knowledge-base-id> <id>")
@@ -226,11 +269,14 @@ export function createLiteratureCommand() {
226
269
  log.plain(JSON.stringify(literatures, null, 2));
227
270
  return;
228
271
  }
272
+ const filesDir = getFilesDir(getBaseDir(resolved.scope));
229
273
  for (const l of literatures) {
230
274
  log.header(l.id);
231
275
  log.label("Title:", l.title);
232
276
  if (l.author)
233
277
  log.label("Author:", l.author);
278
+ const files = findLiteratureFiles(filesDir, l.id);
279
+ log.label("Files:", files.length > 0 ? files.join(", ") : "(none)");
234
280
  log.label("Created:", l.createdAt.toISOString());
235
281
  log.newline();
236
282
  }
@@ -313,7 +359,8 @@ export function createLiteratureCommand() {
313
359
  log.plain(JSON.stringify(literature, null, 2));
314
360
  return;
315
361
  }
316
- printLiterature(literature);
362
+ const filesDir = getFilesDir(getBaseDir(resolved.scope));
363
+ printLiterature(literature, filesDir);
317
364
  });
318
365
  // ─── lit note ──────────────────────────────────────────────
319
366
  const note = lit.command("note").description("Manage literature notes");
@@ -386,7 +433,15 @@ function findLiteratureWithScope(id) {
386
433
  return { literature: uLit, scope: "user" };
387
434
  return null;
388
435
  }
389
- function printLiterature(lit) {
436
/**
 * List the entries in `filesDir` that belong to a literature id, i.e. whose
 * name starts with `<id>.`.
 *
 * @param {string} filesDir - Directory to scan.
 * @param {string} id - Literature id used as the file-name prefix.
 * @returns {string[]} Matching entry names in sorted order; empty when the
 *   directory does not exist or nothing matches.
 */
function findLiteratureFiles(filesDir, id) {
    if (!fs.existsSync(filesDir)) {
        return [];
    }
    // The trailing dot keeps id "abc" from matching "abcd.pdf".
    const prefix = `${id}.`;
    const matches = [];
    for (const entry of fs.readdirSync(filesDir)) {
        if (entry.startsWith(prefix)) {
            matches.push(entry);
        }
    }
    matches.sort();
    return matches;
}
444
+ function printLiterature(lit, filesDir) {
390
445
  log.header(lit.id);
391
446
  log.label("Title:", lit.title);
392
447
  if (lit.titleTranslation)
@@ -405,6 +460,8 @@ function printLiterature(lit) {
405
460
  log.label("DOI:", lit.doi);
406
461
  if (lit.knowledgeBaseId)
407
462
  log.label("Knowledge Base:", lit.knowledgeBaseId);
463
+ const files = findLiteratureFiles(filesDir, lit.id);
464
+ log.label("Files:", files.length > 0 ? files.join(", ") : "(none)");
408
465
  log.label("Created:", lit.createdAt.toISOString());
409
466
  log.label("Updated:", lit.updatedAt.toISOString());
410
467
  const noteEntries = Object.entries(lit.notes);
@@ -0,0 +1,88 @@
1
import { execFile } from "node:child_process";
import { mkdirSync, mkdtempSync, readdirSync, readFileSync, rmSync } from "node:fs";
import { tmpdir } from "node:os";
import * as path from "node:path";
5
/**
 * Report whether opendataloader-pdf can be used, i.e. the package can be
 * imported and a Java runtime can be executed.
 *
 * Detection runs on the first call only; the outcome is stored in the
 * module-level `cachedAvailability` and returned by later calls.
 *
 * @returns {Promise<boolean>} True when both prerequisites are present.
 */
export async function isOpendataLoaderAvailable() {
    if (cachedAvailability === undefined) {
        cachedAvailability = await detectAvailability();
    }
    return cachedAvailability;
}
15
/**
 * Convert a PDF file to Markdown using opendataloader-pdf.
 *
 * The converter writes into a freshly created temporary directory, which is
 * removed again before this function returns.
 *
 * @param {string} pdfPath - Path of the PDF file to convert.
 * @returns {Promise<string | null>} The Markdown content on success, or null
 *   when the temporary directory cannot be created, the package cannot be
 *   loaded, the conversion throws, or no `.md` file is produced.
 */
export async function convertPdfToMarkdown(pdfPath) {
    let outDir;
    try {
        // mkdtempSync appends random characters and never reuses an existing
        // directory, so two conversions started in the same millisecond can
        // no longer share (and then delete) each other's output directory.
        outDir = mkdtempSync(path.join(tmpdir(), "odl-"));
        const { convert } = await import("@opendataloader/pdf");
        await convert([pdfPath], {
            outputDir: outDir,
            format: "markdown",
            quiet: true,
        });
        const mdFile = readdirSync(outDir).find((f) => f.endsWith(".md"));
        if (!mdFile) {
            return null;
        }
        return readFileSync(path.join(outDir, mdFile), "utf-8");
    }
    catch {
        // Deliberate best-effort: callers treat null as "conversion unavailable".
        return null;
    }
    finally {
        if (outDir !== undefined) {
            try {
                rmSync(outDir, { recursive: true, force: true });
            }
            catch {
                // Cleanup is best-effort; a failure here must not replace the
                // result that is already being returned.
            }
        }
    }
}
41
+ // ─── Internal ────────────────────────────────────────────
42
/** Memoised result of detectAvailability(); undefined until the first detection finishes. */
let cachedAvailability;

/**
 * Probe for the package and the Java runtime concurrently.
 *
 * @returns {Promise<boolean>} True only when both probes succeed.
 */
async function detectAvailability() {
    const probes = await Promise.all([checkPackage(), checkJava()]);
    return probes.every(Boolean);
}
47
/**
 * Check whether `@opendataloader/pdf` can be imported.
 *
 * @returns {Promise<boolean>} True when the dynamic import succeeds, false when it throws.
 */
async function checkPackage() {
    return import("@opendataloader/pdf").then(
        () => true,
        () => false,
    );
}
56
/**
 * Check whether a `java` executable can be run.
 *
 * execFile is safe here: arguments are passed as an array, so there is no
 * shell interpolation.
 *
 * @returns {Promise<boolean>} True when `java -version` completes without error.
 */
async function checkJava() {
    const failure = await new Promise((done) => {
        execFile("java", ["-version"], (err) => {
            done(err);
        });
    });
    return !failure;
}
64
/**
 * Detailed availability report used by the `dep check` command.
 *
 * Runs the package probe and the Java version probe concurrently.
 *
 * @returns {Promise<{packageInstalled: boolean, javaAvailable: boolean, javaVersion: string | null}>}
 *   `javaAvailable` is true exactly when a Java version value was obtained.
 */
export async function checkOpendataLoaderStatus() {
    const [packageInstalled, javaVersion] = await Promise.all([
        checkPackage(),
        getJavaVersion(),
    ]);
    const javaAvailable = javaVersion !== null;
    return { packageInstalled, javaAvailable, javaVersion };
}
75
/**
 * Run `java -version` and extract the reported version string.
 *
 * execFile is safe here: arguments are passed as an array, so there is no
 * shell interpolation.
 *
 * @returns {Promise<string | null>} The quoted version from the banner,
 *   `"unknown"` when Java ran but the banner has no recognisable version,
 *   or null when the command could not be run successfully.
 */
function getJavaVersion() {
    return new Promise((resolve) => {
        execFile("java", ["-version"], (error, _stdout, stderr) => {
            if (error) {
                resolve(null);
                return;
            }
            // Java prints version to stderr
            const match = /version\s+"([^"]+)"/.exec(stderr);
            // The command succeeded, so a runtime exists even if the banner
            // is unfamiliar. Resolving null here would report Java as missing
            // while checkJava() treats the same runtime as available.
            resolve(match?.[1] ?? "unknown");
        });
    });
}
package/dist/index.js CHANGED
@@ -2,6 +2,7 @@
2
2
  import { createRequire } from "node:module";
3
3
  import { Command } from "commander";
4
4
  import { createConfigCommand } from "./commands/config.js";
5
+ import { createDepCommand } from "./commands/dep.js";
5
6
  import { createKnowledgeBaseCommand } from "./commands/knowledge-base.js";
6
7
  import { createLiteratureCommand } from "./commands/literature.js";
7
8
  import { createUtilCommand } from "./commands/util.js";
@@ -14,6 +15,7 @@ program.hook("preAction", () => {
14
15
  startup();
15
16
  });
16
17
  program.addCommand(createConfigCommand());
18
+ program.addCommand(createDepCommand());
17
19
  program.addCommand(createKnowledgeBaseCommand());
18
20
  program.addCommand(createLiteratureCommand());
19
21
  program.addCommand(createUtilCommand());
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "paper-manager",
3
- "version": "0.9.0",
3
+ "version": "0.10.1",
4
4
  "description": "A paper management system.",
5
5
  "keywords": [],
6
6
  "homepage": "https://github.com/EurFelux/paper-manager",
@@ -46,6 +46,9 @@
46
46
  "typescript": "^5.9.3",
47
47
  "vitest": "^4.0.18"
48
48
  },
49
+ "optionalDependencies": {
50
+ "@opendataloader/pdf": "^2.2.1"
51
+ },
49
52
  "engines": {
50
53
  "node": ">=24"
51
54
  },