paper-manager 0.10.0 → 0.10.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,15 +6,21 @@ const KNOWN_DEPS = new Set(["opendataloader"]);
6
6
  export function createDepCommand() {
7
7
  const dep = new Command("dep").description("Manage external dependencies");
8
8
  dep
9
- .command("check <dep>")
9
+ .command("check [dep]")
10
10
  .description("Check if an external dependency is available")
11
11
  .action(async (depName) => {
12
- if (!KNOWN_DEPS.has(depName)) {
13
- log.error(`Unknown dependency: ${depName}`);
14
- log.step(`Available: ${[...KNOWN_DEPS].join(", ")}`);
15
- process.exit(1);
12
+ if (depName != null) {
13
+ if (!KNOWN_DEPS.has(depName)) {
14
+ log.error(`Unknown dependency: ${depName}`);
15
+ log.step(`Available: ${[...KNOWN_DEPS].join(", ")}`);
16
+ process.exit(1);
17
+ }
18
+ if (depName === "opendataloader") {
19
+ await checkOpendataLoader();
20
+ }
16
21
  }
17
- if (depName === "opendataloader") {
22
+ else {
23
+ // Check all known dependencies
18
24
  await checkOpendataLoader();
19
25
  }
20
26
  });
@@ -6,6 +6,7 @@ import * as projectKb from "../db/project/knowledge-bases.js";
6
6
  import * as projectLit from "../db/project/literatures.js";
7
7
  import * as userKb from "../db/user/knowledge-bases.js";
8
8
  import * as userLit from "../db/user/literatures.js";
9
+ import { removeImageDir } from "../extractor/markdown.js";
9
10
  import { log } from "../logger.js";
10
11
  import { queryVectorStore } from "../vector-store/index.js";
11
12
  function resolveKnowledgeBase(id) {
@@ -136,7 +137,7 @@ export function createKnowledgeBaseCommand() {
136
137
  const kbOps = scope === "project" ? projectKb : userKb;
137
138
  // 1. Get all literatures in this KB
138
139
  const literatures = litOps.getLiteraturesByKnowledgeBaseId(id);
139
- // 2. Delete stored files
140
+ // 2. Delete stored files and image directories
140
141
  const filesDir = getFilesDir(baseDir);
141
142
  if (fs.existsSync(filesDir)) {
142
143
  for (const lit of literatures) {
@@ -145,6 +146,7 @@ export function createKnowledgeBaseCommand() {
145
146
  fs.unlinkSync(path.join(filesDir, entry.name));
146
147
  }
147
148
  }
149
+ removeImageDir(filesDir, lit.id);
148
150
  }
149
151
  }
150
152
  // 3. Delete literatures from DB
@@ -9,7 +9,7 @@ import * as projectLit from "../db/project/literatures.js";
9
9
  import * as userKb from "../db/user/knowledge-bases.js";
10
10
  import * as userLit from "../db/user/literatures.js";
11
11
  import { extractContent, extractPdfMetadata } from "../extractor/index.js";
12
- import { convertPdfToMarkdown, isOpendataLoaderAvailable } from "../extractor/markdown.js";
12
+ import { convertPdfToMarkdown, isOpendataLoaderAvailable, removeImageDir, saveConvertResult, } from "../extractor/markdown.js";
13
13
  import { log } from "../logger.js";
14
14
  import { splitDocuments } from "../text-splitter.js";
15
15
  import { addDocuments, createVectorStore } from "../vector-store/index.js";
@@ -97,9 +97,9 @@ export function createLiteratureCommand() {
97
97
  fs.copyFileSync(absolutePath, path.join(filesDir, `${literature.id}${ext}`));
98
98
  // Convert PDF to Markdown if opendataloader is available
99
99
  if (isPdf && (await isOpendataLoaderAvailable())) {
100
- const markdown = await convertPdfToMarkdown(absolutePath);
101
- if (markdown) {
102
- fs.writeFileSync(path.join(filesDir, `${literature.id}.md`), markdown, "utf-8");
100
+ const result = await convertPdfToMarkdown(absolutePath);
101
+ if (result) {
102
+ saveConvertResult(filesDir, literature.id, result);
103
103
  log.step("Converted to Markdown via opendataloader-pdf.");
104
104
  }
105
105
  }
@@ -164,12 +164,12 @@ export function createLiteratureCommand() {
164
164
  process.exit(1);
165
165
  }
166
166
  log.info("Converting PDF to Markdown...");
167
- const markdown = await convertPdfToMarkdown(path.join(filesDir, pdfFile));
168
- if (!markdown) {
167
+ const result = await convertPdfToMarkdown(path.join(filesDir, pdfFile));
168
+ if (!result) {
169
169
  log.error("Conversion failed.");
170
170
  process.exit(1);
171
171
  }
172
- fs.writeFileSync(mdPath, markdown, "utf-8");
172
+ saveConvertResult(filesDir, id, result);
173
173
  log.success(`Markdown saved: ${id}.md`);
174
174
  });
175
175
  // ─── lit remove ────────────────────────────────────────────
@@ -190,7 +190,7 @@ export function createLiteratureCommand() {
190
190
  log.error(`Literature not found: ${id}`);
191
191
  process.exit(1);
192
192
  }
193
- // Delete stored file (find by pattern <id>.*)
193
+ // Delete stored files and image directory
194
194
  const filesDir = getFilesDir(baseDir);
195
195
  if (fs.existsSync(filesDir)) {
196
196
  for (const entry of fs.readdirSync(filesDir, { withFileTypes: true })) {
@@ -198,6 +198,7 @@ export function createLiteratureCommand() {
198
198
  fs.unlinkSync(path.join(filesDir, entry.name));
199
199
  }
200
200
  }
201
+ removeImageDir(filesDir, id);
201
202
  }
202
203
  // Delete literature record
203
204
  litOps.deleteLiterature(id);
@@ -1,5 +1,5 @@
1
1
  import { execFile } from "node:child_process";
2
- import { mkdirSync, readdirSync, readFileSync, rmSync } from "node:fs";
2
+ import { existsSync, mkdirSync, readdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
3
3
  import { tmpdir } from "node:os";
4
4
  import * as path from "node:path";
5
5
  /**
@@ -14,7 +14,7 @@ export async function isOpendataLoaderAvailable() {
14
14
  }
15
15
  /**
16
16
  * Convert a PDF file to Markdown using opendataloader-pdf.
17
- * Returns the markdown content on success, or null on failure.
17
+ * Returns the markdown content and extracted images on success, or null on failure.
18
18
  */
19
19
  export async function convertPdfToMarkdown(pdfPath) {
20
20
  const outDir = path.join(tmpdir(), `odl-${Date.now()}`);
@@ -26,10 +26,19 @@ export async function convertPdfToMarkdown(pdfPath) {
26
26
  format: "markdown",
27
27
  quiet: true,
28
28
  });
29
- const mdFile = readdirSync(outDir).find((f) => f.endsWith(".md"));
29
+ const files = readdirSync(outDir);
30
+ const mdFile = files.find((f) => f.endsWith(".md"));
30
31
  if (!mdFile)
31
32
  return null;
32
- return readFileSync(path.join(outDir, mdFile), "utf-8");
33
+ const markdown = readFileSync(path.join(outDir, mdFile), "utf-8");
34
+ const imageExtensions = new Set([".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp", ".bmp"]);
35
+ const images = new Map();
36
+ for (const file of files) {
37
+ if (imageExtensions.has(path.extname(file).toLowerCase())) {
38
+ images.set(file, readFileSync(path.join(outDir, file)));
39
+ }
40
+ }
41
+ return { markdown, images };
33
42
  }
34
43
  catch {
35
44
  return null;
@@ -38,6 +47,33 @@ export async function convertPdfToMarkdown(pdfPath) {
38
47
  rmSync(outDir, { recursive: true, force: true });
39
48
  }
40
49
  }
/**
 * Save a ConvertResult to disk: writes the markdown file and any extracted images.
 *
 * Images are stored in `filesDir/<id>/` and image references in the markdown are
 * rewritten to use the `<id>/` prefix. An image directory left behind by an
 * earlier conversion of the same id is cleared first, so the directory only
 * ever holds the images that belong to the markdown written alongside it.
 *
 * @param {string} filesDir - Directory that holds the stored literature files.
 * @param {string} id - Literature id; names both `<id>.md` and the `<id>/` image directory.
 * @param {{ markdown: string, images: Map<string, Buffer> }} result - Markdown text
 *   plus the extracted images keyed by file name.
 * @returns {void}
 */
export function saveConvertResult(filesDir, id, result) {
  let { markdown } = result;
  const imageSubDir = path.join(filesDir, id);

  // Drop stale images from a previous conversion. The recursive delete is only
  // allowed when the target is a direct child of filesDir, so an empty or
  // path-like id can never make it remove filesDir itself or anything above it.
  const filesRoot = path.resolve(filesDir);
  const resolvedSubDir = path.resolve(imageSubDir);
  if (resolvedSubDir !== filesRoot && path.dirname(resolvedSubDir) === filesRoot) {
    rmSync(imageSubDir, { recursive: true, force: true });
  }

  if (result.images.size > 0) {
    mkdirSync(imageSubDir, { recursive: true });
    for (const [filename, data] of result.images) {
      writeFileSync(path.join(imageSubDir, filename), data);
      // Rewrite image/link references: ](filename) → ](<id>/filename)
      markdown = markdown.replaceAll(`](${filename})`, `](${id}/${filename})`);
    }
  }
  writeFileSync(path.join(filesDir, `${id}.md`), markdown, "utf-8");
}
/**
 * Remove the extracted images directory for a literature, if it exists.
 *
 * Images live in `filesDir/<id>/`. The target is only removed when it resolves
 * to a direct child of `filesDir`: an empty id, `.`/`..`, or an id containing
 * path separators would otherwise point the recursive delete at `filesDir`
 * itself or at something outside it. Such ids are ignored.
 *
 * @param {string} filesDir - Directory that holds the stored literature files.
 * @param {string} id - Literature id; also the name of its image sub-directory.
 * @returns {void}
 */
export function removeImageDir(filesDir, id) {
  const filesRoot = path.resolve(filesDir);
  const imageDir = path.resolve(filesRoot, id);
  if (imageDir === filesRoot || path.dirname(imageDir) !== filesRoot) {
    return;
  }
  // `force: true` already makes a missing path a no-op, so no existence
  // pre-check is needed (and none can race with a concurrent delete).
  rmSync(imageDir, { recursive: true, force: true });
}
41
77
  // ─── Internal ────────────────────────────────────────────
42
78
  let cachedAvailability;
43
79
  async function detectAvailability() {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "paper-manager",
3
- "version": "0.10.0",
3
+ "version": "0.10.2",
4
4
  "description": "A paper management system.",
5
5
  "keywords": [],
6
6
  "homepage": "https://github.com/EurFelux/paper-manager",