paper-manager 0.10.0 → 0.10.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/commands/dep.js
CHANGED
|
@@ -6,15 +6,21 @@ const KNOWN_DEPS = new Set(["opendataloader"]);
|
|
|
6
6
|
export function createDepCommand() {
|
|
7
7
|
const dep = new Command("dep").description("Manage external dependencies");
|
|
8
8
|
dep
|
|
9
|
-
.command("check
|
|
9
|
+
.command("check [dep]")
|
|
10
10
|
.description("Check if an external dependency is available")
|
|
11
11
|
.action(async (depName) => {
|
|
12
|
-
if (
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
12
|
+
if (depName != null) {
|
|
13
|
+
if (!KNOWN_DEPS.has(depName)) {
|
|
14
|
+
log.error(`Unknown dependency: ${depName}`);
|
|
15
|
+
log.step(`Available: ${[...KNOWN_DEPS].join(", ")}`);
|
|
16
|
+
process.exit(1);
|
|
17
|
+
}
|
|
18
|
+
if (depName === "opendataloader") {
|
|
19
|
+
await checkOpendataLoader();
|
|
20
|
+
}
|
|
16
21
|
}
|
|
17
|
-
|
|
22
|
+
else {
|
|
23
|
+
// Check all known dependencies
|
|
18
24
|
await checkOpendataLoader();
|
|
19
25
|
}
|
|
20
26
|
});
|
|
@@ -6,6 +6,7 @@ import * as projectKb from "../db/project/knowledge-bases.js";
|
|
|
6
6
|
import * as projectLit from "../db/project/literatures.js";
|
|
7
7
|
import * as userKb from "../db/user/knowledge-bases.js";
|
|
8
8
|
import * as userLit from "../db/user/literatures.js";
|
|
9
|
+
import { removeImageDir } from "../extractor/markdown.js";
|
|
9
10
|
import { log } from "../logger.js";
|
|
10
11
|
import { queryVectorStore } from "../vector-store/index.js";
|
|
11
12
|
function resolveKnowledgeBase(id) {
|
|
@@ -136,7 +137,7 @@ export function createKnowledgeBaseCommand() {
|
|
|
136
137
|
const kbOps = scope === "project" ? projectKb : userKb;
|
|
137
138
|
// 1. Get all literatures in this KB
|
|
138
139
|
const literatures = litOps.getLiteraturesByKnowledgeBaseId(id);
|
|
139
|
-
// 2. Delete stored files
|
|
140
|
+
// 2. Delete stored files and image directories
|
|
140
141
|
const filesDir = getFilesDir(baseDir);
|
|
141
142
|
if (fs.existsSync(filesDir)) {
|
|
142
143
|
for (const lit of literatures) {
|
|
@@ -145,6 +146,7 @@ export function createKnowledgeBaseCommand() {
|
|
|
145
146
|
fs.unlinkSync(path.join(filesDir, entry.name));
|
|
146
147
|
}
|
|
147
148
|
}
|
|
149
|
+
removeImageDir(filesDir, lit.id);
|
|
148
150
|
}
|
|
149
151
|
}
|
|
150
152
|
// 3. Delete literatures from DB
|
|
@@ -9,7 +9,7 @@ import * as projectLit from "../db/project/literatures.js";
|
|
|
9
9
|
import * as userKb from "../db/user/knowledge-bases.js";
|
|
10
10
|
import * as userLit from "../db/user/literatures.js";
|
|
11
11
|
import { extractContent, extractPdfMetadata } from "../extractor/index.js";
|
|
12
|
-
import { convertPdfToMarkdown, isOpendataLoaderAvailable } from "../extractor/markdown.js";
|
|
12
|
+
import { convertPdfToMarkdown, isOpendataLoaderAvailable, removeImageDir, saveConvertResult, } from "../extractor/markdown.js";
|
|
13
13
|
import { log } from "../logger.js";
|
|
14
14
|
import { splitDocuments } from "../text-splitter.js";
|
|
15
15
|
import { addDocuments, createVectorStore } from "../vector-store/index.js";
|
|
@@ -97,9 +97,9 @@ export function createLiteratureCommand() {
|
|
|
97
97
|
fs.copyFileSync(absolutePath, path.join(filesDir, `${literature.id}${ext}`));
|
|
98
98
|
// Convert PDF to Markdown if opendataloader is available
|
|
99
99
|
if (isPdf && (await isOpendataLoaderAvailable())) {
|
|
100
|
-
const
|
|
101
|
-
if (
|
|
102
|
-
|
|
100
|
+
const result = await convertPdfToMarkdown(absolutePath);
|
|
101
|
+
if (result) {
|
|
102
|
+
saveConvertResult(filesDir, literature.id, result);
|
|
103
103
|
log.step("Converted to Markdown via opendataloader-pdf.");
|
|
104
104
|
}
|
|
105
105
|
}
|
|
@@ -164,12 +164,12 @@ export function createLiteratureCommand() {
|
|
|
164
164
|
process.exit(1);
|
|
165
165
|
}
|
|
166
166
|
log.info("Converting PDF to Markdown...");
|
|
167
|
-
const
|
|
168
|
-
if (!
|
|
167
|
+
const result = await convertPdfToMarkdown(path.join(filesDir, pdfFile));
|
|
168
|
+
if (!result) {
|
|
169
169
|
log.error("Conversion failed.");
|
|
170
170
|
process.exit(1);
|
|
171
171
|
}
|
|
172
|
-
|
|
172
|
+
saveConvertResult(filesDir, id, result);
|
|
173
173
|
log.success(`Markdown saved: ${id}.md`);
|
|
174
174
|
});
|
|
175
175
|
// ─── lit remove ────────────────────────────────────────────
|
|
@@ -190,7 +190,7 @@ export function createLiteratureCommand() {
|
|
|
190
190
|
log.error(`Literature not found: ${id}`);
|
|
191
191
|
process.exit(1);
|
|
192
192
|
}
|
|
193
|
-
// Delete stored
|
|
193
|
+
// Delete stored files and image directory
|
|
194
194
|
const filesDir = getFilesDir(baseDir);
|
|
195
195
|
if (fs.existsSync(filesDir)) {
|
|
196
196
|
for (const entry of fs.readdirSync(filesDir, { withFileTypes: true })) {
|
|
@@ -198,6 +198,7 @@ export function createLiteratureCommand() {
|
|
|
198
198
|
fs.unlinkSync(path.join(filesDir, entry.name));
|
|
199
199
|
}
|
|
200
200
|
}
|
|
201
|
+
removeImageDir(filesDir, id);
|
|
201
202
|
}
|
|
202
203
|
// Delete literature record
|
|
203
204
|
litOps.deleteLiterature(id);
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { execFile } from "node:child_process";
|
|
2
|
-
import { mkdirSync, readdirSync, readFileSync, rmSync } from "node:fs";
|
|
2
|
+
import { existsSync, mkdirSync, readdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
|
3
3
|
import { tmpdir } from "node:os";
|
|
4
4
|
import * as path from "node:path";
|
|
5
5
|
/**
|
|
@@ -14,7 +14,7 @@ export async function isOpendataLoaderAvailable() {
|
|
|
14
14
|
}
|
|
15
15
|
/**
|
|
16
16
|
* Convert a PDF file to Markdown using opendataloader-pdf.
|
|
17
|
-
* Returns the markdown content on success, or null on failure.
|
|
17
|
+
* Returns the markdown content and extracted images on success, or null on failure.
|
|
18
18
|
*/
|
|
19
19
|
export async function convertPdfToMarkdown(pdfPath) {
|
|
20
20
|
const outDir = path.join(tmpdir(), `odl-${Date.now()}`);
|
|
@@ -26,10 +26,19 @@ export async function convertPdfToMarkdown(pdfPath) {
|
|
|
26
26
|
format: "markdown",
|
|
27
27
|
quiet: true,
|
|
28
28
|
});
|
|
29
|
-
const
|
|
29
|
+
const files = readdirSync(outDir);
|
|
30
|
+
const mdFile = files.find((f) => f.endsWith(".md"));
|
|
30
31
|
if (!mdFile)
|
|
31
32
|
return null;
|
|
32
|
-
|
|
33
|
+
const markdown = readFileSync(path.join(outDir, mdFile), "utf-8");
|
|
34
|
+
const imageExtensions = new Set([".png", ".jpg", ".jpeg", ".gif", ".svg", ".webp", ".bmp"]);
|
|
35
|
+
const images = new Map();
|
|
36
|
+
for (const file of files) {
|
|
37
|
+
if (imageExtensions.has(path.extname(file).toLowerCase())) {
|
|
38
|
+
images.set(file, readFileSync(path.join(outDir, file)));
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
return { markdown, images };
|
|
33
42
|
}
|
|
34
43
|
catch {
|
|
35
44
|
return null;
|
|
@@ -38,6 +47,33 @@ export async function convertPdfToMarkdown(pdfPath) {
|
|
|
38
47
|
rmSync(outDir, { recursive: true, force: true });
|
|
39
48
|
}
|
|
40
49
|
}
|
|
50
|
+
/**
 * Save a conversion result to disk.
 *
 * Writes the markdown to `filesDir/<id>.md`. When the result carries images,
 * each one is written to `filesDir/<id>/<filename>` and every markdown
 * reference of the exact form `](filename)` is rewritten to `](<id>/filename)`
 * so that it resolves relative to the saved markdown file.
 *
 * @param {string} filesDir - Directory receiving the markdown file and the image sub-directory.
 * @param {string} id - Used as the markdown base name and as the image sub-directory name.
 * @param {{ markdown: string, images: Map<string, *> }} result - Markdown text plus a map of
 *   image file name → file contents (anything `writeFileSync` accepts as data).
 * @returns {void}
 */
export function saveConvertResult(filesDir, id, result) {
    let { markdown } = result;
    if (result.images.size > 0) {
        const imageSubDir = path.join(filesDir, id);
        mkdirSync(imageSubDir, { recursive: true });
        for (const [filename, data] of result.images) {
            writeFileSync(path.join(imageSubDir, filename), data);
            // Rewrite image/link references: ](filename) → ](<id>/filename).
            // A replacer function is used instead of a replacement string because
            // replaceAll() expands `$&`, `$$`, "$`" and `$'` inside replacement
            // strings; a file name or id containing `$` would otherwise be mangled.
            const rewritten = `](${id}/${filename})`;
            markdown = markdown.replaceAll(`](${filename})`, () => rewritten);
        }
    }
    writeFileSync(path.join(filesDir, `${id}.md`), markdown, "utf-8");
}
|
|
68
|
+
/**
 * Delete the image directory `filesDir/<id>` together with everything inside it.
 * Returns without doing anything when that path does not exist.
 *
 * @param {string} filesDir - Directory that contains the per-id image sub-directories.
 * @param {string} id - Name of the sub-directory to remove.
 * @returns {void}
 */
export function removeImageDir(filesDir, id) {
    const target = path.join(filesDir, id);
    if (!existsSync(target)) {
        return;
    }
    rmSync(target, { force: true, recursive: true });
}
|
|
41
77
|
// ─── Internal ────────────────────────────────────────────
|
|
42
78
|
let cachedAvailability;
|
|
43
79
|
async function detectAvailability() {
|