@ncukondo/reference-manager 0.29.5 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunks/{SearchableMultiSelect-CJQVTETt.js → SearchableMultiSelect-CpCrb-G1.js} +2 -2
- package/dist/chunks/{SearchableMultiSelect-CJQVTETt.js.map → SearchableMultiSelect-CpCrb-G1.js.map} +1 -1
- package/dist/chunks/{action-menu-BD9ZISJ4.js → action-menu-DwHspdjM.js} +3 -3
- package/dist/chunks/{action-menu-BD9ZISJ4.js.map → action-menu-DwHspdjM.js.map} +1 -1
- package/dist/chunks/{checker-hg3aQ_DJ.js → checker-DhHnmgq0.js} +4 -4
- package/dist/chunks/{checker-hg3aQ_DJ.js.map → checker-DhHnmgq0.js.map} +1 -1
- package/dist/chunks/{crossref-client-DozuWO_S.js → crossref-client-DG4AA_Ax.js} +2 -2
- package/dist/chunks/{crossref-client-DozuWO_S.js.map → crossref-client-DG4AA_Ax.js.map} +1 -1
- package/dist/chunks/{fix-interaction-BWrqxar5.js → fix-interaction-CIstlQZN.js} +5 -5
- package/dist/chunks/{fix-interaction-BWrqxar5.js.map → fix-interaction-CIstlQZN.js.map} +1 -1
- package/dist/chunks/{index-Cjeu8f2F.js → index-4KRTx7Fg.js} +3 -3
- package/dist/chunks/index-4KRTx7Fg.js.map +1 -0
- package/dist/chunks/{index-CPAAAjz7.js → index-AvQ2XqcY.js} +4 -4
- package/dist/chunks/{index-CPAAAjz7.js.map → index-AvQ2XqcY.js.map} +1 -1
- package/dist/chunks/{index-G9TlyVWo.js → index-C7BVI2qL.js} +71 -41
- package/dist/chunks/index-C7BVI2qL.js.map +1 -0
- package/dist/chunks/{index-6Aild0Uk.js → index-DoOmAYKd.js} +450 -80
- package/dist/chunks/index-DoOmAYKd.js.map +1 -0
- package/dist/chunks/{loader-B6sytmQd.js → loader-6TuXzBbw.js} +44 -1
- package/dist/chunks/loader-6TuXzBbw.js.map +1 -0
- package/dist/chunks/{pubmed-client-DJdEV1PO.js → pubmed-client-CyaNS4Y1.js} +2 -2
- package/dist/chunks/{pubmed-client-DJdEV1PO.js.map → pubmed-client-CyaNS4Y1.js.map} +1 -1
- package/dist/chunks/{reference-select-Bb4dUnM2.js → reference-select-CES2SpzK.js} +3 -3
- package/dist/chunks/{reference-select-Bb4dUnM2.js.map → reference-select-CES2SpzK.js.map} +1 -1
- package/dist/chunks/{style-select-DQGJOnnP.js → style-select-DrECo2bW.js} +3 -3
- package/dist/chunks/{style-select-DQGJOnnP.js.map → style-select-DrECo2bW.js.map} +1 -1
- package/dist/cli/commands/fulltext.d.ts +8 -1
- package/dist/cli/commands/fulltext.d.ts.map +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/spawn-args.d.ts.map +1 -1
- package/dist/cli.js +1 -1
- package/dist/config/defaults.d.ts.map +1 -1
- package/dist/config/loader.d.ts.map +1 -1
- package/dist/config/schema.d.ts +77 -1
- package/dist/config/schema.d.ts.map +1 -1
- package/dist/features/operations/fulltext/builtin-converters.d.ts +13 -0
- package/dist/features/operations/fulltext/builtin-converters.d.ts.map +1 -0
- package/dist/features/operations/fulltext/command-template.d.ts +16 -0
- package/dist/features/operations/fulltext/command-template.d.ts.map +1 -0
- package/dist/features/operations/fulltext/convert.d.ts +16 -1
- package/dist/features/operations/fulltext/convert.d.ts.map +1 -1
- package/dist/features/operations/fulltext/converter-check.d.ts +8 -0
- package/dist/features/operations/fulltext/converter-check.d.ts.map +1 -0
- package/dist/features/operations/fulltext/converter-resolver.d.ts +20 -0
- package/dist/features/operations/fulltext/converter-resolver.d.ts.map +1 -0
- package/dist/features/operations/fulltext/custom-converter.d.ts +17 -0
- package/dist/features/operations/fulltext/custom-converter.d.ts.map +1 -0
- package/dist/features/operations/fulltext/fetch.d.ts.map +1 -1
- package/dist/features/operations/fulltext/pdf-converter.d.ts +39 -0
- package/dist/features/operations/fulltext/pdf-converter.d.ts.map +1 -0
- package/dist/index.js +1 -1
- package/dist/mcp/tools/fulltext.d.ts +4 -0
- package/dist/mcp/tools/fulltext.d.ts.map +1 -1
- package/dist/server/routes/references.d.ts.map +1 -1
- package/dist/server.js +1 -1
- package/package.json +1 -1
- package/dist/chunks/index-6Aild0Uk.js.map +0 -1
- package/dist/chunks/index-Cjeu8f2F.js.map +0 -1
- package/dist/chunks/index-G9TlyVWo.js.map +0 -1
- package/dist/chunks/loader-B6sytmQd.js.map +0 -1
|
@@ -9,11 +9,11 @@ import "@citation-js/plugin-bibtex";
|
|
|
9
9
|
import "@citation-js/plugin-ris";
|
|
10
10
|
import { z } from "zod";
|
|
11
11
|
import * as path from "node:path";
|
|
12
|
-
import path__default, { extname, join, dirname } from "node:path";
|
|
12
|
+
import path__default, { extname, join, dirname, basename } from "node:path";
|
|
13
13
|
import "@citation-js/plugin-csl";
|
|
14
|
-
import fs__default, { stat, rename, copyFile, unlink, rm, readFile, mkdir, writeFile,
|
|
14
|
+
import fs__default, { stat, rename, copyFile, unlink, rm, readFile, mkdir, writeFile, access, mkdtemp, rmdir } from "node:fs/promises";
|
|
15
15
|
import { tmpdir } from "node:os";
|
|
16
|
-
import "node:child_process";
|
|
16
|
+
import { exec, execFile } from "node:child_process";
|
|
17
17
|
import "node:crypto";
|
|
18
18
|
function slugifyLabel(label) {
|
|
19
19
|
return label.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/-+/g, "-").replace(/^-|-$/g, "");
|
|
@@ -31,16 +31,16 @@ function parseFilename(filename) {
|
|
|
31
31
|
}
|
|
32
32
|
const ext = path__default.extname(filename);
|
|
33
33
|
const extWithoutDot = ext.startsWith(".") ? ext.slice(1) : ext;
|
|
34
|
-
const
|
|
35
|
-
const firstHyphenIndex =
|
|
34
|
+
const basename2 = ext ? filename.slice(0, -ext.length) : filename;
|
|
35
|
+
const firstHyphenIndex = basename2.indexOf("-");
|
|
36
36
|
if (firstHyphenIndex === -1) {
|
|
37
37
|
return {
|
|
38
|
-
role:
|
|
38
|
+
role: basename2,
|
|
39
39
|
ext: extWithoutDot
|
|
40
40
|
};
|
|
41
41
|
}
|
|
42
|
-
const role =
|
|
43
|
-
const label =
|
|
42
|
+
const role = basename2.slice(0, firstHyphenIndex);
|
|
43
|
+
const label = basename2.slice(firstHyphenIndex + 1);
|
|
44
44
|
if (label) {
|
|
45
45
|
return {
|
|
46
46
|
role,
|
|
@@ -10313,6 +10313,393 @@ async function fulltextDiscover(library, options) {
|
|
|
10313
10313
|
}
|
|
10314
10314
|
return discoverResult;
|
|
10315
10315
|
}
|
|
10316
|
+
/**
 * Quote a value so the shell started by `exec` treats it as one literal
 * argument.
 *
 * - POSIX shells: wrap in single quotes; an embedded single quote is written
 *   as `'\''` (close quote, escaped quote, reopen quote).
 * - Windows: `exec` runs the command through cmd.exe, where single quotes are
 *   ordinary characters, so double quotes are used instead and an embedded
 *   double quote is doubled.
 *   NOTE(review): cmd.exe still expands `%VAR%` inside double quotes — confirm
 *   whether that matters for the paths passed here.
 *
 * @param {string} value - Raw argument text (typically a file path).
 * @returns {string} The quoted argument.
 */
function shellQuote(value) {
  if (process.platform === "win32") {
    return `"${value.replaceAll('"', '""')}"`;
  }
  return `'${value.replaceAll("'", "'\\''")}'`;
}
|
|
10319
|
+
/**
 * Expand the placeholders of a converter command template, shell-quoting
 * every substituted value.
 *
 * Supported placeholders: `{input}`, `{output}`, `{input_dir}`,
 * `{input_name}` and `{output_name}`.
 *
 * The substitution is done in a single pass with a replacer function:
 * - a string replacement would interpret `$&`, `$'`, `$$` ... inside the
 *   path (a path ending in `$` becomes `$'` once quoted), and
 * - chained replacements would re-expand placeholder text that happens to
 *   appear inside an already substituted path.
 *
 * @param {string} template - Command template containing placeholders.
 * @param {{ input: string, output: string }} vars - Input and output paths.
 * @returns {string} The command line with all placeholders replaced.
 */
function expandTemplate(template, vars) {
  const values = {
    input_dir: dirname(vars.input),
    input_name: basename(vars.input),
    output_name: basename(vars.output),
    input: vars.input,
    output: vars.output
  };
  return template.replaceAll(
    /\{(input_dir|input_name|output_name|input|output)\}/g,
    (_placeholder, key) => shellQuote(values[key])
  );
}
|
|
10322
|
+
/**
 * Check whether an executable can be located by the platform's lookup tool
 * (`where` on Windows, `which` elsewhere).
 *
 * @param {string} command - Executable name to look up.
 * @returns {Promise<boolean>} `true` when the lookup finished without an
 *   error, `false` otherwise (including when the lookup tool cannot run).
 */
async function isCommandAvailable(command) {
  const lookupCmd = process.platform === "win32" ? "where" : "which";
  return new Promise((resolve) => {
    // execFile passes `command` as an argument without going through a shell.
    execFile(lookupCmd, [command], (err) => resolve(err === null));
  });
}
|
|
10330
|
+
/**
 * Run a converter's availability check through the shell and report whether
 * it succeeded.
 *
 * @param {string} checkCommand - Shell command line to execute.
 * @returns {Promise<boolean>} `true` when `exec` reported no error, `false`
 *   otherwise. Output of the command is ignored.
 */
async function runCheckCommand(checkCommand) {
  return new Promise((resolve) => {
    exec(checkCommand, (err) => resolve(err === null));
  });
}
|
|
10337
|
+
/**
 * PDF-to-Markdown converter that delegates the work to an external command
 * described by a configuration object.
 *
 * Configuration fields read by this class: `command`, `commandWindows`,
 * `checkCommand`, `checkCommandWindows`, `outputMode` (defaults to "file")
 * and `timeout` in seconds (defaults to 300).
 */
class CustomPdfConverter {
  name;
  config;
  /**
   * @param {string} name - Converter name, used by callers in messages.
   * @param {object} config - Converter configuration (see class comment).
   */
  constructor(name, config) {
    this.name = name;
    this.config = config;
  }
  /**
   * Decide whether the converter can be used on this machine.
   *
   * Runs the configured check command when there is one; otherwise looks up
   * the first word of the conversion command as an executable.
   *
   * @returns {Promise<boolean>}
   */
  async isAvailable() {
    const checkCmd = this.getCheckCommand();
    if (checkCmd) {
      return runCheckCommand(checkCmd);
    }
    // Trim first: with leading whitespace the first split element would be
    // an empty string and the lookup would always fail.
    const [binary] = this.getCommand().trim().split(/\s+/);
    return isCommandAvailable(binary);
  }
  /**
   * Convert a PDF by running the configured command.
   *
   * In "stdout" mode the command's standard output is written to
   * `outputPath`; in "file" mode the command is expected to create
   * `outputPath` itself and its existence is verified afterwards.
   *
   * @param {string} pdfPath - Path of the PDF to convert.
   * @param {string} outputPath - Path the Markdown must end up at.
   * @returns {Promise<object>} `{ success: true, outputPath }` or
   *   `{ success: false, error, code, stderr? }` where `code` is
   *   "output-not-created", "timeout" or "conversion-failed".
   */
  async convert(pdfPath, outputPath) {
    const expanded = expandTemplate(this.getCommand(), { input: pdfPath, output: outputPath });
    const outputMode = this.config.outputMode ?? "file";
    const timeoutMs = (this.config.timeout ?? 300) * 1e3;
    try {
      const { stdout } = await this.execCommand(expanded, timeoutMs);
      if (outputMode === "stdout") {
        await writeFile(outputPath, stdout, "utf-8");
        return { success: true, outputPath };
      }
      try {
        await access(outputPath);
      } catch {
        return {
          success: false,
          error: `Output file was not created: ${outputPath}`,
          code: "output-not-created"
        };
      }
      return { success: true, outputPath };
    } catch (err) {
      return this.handleExecError(err);
    }
  }
  /**
   * @returns {string} The Windows-specific command when running on win32 and
   *   one is configured, otherwise the generic command.
   */
  getCommand() {
    if (process.platform === "win32" && this.config.commandWindows) {
      return this.config.commandWindows;
    }
    return this.config.command;
  }
  /**
   * @returns {string | undefined} The Windows-specific check command when
   *   running on win32 and one is configured, otherwise the generic one.
   */
  getCheckCommand() {
    if (process.platform === "win32" && this.config.checkCommandWindows) {
      return this.config.checkCommandWindows;
    }
    return this.config.checkCommand;
  }
  /**
   * Run a command line through the shell.
   *
   * @param {string} command - Fully expanded command line.
   * @param {number} timeoutMs - Timeout handed to `exec`, in milliseconds.
   * @returns {Promise<{ stdout: string, stderr: string }>} Rejects with the
   *   `exec` error, augmented with a `stderr` property.
   */
  execCommand(command, timeoutMs) {
    // exec's default output limit is 1 MiB per stream; in "stdout" mode the
    // whole Markdown document arrives on stdout, so allow considerably more.
    const maxBuffer = 64 * 1024 * 1024;
    return new Promise((resolve, reject) => {
      exec(command, { timeout: timeoutMs, maxBuffer }, (err, stdout, stderr) => {
        if (err) {
          reject(Object.assign(err, { stderr: stderr || err.stderr }));
        } else {
          resolve({ stdout, stderr });
        }
      });
    });
  }
  /**
   * Translate a failure thrown inside `convert` into a result object.
   *
   * @param {unknown} err - Error from `exec` or from writing the output.
   * @returns {object} Failure result with code "timeout" when the child was
   *   killed or ended by SIGTERM, otherwise "conversion-failed".
   */
  handleExecError(err) {
    const error = err;
    if (error.killed || error.signal === "SIGTERM") {
      return {
        success: false,
        error: `PDF conversion timed out after ${this.config.timeout ?? 300} seconds`,
        code: "timeout"
      };
    }
    return {
      success: false,
      error: error.message ?? "Conversion failed",
      code: "conversion-failed",
      ...error.stderr ? { stderr: error.stderr } : {}
    };
  }
}
|
|
10418
|
+
// Names of the converters that ship with a ready-made configuration.
const BUILTIN_CONVERTER_NAMES = ["marker", "docling", "mineru", "pymupdf"];
// Install command and short description shown in hints, keyed by built-in
// converter name.
const BUILTIN_CONVERTER_INFO = {
  marker: {
    install: "pip install marker-pdf",
    description: "GPU recommended, best quality"
  },
  docling: {
    install: "pip install docling",
    description: "CPU OK, good tables"
  },
  mineru: {
    install: "pip install mineru[all]",
    description: "GPU recommended, fastest"
  },
  pymupdf: {
    install: "pip install pymupdf4llm",
    description: "CPU only, lightweight"
  }
};
|
|
10437
|
+
// Command templates for the built-in converters. Placeholders such as
// {input}, {output} and {input_dir} are substituted with shell-quoted paths
// before the command runs, so templates must NOT add their own quotes around
// a placeholder.
//
// NOTE(review): after running a command in the default "file" output mode the
// converter checks that the file at {output} exists. marker, docling and
// mineru are only given {input_dir}; confirm that each tool really writes
// `<pdf name>.md` directly into that directory — TODO confirm.
const BUILTIN_CONFIGS = {
  marker: {
    command: "marker_single {input} --output_dir {input_dir}",
    checkCommand: "marker_single --help"
  },
  docling: {
    command: "docling --from pdf --to md --output {input_dir} {input}",
    checkCommand: "docling --help"
  },
  mineru: {
    command: "mineru -p {input} -o {input_dir} -m auto",
    checkCommand: "mineru --help"
  },
  pymupdf: {
    // Paths are handed to Python through sys.argv. Embedding the placeholders
    // inside Python string literals would nest the quotes added during
    // expansion (''/path/file.pdf'') and produce invalid Python.
    command: `python3 -c "import sys, pathlib, pymupdf4llm; pathlib.Path(sys.argv[2]).write_text(pymupdf4llm.to_markdown(sys.argv[1]), encoding='utf-8')" {input} {output}`,
    checkCommand: 'python3 -c "import pymupdf4llm"'
  }
};
|
|
10455
|
+
/**
 * Create the converter for a built-in name.
 *
 * @param {string} name - Converter name.
 * @returns {CustomPdfConverter | undefined} A converter built from the
 *   matching entry of BUILTIN_CONFIGS, or `undefined` when `name` is not a
 *   built-in converter name.
 */
function getBuiltinConverter(name) {
  return isBuiltinName(name) ? new CustomPdfConverter(name, BUILTIN_CONFIGS[name]) : void 0;
}
|
|
10459
|
+
/**
 * @param {string} name - Converter name to test.
 * @returns {boolean} Whether `name` is listed in BUILTIN_CONVERTER_NAMES.
 */
function isBuiltinName(name) {
  return BUILTIN_CONVERTER_NAMES.includes(name);
}
|
|
10462
|
+
/**
 * Resolve a converter by name.
 *
 * @param {string} name - "auto" to pick the first available converter from
 *   the priority list, otherwise an explicit converter name.
 * @param {{ priority: string[], customConverters: object }} options
 * @returns {Promise<object>} The result of `resolveAuto` or `resolveExplicit`.
 */
async function resolveConverter(name, options) {
  return name === "auto" ? resolveAuto(options) : resolveExplicit(name, options);
}
|
|
10468
|
+
/**
 * Pick the first converter from the priority list that reports itself as
 * available. Names that resolve to no converter are skipped; availability is
 * probed one converter at a time, in priority order.
 *
 * @param {{ priority: string[], customConverters: object }} options
 * @returns {Promise<object>} `{ success: true, converter }` or a failure with
 *   code "no-converter" and installation hints.
 */
async function resolveAuto(options) {
  const { priority, customConverters } = options;
  for (const converterName of priority) {
    const converter = getConverterByName(converterName, customConverters);
    if (converter && (await converter.isAvailable())) {
      return { success: true, converter };
    }
  }
  return {
    success: false,
    code: "no-converter",
    error: "No PDF converter found",
    hints: buildNoConverterHints(priority)
  };
}
|
|
10484
|
+
/**
 * Resolve an explicitly named converter and verify that it is usable.
 *
 * @param {string} name - Converter name (custom or built-in).
 * @param {{ customConverters: object }} options
 * @returns {Promise<object>} `{ success: true, converter }`, or a failure
 *   with code "not-installed". When the name is a built-in converter that is
 *   unavailable, the failure also carries an install hint.
 */
async function resolveExplicit(name, options) {
  const converter = getConverterByName(name, options.customConverters);
  if (!converter) {
    return {
      success: false,
      code: "not-installed",
      error: `PDF converter '${name}' not found`
    };
  }
  if (await converter.isAvailable()) {
    return { success: true, converter };
  }
  const result = {
    success: false,
    code: "not-installed",
    error: `PDF converter '${name}' is not installed`
  };
  // Only built-in converters have a known install command.
  const installHint = isBuiltinName(name) ? BUILTIN_CONVERTER_INFO[name].install : void 0;
  if (installHint) {
    result.hints = `Install with: ${installHint}`;
  }
  return result;
}
|
|
10508
|
+
/**
 * Look up a converter by name, preferring a user-defined configuration over
 * a built-in converter of the same name.
 *
 * @param {string} name - Converter name.
 * @param {object} customConverters - User-defined configurations keyed by
 *   converter name.
 * @returns {CustomPdfConverter | undefined} The converter, or `undefined`
 *   when the name matches neither a custom nor a built-in converter.
 */
function getConverterByName(name, customConverters) {
  // Own-property lookup only: a plain `customConverters[name]` would also
  // match inherited keys such as "constructor" or "toString" and build a
  // converter around a function instead of a configuration.
  const customConfig = Object.hasOwn(customConverters, name) ? customConverters[name] : void 0;
  if (customConfig) {
    return new CustomPdfConverter(name, customConfig);
  }
  return getBuiltinConverter(name);
}
|
|
10515
|
+
function buildNoConverterHints(checked) {
|
|
10516
|
+
const lines = [
|
|
10517
|
+
`Checked: ${checked.join(", ")} (none available)`,
|
|
10518
|
+
"",
|
|
10519
|
+
"Install one of the following:",
|
|
10520
|
+
""
|
|
10521
|
+
];
|
|
10522
|
+
for (const name of BUILTIN_CONVERTER_NAMES) {
|
|
10523
|
+
const info = BUILTIN_CONVERTER_INFO[name];
|
|
10524
|
+
lines.push(` ${name.padEnd(9)} ${info.install.padEnd(28)} (${info.description})`);
|
|
10525
|
+
}
|
|
10526
|
+
lines.push("", "Or configure a custom converter in your config file.");
|
|
10527
|
+
return lines.join("\n");
|
|
10528
|
+
}
|
|
10529
|
+
/**
 * Find the attached fulltext XML file of a reference.
 *
 * @param {object} item - Reference item; attachments are read from
 *   `item.custom.attachments.files`.
 * @returns {string | undefined} Filename of the first attachment with role
 *   "fulltext" whose name ends in ".xml", or `undefined` when there is none.
 */
function findXmlFile(item) {
  const files = item.custom?.attachments?.files;
  if (!files) return void 0;
  const match = files.find((f) => f.role === "fulltext" && f.filename.endsWith(".xml"));
  return match?.filename;
}
|
|
10537
|
+
/**
 * Find the attached fulltext PDF file of a reference.
 *
 * @param {object} item - Reference item; attachments are read from
 *   `item.custom.attachments.files`.
 * @returns {string | undefined} Filename of the first attachment with role
 *   "fulltext" whose name ends in ".pdf", or `undefined` when there is none.
 */
function findPdfFile(item) {
  const files = item.custom?.attachments?.files;
  if (!files) return void 0;
  const match = files.find((f) => f.role === "fulltext" && f.filename.endsWith(".pdf"));
  return match?.filename;
}
|
|
10545
|
+
/**
 * Build the on-disk path of an attachment.
 *
 * @param {object} item - Reference item; its attachment sub-directory is read
 *   from `item.custom.attachments.directory` (empty when absent).
 * @param {string} filename - Attachment filename.
 * @param {string} fulltextDirectory - Base attachments directory.
 * @returns {string} `fulltextDirectory`, the item's directory and `filename`
 *   joined into one path.
 */
function getFilePath(item, filename, fulltextDirectory) {
  const directory = item.custom?.attachments?.directory ?? "";
  return join(fulltextDirectory, directory, filename);
}
|
|
10550
|
+
/**
 * Convert the attached fulltext of a reference to Markdown.
 *
 * Looks the reference up, decides which source format to use and delegates
 * to the XML or PDF conversion.
 *
 * @param {object} library - Library providing `find(identifier, { idType })`.
 * @param {object} options - Reads `identifier`, `idType` (default "id") and
 *   `from`; the whole object is forwarded to the format-specific converter.
 * @returns {Promise<object>} Conversion result; `{ success: false, error }`
 *   when the reference does not exist or no usable source file is attached.
 */
async function fulltextConvert(library, options) {
  const { identifier, idType = "id", from } = options;
  const item = await library.find(identifier, { idType });
  if (!item) {
    return { success: false, error: `Reference '${identifier}' not found` };
  }
  const format = resolveFormat(item, from);
  if (!format.success) {
    return format.result;
  }
  const convert = format.type === "xml" ? convertXml : convertPdf;
  return convert(library, item, options);
}
|
|
10565
|
+
/**
 * Decide which attached source file a conversion should start from.
 *
 * - `from === "xml"` / `from === "pdf"`: that format is required.
 * - any other `from` (including omitted): XML is preferred, PDF is the
 *   fallback.
 *
 * @param {object} item - Reference item.
 * @param {string | undefined} from - Requested source format.
 * @returns {object} `{ success: true, type: "xml" | "pdf" }`, or
 *   `{ success: false, result }` where `result` is the failure to hand back
 *   to the caller.
 */
function resolveFormat(item, from) {
  const hasXml = findXmlFile(item) !== void 0;
  const hasPdf = findPdfFile(item) !== void 0;
  const missingXml = () => ({
    success: false,
    result: {
      success: false,
      error: `No PMC XML file attached to '${item.id}'`
    }
  });
  if (from === "xml") {
    return hasXml ? { success: true, type: "xml" } : missingXml();
  }
  if (from === "pdf") {
    if (hasPdf) {
      return { success: true, type: "pdf" };
    }
    return {
      success: false,
      result: {
        success: false,
        error: `No PDF file attached to '${item.id}'`,
        code: "no-pdf",
        hints: [
          "This reference has no fulltext PDF. You can:",
          ` 1. Download OA fulltext: ref fulltext fetch ${item.id}`,
          ` 2. Attach a local PDF: ref fulltext attach ${item.id} /path/to/paper.pdf`
        ].join("\n")
      }
    };
  }
  if (hasXml) return { success: true, type: "xml" };
  if (hasPdf) return { success: true, type: "pdf" };
  // NOTE(review): with neither file attached the message mentions only PMC
  // XML; presumably kept for compatibility with existing callers — confirm.
  return missingXml();
}
|
|
10608
|
+
/**
 * Convert the attached PMC XML of a reference to Markdown and attach the
 * result.
 *
 * The Markdown is written next to the XML file (same path, ".md" extension)
 * and then handed to `fulltextAttach` with `force` and `move` enabled.
 *
 * @param {object} library - Library passed through to `fulltextAttach`.
 * @param {object} item - Reference item owning the XML attachment.
 * @param {object} options - Reads `identifier`, `idType` (default "id") and
 *   `fulltextDirectory`.
 * @returns {Promise<object>} `{ success: true, filename }` or
 *   `{ success: false, error }`.
 */
async function convertXml(library, item, options) {
  const { identifier, idType = "id", fulltextDirectory } = options;
  const xmlFilename = findXmlFile(item);
  if (!xmlFilename) {
    return { success: false, error: `No PMC XML file attached to '${identifier}'` };
  }
  const xmlPath = getFilePath(item, xmlFilename, fulltextDirectory);
  try {
    // The attachment record may exist while the file itself is gone.
    await access(xmlPath);
  } catch {
    return { success: false, error: `XML file not found on disk: ${xmlPath}` };
  }
  const mdPath = xmlPath.replace(/\.xml$/, ".md");
  const conversion = await convertPmcXmlToMarkdown(xmlPath, mdPath);
  if (!conversion.success) {
    return {
      success: false,
      error: `Failed to convert PMC XML to Markdown: ${conversion.error ?? "unknown error"}`
    };
  }
  const attachResult = await fulltextAttach(library, {
    identifier,
    idType,
    filePath: mdPath,
    type: "markdown",
    force: true,
    move: true,
    fulltextDirectory
  });
  if (!attachResult.success) {
    return { success: false, error: attachResult.error ?? "Attach failed" };
  }
  return {
    success: true,
    filename: attachResult.filename ?? "fulltext.md"
  };
}
|
|
10645
|
+
/**
 * Convert the attached PDF of a reference to Markdown with an external
 * converter and attach the result.
 *
 * The converter name comes from `options.converter`, then
 * `fulltextConfig.pdfConverter`, then "auto". The Markdown is written next to
 * the PDF (same path, ".md" extension) and then handed to `fulltextAttach`
 * with `force` and `move` enabled.
 *
 * @param {object} library - Library passed through to `fulltextAttach`.
 * @param {object} item - Reference item owning the PDF attachment.
 * @param {object} options - Reads `identifier`, `idType` (default "id"),
 *   `fulltextDirectory`, `converter` and the optional `fulltextConfig`
 *   (`pdfConverter`, `pdfConverterPriority`, `converters`).
 * @returns {Promise<object>} `{ success: true, filename }` or a failure with
 *   `error` and, where available, `code`, `hints` and `stderr`.
 */
async function convertPdf(library, item, options) {
  const { identifier, idType = "id", fulltextDirectory, fulltextConfig } = options;
  const converterName = options.converter ?? fulltextConfig?.pdfConverter ?? "auto";
  const pdfFilename = findPdfFile(item);
  if (!pdfFilename) {
    return { success: false, error: `No PDF file attached to '${identifier}'`, code: "no-pdf" };
  }
  const pdfPath = getFilePath(item, pdfFilename, fulltextDirectory);
  try {
    // The attachment record may exist while the file itself is gone.
    await access(pdfPath);
  } catch {
    return { success: false, error: `PDF file not found on disk: ${pdfPath}` };
  }
  const resolveResult = await resolveConverter(converterName, {
    // Default order is the built-in list; copied so the shared array is never
    // handed out.
    priority: fulltextConfig?.pdfConverterPriority ?? [...BUILTIN_CONVERTER_NAMES],
    customConverters: fulltextConfig?.converters ?? {}
  });
  if (!resolveResult.success) {
    const result = {
      success: false,
      error: resolveResult.error,
      code: resolveResult.code
    };
    if (resolveResult.hints) {
      result.hints = resolveResult.hints;
    }
    return result;
  }
  const { converter } = resolveResult;
  const mdPath = pdfPath.replace(/\.pdf$/, ".md");
  const pdfResult = await converter.convert(pdfPath, mdPath);
  if (!pdfResult.success) {
    const result = {
      success: false,
      error: `Failed to convert PDF to Markdown using ${converter.name}: ${pdfResult.error}`,
      code: pdfResult.code
    };
    if (pdfResult.stderr) {
      result.stderr = pdfResult.stderr;
    }
    return result;
  }
  const attachResult = await fulltextAttach(library, {
    identifier,
    idType,
    filePath: mdPath,
    type: "markdown",
    force: true,
    move: true,
    fulltextDirectory
  });
  if (!attachResult.success) {
    return { success: false, error: attachResult.error ?? "Attach failed" };
  }
  return {
    success: true,
    filename: attachResult.filename ?? "fulltext.md"
  };
}
|
|
10316
10703
|
function extractPmcidFromLocations(locations) {
|
|
10317
10704
|
for (const loc of locations) {
|
|
10318
10705
|
if (loc.source !== "pmc") continue;
|
|
@@ -10533,7 +10920,8 @@ async function fulltextFetch(library, options) {
|
|
|
10533
10920
|
identifier,
|
|
10534
10921
|
idType,
|
|
10535
10922
|
fulltextDirectory,
|
|
10536
|
-
force: force ?? false
|
|
10923
|
+
force: force ?? false,
|
|
10924
|
+
fulltextConfig
|
|
10537
10925
|
};
|
|
10538
10926
|
try {
|
|
10539
10927
|
const result = await downloadAndAttach(
|
|
@@ -10573,6 +10961,19 @@ function buildDownloadError(locations, identifier, attempts) {
|
|
|
10573
10961
|
}
|
|
10574
10962
|
return { success: false, error: `Failed to download fulltext for ${identifier}`, hint };
|
|
10575
10963
|
}
|
|
10964
|
+
/**
 * Try to obtain a Markdown fulltext, stopping at the first strategy that
 * succeeds:
 *   1. PMC XML download and conversion (only when a PMCID is known),
 *   2. arXiv HTML from the discovered locations,
 *   3. conversion of an already attached PDF, only when the configured
 *      preferred type is "markdown".
 *
 * @param {Array} locations - Discovered fulltext locations.
 * @param {string | undefined} pmcid - PMC identifier, if any.
 * @param {string} tempDir - Scratch directory for downloads.
 * @param {object} ctx - Fetch context; `ctx.fulltextConfig` is optional.
 * @param {string[]} attachedFiles - Types attached so far (e.g. "pdf").
 * @param {Array} attempts - Attempt log passed on to the download helpers.
 * @returns {Promise<string | undefined>} "pmc", the arXiv source name,
 *   "pdf-convert", or `undefined` when no Markdown could be produced.
 */
async function tryMarkdownConversion(locations, pmcid, tempDir, ctx, attachedFiles, attempts) {
  if (pmcid) {
    const mdAttached = await tryDownloadPmcXmlAndConvert(pmcid, tempDir, ctx, attempts);
    if (mdAttached) return "pmc";
  }
  const arxivResult = await tryArxivHtmlFromLocations(locations, tempDir, ctx, attempts);
  if (arxivResult.attached) return arxivResult.source;
  // `fulltextConfig` may be missing from the context; without the optional
  // chain a fetch that already attached a PDF would end in a TypeError here.
  const wantsMarkdown = ctx.fulltextConfig?.preferredType === "markdown";
  if (attachedFiles.includes("pdf") && wantsMarkdown) {
    const converted = await tryPdfToMarkdownConvert(ctx);
    if (converted) return "pdf-convert";
  }
  return void 0;
}
|
|
10576
10977
|
async function downloadAndAttach(locations, pmcid, tempDir, ctx, referenceId, identifier) {
|
|
10577
10978
|
const attachedFiles = [];
|
|
10578
10979
|
let usedSource = "";
|
|
@@ -10582,19 +10983,17 @@ async function downloadAndAttach(locations, pmcid, tempDir, ctx, referenceId, id
|
|
|
10582
10983
|
attachedFiles.push("pdf");
|
|
10583
10984
|
usedSource = pdfResult.source;
|
|
10584
10985
|
}
|
|
10585
|
-
|
|
10586
|
-
|
|
10587
|
-
|
|
10588
|
-
|
|
10589
|
-
|
|
10590
|
-
|
|
10591
|
-
|
|
10592
|
-
|
|
10593
|
-
|
|
10594
|
-
|
|
10595
|
-
|
|
10596
|
-
if (!usedSource) usedSource = arxivResult.source;
|
|
10597
|
-
}
|
|
10986
|
+
const mdSource = await tryMarkdownConversion(
|
|
10987
|
+
locations,
|
|
10988
|
+
pmcid,
|
|
10989
|
+
tempDir,
|
|
10990
|
+
ctx,
|
|
10991
|
+
attachedFiles,
|
|
10992
|
+
attempts
|
|
10993
|
+
);
|
|
10994
|
+
if (mdSource) {
|
|
10995
|
+
attachedFiles.push("markdown");
|
|
10996
|
+
if (!usedSource) usedSource = mdSource;
|
|
10598
10997
|
}
|
|
10599
10998
|
if (attachedFiles.length > 0) {
|
|
10600
10999
|
return { success: true, referenceId, source: usedSource, attachedFiles };
|
|
@@ -10604,64 +11003,24 @@ async function downloadAndAttach(locations, pmcid, tempDir, ctx, referenceId, id
|
|
|
10604
11003
|
attempts: attempts.length > 0 ? attempts : void 0
|
|
10605
11004
|
};
|
|
10606
11005
|
}
|
|
10607
|
-
|
|
10608
|
-
__proto__: null,
|
|
10609
|
-
fulltextFetch
|
|
10610
|
-
}, Symbol.toStringTag, { value: "Module" }));
|
|
10611
|
-
function findXmlFile(item) {
|
|
10612
|
-
const attachments = item.custom?.attachments;
|
|
10613
|
-
if (!attachments?.files) return void 0;
|
|
10614
|
-
const xmlFile = attachments.files.find(
|
|
10615
|
-
(f) => f.role === "fulltext" && f.filename.endsWith(".xml")
|
|
10616
|
-
);
|
|
10617
|
-
return xmlFile?.filename;
|
|
10618
|
-
}
|
|
10619
|
-
function getXmlPath(item, xmlFilename, fulltextDirectory) {
|
|
10620
|
-
const attachments = item.custom?.attachments;
|
|
10621
|
-
const directory = attachments?.directory ?? "";
|
|
10622
|
-
return join(fulltextDirectory, directory, xmlFilename);
|
|
10623
|
-
}
|
|
10624
|
-
async function fulltextConvert(library, options) {
|
|
10625
|
-
const { identifier, idType = "id", fulltextDirectory } = options;
|
|
10626
|
-
const item = await library.find(identifier, { idType });
|
|
10627
|
-
if (!item) {
|
|
10628
|
-
return { success: false, error: `Reference '${identifier}' not found` };
|
|
10629
|
-
}
|
|
10630
|
-
const xmlFilename = findXmlFile(item);
|
|
10631
|
-
if (!xmlFilename) {
|
|
10632
|
-
return { success: false, error: `No PMC XML file attached to '${identifier}'` };
|
|
10633
|
-
}
|
|
10634
|
-
const xmlPath = getXmlPath(item, xmlFilename, fulltextDirectory);
|
|
11006
|
+
/**
 * Best-effort conversion of the reference's attached PDF to Markdown.
 *
 * @param {object} ctx - Fetch context; reads `library`, `identifier`,
 *   `idType`, `fulltextDirectory` and `fulltextConfig`.
 * @returns {Promise<boolean>} The `success` flag of the conversion; `false`
 *   when the conversion throws, so that a failed conversion never fails the
 *   surrounding fetch.
 */
async function tryPdfToMarkdownConvert(ctx) {
  try {
    const { success } = await fulltextConvert(ctx.library, {
      identifier: ctx.identifier,
      idType: ctx.idType,
      fulltextDirectory: ctx.fulltextDirectory,
      from: "pdf",
      fulltextConfig: ctx.fulltextConfig
    });
    return success;
  } catch {
    return false;
  }
}
|
|
11020
|
+
// Frozen, prototype-less namespace object exposing `fulltextFetch`, tagged
// "Module" through Symbol.toStringTag.
const fetch$1 = /* @__PURE__ */ Object.freeze(/* @__PURE__ */ Object.defineProperty({
  __proto__: null,
  fulltextFetch
}, Symbol.toStringTag, { value: "Module" }));
|
|
10665
11024
|
function formatFirstAuthor(item) {
|
|
10666
11025
|
if (!item.author || item.author.length === 0) {
|
|
10667
11026
|
return "Unknown";
|
|
@@ -12421,7 +12780,7 @@ function createAddRoute(library, config) {
|
|
|
12421
12780
|
}
|
|
12422
12781
|
const CHECK_CONCURRENCY = 5;
|
|
12423
12782
|
async function checkReferences(library, options) {
|
|
12424
|
-
const { checkReference } = await import("./checker-
|
|
12783
|
+
const { checkReference } = await import("./checker-DhHnmgq0.js");
|
|
12425
12784
|
const save = options.save !== false;
|
|
12426
12785
|
const skipDays = options.skipDays ?? 7;
|
|
12427
12786
|
const items = await resolveItems(library, options);
|
|
@@ -12940,13 +13299,24 @@ function createReferencesRoute(library, config) {
|
|
|
12940
13299
|
});
|
|
12941
13300
|
route.post("/uuid/:uuid/fulltext/convert", async (c) => {
|
|
12942
13301
|
const uuid = c.req.param("uuid");
|
|
13302
|
+
const body = await c.req.json().catch(() => ({}));
|
|
13303
|
+
const validFrom = ["xml", "pdf", void 0];
|
|
13304
|
+
if (body.from !== void 0 && !validFrom.includes(body.from)) {
|
|
13305
|
+
return c.json({ error: `Invalid 'from' value: must be 'xml', 'pdf', or omitted` }, 400);
|
|
13306
|
+
}
|
|
13307
|
+
if (body.converter !== void 0 && typeof body.converter !== "string") {
|
|
13308
|
+
return c.json({ error: `Invalid 'converter' value: must be a string or omitted` }, 400);
|
|
13309
|
+
}
|
|
12943
13310
|
const result = await fulltextConvert(library, {
|
|
12944
13311
|
identifier: uuid,
|
|
12945
13312
|
idType: "uuid",
|
|
12946
|
-
fulltextDirectory: config.attachments.directory
|
|
13313
|
+
fulltextDirectory: config.attachments.directory,
|
|
13314
|
+
from: body.from,
|
|
13315
|
+
converter: body.converter,
|
|
13316
|
+
fulltextConfig: config.fulltext
|
|
12947
13317
|
});
|
|
12948
13318
|
if (!result.success) {
|
|
12949
|
-
return c.json({ error: result.error }, 400);
|
|
13319
|
+
return c.json({ error: result.error, code: result.code, hints: result.hints }, 400);
|
|
12950
13320
|
}
|
|
12951
13321
|
return c.json(result);
|
|
12952
13322
|
});
|
|
@@ -13100,4 +13470,4 @@ export {
|
|
|
13100
13470
|
fetcher as y,
|
|
13101
13471
|
add as z
|
|
13102
13472
|
};
|
|
13103
|
-
//# sourceMappingURL=index-
|
|
13473
|
+
//# sourceMappingURL=index-DoOmAYKd.js.map
|