paper-manager 0.10.3 → 0.10.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/dep.js +10 -0
- package/dist/extractor/markdown.js +27 -1
- package/package.json +1 -1
package/dist/commands/dep.js
CHANGED
|
@@ -31,6 +31,7 @@ async function checkOpendataLoader() {
|
|
|
31
31
|
const status = await checkOpendataLoaderStatus();
|
|
32
32
|
const pkgIcon = status.packageInstalled ? chalk.green("✔") : chalk.red("✖");
|
|
33
33
|
const javaIcon = status.javaAvailable ? chalk.green("✔") : chalk.red("✖");
|
|
34
|
+
const hybridIcon = status.hybridBackendAvailable ? chalk.green("✔") : chalk.dim("○");
|
|
34
35
|
log.plain(` ${pkgIcon} @opendataloader/pdf package`);
|
|
35
36
|
if (status.javaAvailable) {
|
|
36
37
|
log.plain(` ${javaIcon} Java runtime (${status.javaVersion})`);
|
|
@@ -38,9 +39,18 @@ async function checkOpendataLoader() {
|
|
|
38
39
|
else {
|
|
39
40
|
log.plain(` ${javaIcon} Java runtime (not found)`);
|
|
40
41
|
}
|
|
42
|
+
if (status.hybridBackendAvailable) {
|
|
43
|
+
log.plain(` ${hybridIcon} Hybrid backend (localhost:5002)`);
|
|
44
|
+
}
|
|
45
|
+
else {
|
|
46
|
+
log.plain(` ${hybridIcon} Hybrid backend (not running, optional)`);
|
|
47
|
+
}
|
|
41
48
|
log.newline();
|
|
42
49
|
if (status.packageInstalled && status.javaAvailable) {
|
|
43
50
|
log.success("opendataloader-pdf is ready.");
|
|
51
|
+
if (status.hybridBackendAvailable) {
|
|
52
|
+
log.step("Hybrid mode enabled — using docling backend for improved extraction.");
|
|
53
|
+
}
|
|
44
54
|
}
|
|
45
55
|
else {
|
|
46
56
|
log.error("opendataloader-pdf is not available.");
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { execFile } from "node:child_process";
|
|
2
2
|
import { existsSync, mkdirSync, readdirSync, readFileSync, rmSync, writeFileSync } from "node:fs";
|
|
3
|
+
import { request } from "node:http";
|
|
3
4
|
import { tmpdir } from "node:os";
|
|
4
5
|
import * as path from "node:path";
|
|
5
6
|
/**
|
|
@@ -21,10 +22,12 @@ export async function convertPdfToMarkdown(pdfPath) {
|
|
|
21
22
|
mkdirSync(outDir, { recursive: true });
|
|
22
23
|
try {
|
|
23
24
|
const { convert } = await import("@opendataloader/pdf");
|
|
25
|
+
const hybridAvailable = await isHybridBackendAvailable();
|
|
24
26
|
await convert([pdfPath], {
|
|
25
27
|
outputDir: outDir,
|
|
26
28
|
format: "markdown",
|
|
27
29
|
quiet: true,
|
|
30
|
+
...(hybridAvailable ? { hybrid: "docling-fast", hybridFallback: true } : {}),
|
|
28
31
|
});
|
|
29
32
|
const mdFile = readdirSync(outDir).find((f) => f.endsWith(".md"));
|
|
30
33
|
if (!mdFile)
|
|
@@ -109,13 +112,36 @@ async function checkJava() {
|
|
|
109
112
|
* Detailed availability check for the `dep check` command.
|
|
110
113
|
*/
|
|
111
114
|
export async function checkOpendataLoaderStatus() {
    // Start the three probes together; they do not depend on each other.
    const probes = [checkPackage(), getJavaVersion(), isHybridBackendAvailable()];
    const [packageInstalled, javaVersion, hybridBackendAvailable] = await Promise.all(probes);
    // The Java probe yields null when no version could be determined.
    const javaAvailable = javaVersion !== null;
    return { packageInstalled, javaAvailable, javaVersion, hybridBackendAvailable };
}
|
|
127
|
+
const HYBRID_BACKEND_URL = "http://localhost:5002";
const HYBRID_PROBE_TIMEOUT_MS = 1500;
/**
 * Probe the opendataloader hybrid backend with a single HTTP GET.
 *
 * @returns {Promise<boolean>} Resolves to true as soon as any HTTP response
 *   arrives (the status code is not inspected), and to false when the request
 *   errors or the timeout fires.
 */
function isHybridBackendAvailable() {
    const probeOptions = { method: "GET", timeout: HYBRID_PROBE_TIMEOUT_MS };
    return new Promise((resolve) => {
        const onResponse = (response) => {
            // Discard the body; receiving a response at all is the signal.
            response.resume();
            resolve(true);
        };
        const probe = request(HYBRID_BACKEND_URL, probeOptions, onResponse);
        probe.on("error", () => resolve(false));
        probe.on("timeout", () => {
            // The timeout event does not abort the request by itself.
            probe.destroy();
            resolve(false);
        });
        probe.end();
    });
}
|
|
119
145
|
// execFile is safe — arguments are passed as an array, no shell interpolation.
|
|
120
146
|
function getJavaVersion() {
|
|
121
147
|
return new Promise((resolve) => {
|