@opendataloader/pdf 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +362 -0
- package/NOTICE.md +15 -0
- package/README.md +402 -0
- package/THIRD_PARTY/THIRD_PARTY_LICENSES.md +57 -0
- package/THIRD_PARTY/THIRD_PARTY_NOTICES.md +258 -0
- package/THIRD_PARTY/licenses/Apache-2.0.txt +202 -0
- package/THIRD_PARTY/licenses/BSD-3-Clause.txt +30 -0
- package/THIRD_PARTY/licenses/CDDL-1.1.txt +352 -0
- package/THIRD_PARTY/licenses/EDL-1.0.txt +31 -0
- package/THIRD_PARTY/licenses/EPL-2.0.txt +267 -0
- package/THIRD_PARTY/licenses/LICENSE-JJ2000.txt +28 -0
- package/THIRD_PARTY/licenses/MIT.txt +21 -0
- package/THIRD_PARTY/licenses/MPL-2.0.txt +408 -0
- package/THIRD_PARTY/licenses/Plexus Classworlds License.txt +37 -0
- package/dist/index.cjs +138 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +16 -0
- package/dist/index.d.ts +16 -0
- package/dist/index.js +102 -0
- package/dist/index.js.map +1 -0
- package/lib/opendataloader-pdf-cli.jar +0 -0
- package/package.json +63 -0
package/dist/index.js
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
// src/index.ts
|
|
2
|
+
import { spawn } from "child_process";
|
|
3
|
+
import * as path from "path";
|
|
4
|
+
import * as fs from "fs";
|
|
5
|
+
import { fileURLToPath } from "url";
|
|
6
|
+
// Filesystem path of this module file, converted from its `import.meta.url`.
var __filename = fileURLToPath(import.meta.url);
|
|
7
|
+
// Directory containing this module file; `run` resolves the bundled JAR relative to it.
var __dirname = path.dirname(__filename);
|
|
8
|
+
// File name of the CLI JAR that `run` looks up in the `lib` folder next to this module's directory.
var JAR_NAME = "opendataloader-pdf-cli.jar";
|
|
9
|
+
/**
 * Runs the bundled opendataloader-pdf Java CLI against a file or folder.
 *
 * Recognised `options` keys and the CLI arguments they produce (in this order):
 *   - outputFolder          -> `--output-dir <value>`
 *   - password              -> `--password <value>`
 *   - replaceInvalidChars   -> `--replace-invalid-chars <value>`
 *   - generateMarkdown      -> `--markdown`
 *   - generateHtml          -> `--html`
 *   - generateAnnotatedPdf  -> `--pdf`
 *   - keepLineBreaks        -> `--keep-line-breaks`
 *   - findHiddenText        -> `--findhiddentext`
 *   - htmlInMarkdown        -> `--markdown-with-html`
 *   - addImageToMarkdown    -> `--markdown-with-images`
 *   - debug                 -> not passed to the CLI; echoes the command line to
 *                              stderr and mirrors the child's stdout/stderr live.
 * An option is only applied when its value is truthy.
 *
 * @param {string} inputPath Path of the input file or folder; it must exist.
 * @param {object} [options] Optional settings described above.
 * @returns {Promise<string>} Resolves with everything the CLI wrote to stdout
 *   when it exits with code 0. Rejects with an `Error` when the input path or
 *   the JAR is missing, when `java` cannot be started, or when the CLI exits
 *   with a non-zero code or is terminated by a signal (the message then carries
 *   the collected stderr).
 */
function run(inputPath, options = {}) {
  return new Promise((resolve, reject) => {
    // Default parameters do not cover an explicit `null`; treat it like "no options".
    const opts = options ?? {};

    if (!fs.existsSync(inputPath)) {
      return reject(new Error(`Input file or folder not found: ${inputPath}`));
    }

    // Options that are forwarded together with their value.
    const valueFlags = [
      ["outputFolder", "--output-dir"],
      ["password", "--password"],
      ["replaceInvalidChars", "--replace-invalid-chars"]
    ];
    // Options that act as plain on/off switches.
    const switchFlags = [
      ["generateMarkdown", "--markdown"],
      ["generateHtml", "--html"],
      ["generateAnnotatedPdf", "--pdf"],
      ["keepLineBreaks", "--keep-line-breaks"],
      ["findHiddenText", "--findhiddentext"],
      ["htmlInMarkdown", "--markdown-with-html"],
      ["addImageToMarkdown", "--markdown-with-images"]
    ];

    const args = [];
    for (const [key, flag] of valueFlags) {
      if (opts[key]) {
        args.push(flag, opts[key]);
      }
    }
    for (const [key, flag] of switchFlags) {
      if (opts[key]) {
        args.push(flag);
      }
    }
    // The input path is always the last CLI argument.
    args.push(inputPath);

    const jarPath = path.join(__dirname, "..", "lib", JAR_NAME);
    if (!fs.existsSync(jarPath)) {
      return reject(
        new Error(`JAR file not found at ${jarPath}. Please run the build script first.`)
      );
    }

    const command = "java";
    const commandArgs = ["-jar", jarPath, ...args];

    if (opts.debug) {
      // Mask the value that follows `--password` so the secret never reaches the logs.
      const printableArgs = commandArgs.map((arg, index) =>
        index > 0 && commandArgs[index - 1] === "--password" ? "***" : arg
      );
      console.error(`Running command: ${command} ${printableArgs.join(" ")}`);
    }

    const javaProcess = spawn(command, commandArgs);

    // Let the streams decode UTF-8 themselves: decoding each Buffer chunk on its
    // own corrupts multi-byte characters that happen to straddle a chunk boundary.
    javaProcess.stdout.setEncoding("utf8");
    javaProcess.stderr.setEncoding("utf8");

    let stdout = "";
    let stderr = "";

    javaProcess.stdout.on("data", (chunk) => {
      if (opts.debug) {
        process.stdout.write(chunk);
      }
      stdout += chunk;
    });

    javaProcess.stderr.on("data", (chunk) => {
      if (opts.debug) {
        process.stderr.write(chunk);
      }
      stderr += chunk;
    });

    javaProcess.on("close", (code, signal) => {
      if (code === 0) {
        resolve(stdout);
        return;
      }
      // A child killed by a signal reports `code === null`; name the signal
      // instead of printing "exited with code null".
      const outcome =
        code === null && signal
          ? `was terminated by signal ${signal}`
          : `exited with code ${code}`;
      reject(new Error(`The opendataloader-pdf CLI ${outcome}.\n\n${stderr}`));
    });

    // A failed spawn emits "error"; a later "close" is harmless because a
    // promise only settles once.
    javaProcess.on("error", (err) => {
      if (err.message.includes("ENOENT")) {
        reject(
          new Error(
            "'java' command not found. Please ensure Java is installed and in your system's PATH."
          )
        );
      } else {
        reject(err);
      }
    });
  });
}
|
|
99
|
+
export {
|
|
100
|
+
run
|
|
101
|
+
};
|
|
102
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.ts"],"sourcesContent":["import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { fileURLToPath } from 'url';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n findHiddenText?: boolean;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n debug?: boolean;\n}\n\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n return new Promise((resolve, reject) => {\n if (!fs.existsSync(inputPath)) {\n return reject(new Error(`Input file or folder not found: ${inputPath}`));\n }\n\n const args: string[] = [];\n if (options.outputFolder) {\n args.push('--output-dir', options.outputFolder);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.generateMarkdown) {\n args.push('--markdown');\n }\n if (options.generateHtml) {\n args.push('--html');\n }\n if (options.generateAnnotatedPdf) {\n args.push('--pdf');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.findHiddenText) {\n args.push('--findhiddentext');\n }\n if (options.htmlInMarkdown) {\n args.push('--markdown-with-html');\n }\n if (options.addImageToMarkdown) {\n args.push('--markdown-with-images');\n }\n\n args.push(inputPath);\n\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. 
Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n if (options.debug) {\n console.error(`Running command: ${command} ${commandArgs.join(' ')}`);\n }\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n\n javaProcess.stdout.on('data', (data) => {\n const chunk = data.toString();\n if (options.debug) {\n process.stdout.write(chunk);\n }\n stdout += chunk;\n });\n\n javaProcess.stderr.on('data', (data) => {\n const chunk = data.toString();\n if (options.debug) {\n process.stderr.write(chunk);\n }\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n if (code === 0) {\n resolve(stdout);\n } else {\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${stderr}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n 
});\n}\n"],"mappings":";AAAA,SAAS,aAAa;AACtB,YAAY,UAAU;AACtB,YAAY,QAAQ;AACpB,SAAS,qBAAqB;AAE9B,IAAM,aAAa,cAAc,YAAY,GAAG;AAChD,IAAM,YAAiB,aAAQ,UAAU;AAEzC,IAAM,WAAW;AAgBV,SAAS,IAAI,WAAmB,UAAsB,CAAC,GAAoB;AAChF,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,QAAI,CAAI,cAAW,SAAS,GAAG;AAC7B,aAAO,OAAO,IAAI,MAAM,mCAAmC,SAAS,EAAE,CAAC;AAAA,IACzE;AAEA,UAAM,OAAiB,CAAC;AACxB,QAAI,QAAQ,cAAc;AACxB,WAAK,KAAK,gBAAgB,QAAQ,YAAY;AAAA,IAChD;AACA,QAAI,QAAQ,UAAU;AACpB,WAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,IAC1C;AACA,QAAI,QAAQ,qBAAqB;AAC/B,WAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,IAClE;AACA,QAAI,QAAQ,kBAAkB;AAC5B,WAAK,KAAK,YAAY;AAAA,IACxB;AACA,QAAI,QAAQ,cAAc;AACxB,WAAK,KAAK,QAAQ;AAAA,IACpB;AACA,QAAI,QAAQ,sBAAsB;AAChC,WAAK,KAAK,OAAO;AAAA,IACnB;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,oBAAoB;AAAA,IAChC;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,kBAAkB;AAAA,IAC9B;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,sBAAsB;AAAA,IAClC;AACA,QAAI,QAAQ,oBAAoB;AAC9B,WAAK,KAAK,wBAAwB;AAAA,IACpC;AAEA,SAAK,KAAK,SAAS;AAEnB,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,QAAI,QAAQ,OAAO;AACjB,cAAQ,MAAM,oBAAoB,OAAO,IAAI,YAAY,KAAK,GAAG,CAAC,EAAE;AAAA,IACtE;AAEA,UAAM,cAAc,MAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,QAAQ,OAAO;AACjB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,QAAQ,OAAO;AACjB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAChC,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,MAAM;AAAA,QACnE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAQ;AAC/B,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;","names":[]}
|
|
Binary file
|
package/package.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@opendataloader/pdf",
|
|
3
|
+
"version": "0.0.0",
|
|
4
|
+
"description": "A Node.js wrapper for the opendataloader-pdf Java CLI.",
|
|
5
|
+
"main": "./dist/index.cjs",
|
|
6
|
+
"module": "./dist/index.js",
|
|
7
|
+
"types": "./dist/index.d.ts",
|
|
8
|
+
"type": "module",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"import": "./dist/index.js",
|
|
12
|
+
"require": "./dist/index.cjs"
|
|
13
|
+
}
|
|
14
|
+
},
|
|
15
|
+
"repository": {
|
|
16
|
+
"type": "git",
|
|
17
|
+
"url": "git+https://github.com/opendataloader-project/opendataloader-pdf.git"
|
|
18
|
+
},
|
|
19
|
+
"keywords": [
|
|
20
|
+
"pdf",
|
|
21
|
+
"markdown",
|
|
22
|
+
"html",
|
|
23
|
+
"convert",
|
|
24
|
+
"pdf-convert",
|
|
25
|
+
"pdf-parser",
|
|
26
|
+
"pdf-parsing",
|
|
27
|
+
"pdf-to-json",
|
|
28
|
+
"pdf-to-markdown",
|
|
29
|
+
"pdf-to-html"
|
|
30
|
+
],
|
|
31
|
+
"author": "opendataloader-project",
|
|
32
|
+
"license": "MPL-2.0",
|
|
33
|
+
"bugs": {
|
|
34
|
+
"url": "https://github.com/opendataloader-project/opendataloader-pdf/issues"
|
|
35
|
+
},
|
|
36
|
+
"homepage": "https://github.com/opendataloader-project/opendataloader-pdf#readme",
|
|
37
|
+
"publishConfig": {
|
|
38
|
+
"access": "public"
|
|
39
|
+
},
|
|
40
|
+
"devDependencies": {
|
|
41
|
+
"@types/glob": "^8.1.0",
|
|
42
|
+
"@types/node": "^24.3.3",
|
|
43
|
+
"glob": "^11.0.3",
|
|
44
|
+
"prettier": "^3.6.2",
|
|
45
|
+
"tsup": "^8.5.0",
|
|
46
|
+
"typescript": "^5.9.2",
|
|
47
|
+
"vitest": "^3.2.4"
|
|
48
|
+
},
|
|
49
|
+
"files": [
|
|
50
|
+
"dist",
|
|
51
|
+
"lib",
|
|
52
|
+
"LICENSE",
|
|
53
|
+
"NOTICE.md",
|
|
54
|
+
"README.md",
|
|
55
|
+
"THIRD_PARTY"
|
|
56
|
+
],
|
|
57
|
+
"scripts": {
|
|
58
|
+
"setup": "node ./scripts/setup.cjs",
|
|
59
|
+
"build": "pnpm run setup && tsup",
|
|
60
|
+
"test": "vitest",
|
|
61
|
+
"format": "prettier --write \"**/*.{ts,js,json,md}\""
|
|
62
|
+
}
|
|
63
|
+
}
|