@opendataloader/pdf 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,102 @@
1
+ // src/index.ts
2
+ import { spawn } from "child_process";
3
+ import * as path from "path";
4
+ import * as fs from "fs";
5
+ import { fileURLToPath } from "url";
6
// ES modules do not provide the CommonJS `__filename` / `__dirname` globals,
// so both are derived here from this module's own URL.
+ var __filename = fileURLToPath(import.meta.url);
7
+ var __dirname = path.dirname(__filename);
8
// File name of the CLI JAR; `run` looks for it under `../lib` relative to `__dirname`.
+ var JAR_NAME = "opendataloader-pdf-cli.jar";
9
/**
 * Boolean `run` options paired with the CLI switch each one adds, listed in
 * the order the switches are appended to the command line.
 */
var BOOLEAN_SWITCHES = [
  ["generateMarkdown", "--markdown"],
  ["generateHtml", "--html"],
  ["generateAnnotatedPdf", "--pdf"],
  ["keepLineBreaks", "--keep-line-breaks"],
  ["findHiddenText", "--findhiddentext"],
  ["htmlInMarkdown", "--markdown-with-html"],
  ["addImageToMarkdown", "--markdown-with-images"]
];

/**
 * Translate the `run` options into CLI arguments, ending with the input path.
 *
 * Options that are unset or falsy contribute nothing to the result.
 */
function buildCliArgs(inputPath, options) {
  const args = [];
  if (options.outputFolder) {
    args.push("--output-dir", options.outputFolder);
  }
  if (options.password) {
    args.push("--password", options.password);
  }
  if (options.replaceInvalidChars) {
    args.push("--replace-invalid-chars", options.replaceInvalidChars);
  }
  for (const [optionName, cliSwitch] of BOOLEAN_SWITCHES) {
    if (options[optionName]) {
      args.push(cliSwitch);
    }
  }
  args.push(inputPath);
  return args;
}

/**
 * Run the bundled opendataloader-pdf CLI JAR through `java -jar` on a file or folder.
 *
 * @param inputPath Path of the input file or folder; it must exist.
 * @param options Optional settings. `outputFolder`, `password` and
 *   `replaceInvalidChars` are passed as values of `--output-dir`, `--password`
 *   and `--replace-invalid-chars`. `generateMarkdown`, `generateHtml`,
 *   `generateAnnotatedPdf`, `keepLineBreaks`, `findHiddenText`,
 *   `htmlInMarkdown` and `addImageToMarkdown` each add their CLI switch when
 *   truthy. `debug` echoes the command line and the child's output.
 * @returns A promise that resolves with everything the CLI wrote to stdout
 *   once it exits with code 0.
 * @throws The promise rejects with an `Error` when the input path or the JAR
 *   does not exist, when `java` cannot be started, or when the CLI ends with a
 *   non-zero exit code or is terminated by a signal (the message then carries
 *   the captured stderr).
 */
function run(inputPath, options = {}) {
  return new Promise((resolve, reject) => {
    if (!fs.existsSync(inputPath)) {
      return reject(new Error(`Input file or folder not found: ${inputPath}`));
    }
    const cliArgs = buildCliArgs(inputPath, options);
    const jarPath = path.join(__dirname, "..", "lib", JAR_NAME);
    if (!fs.existsSync(jarPath)) {
      return reject(
        new Error(`JAR file not found at ${jarPath}. Please run the build script first.`)
      );
    }
    const command = "java";
    const commandArgs = ["-jar", jarPath, ...cliArgs];
    if (options.debug) {
      // Never echo the real password: rebuild the arguments with it masked.
      const shownArgs = options.password
        ? buildCliArgs(inputPath, { ...options, password: "****" })
        : cliArgs;
      console.error(`Running command: ${command} ${["-jar", jarPath, ...shownArgs].join(" ")}`);
    }
    const javaProcess = spawn(command, commandArgs);
    // Decode at the stream level so a multi-byte UTF-8 character that is split
    // across two chunks is reassembled instead of turning into U+FFFD.
    javaProcess.stdout.setEncoding("utf8");
    javaProcess.stderr.setEncoding("utf8");
    let stdout = "";
    let stderr = "";
    javaProcess.stdout.on("data", (chunk) => {
      if (options.debug) {
        process.stdout.write(chunk);
      }
      stdout += chunk;
    });
    javaProcess.stderr.on("data", (chunk) => {
      if (options.debug) {
        process.stderr.write(chunk);
      }
      stderr += chunk;
    });
    // If both "error" and "close" fire for the same child, only the first
    // settlement of the promise takes effect.
    javaProcess.on("close", (code, signal) => {
      if (code === 0) {
        resolve(stdout);
        return;
      }
      // `code` is null when the child was ended by a signal.
      const outcome =
        code === null ? `was terminated by signal ${signal}` : `exited with code ${code}`;
      reject(new Error(`The opendataloader-pdf CLI ${outcome}.\n\n${stderr}`));
    });
    javaProcess.on("error", (err) => {
      // Prefer the structured error code; keep the message match as a fallback.
      const javaMissing =
        ("code" in err && err.code === "ENOENT") || err.message.includes("ENOENT");
      if (javaMissing) {
        reject(
          new Error(
            "'java' command not found. Please ensure Java is installed and in your system's PATH."
          )
        );
      } else {
        reject(err);
      }
    });
  });
}
99
+ export {
100
+ run
101
+ };
102
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/index.ts"],"sourcesContent":["import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { fileURLToPath } from 'url';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n findHiddenText?: boolean;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n debug?: boolean;\n}\n\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n return new Promise((resolve, reject) => {\n if (!fs.existsSync(inputPath)) {\n return reject(new Error(`Input file or folder not found: ${inputPath}`));\n }\n\n const args: string[] = [];\n if (options.outputFolder) {\n args.push('--output-dir', options.outputFolder);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.generateMarkdown) {\n args.push('--markdown');\n }\n if (options.generateHtml) {\n args.push('--html');\n }\n if (options.generateAnnotatedPdf) {\n args.push('--pdf');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.findHiddenText) {\n args.push('--findhiddentext');\n }\n if (options.htmlInMarkdown) {\n args.push('--markdown-with-html');\n }\n if (options.addImageToMarkdown) {\n args.push('--markdown-with-images');\n }\n\n args.push(inputPath);\n\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. 
Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n if (options.debug) {\n console.error(`Running command: ${command} ${commandArgs.join(' ')}`);\n }\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n\n javaProcess.stdout.on('data', (data) => {\n const chunk = data.toString();\n if (options.debug) {\n process.stdout.write(chunk);\n }\n stdout += chunk;\n });\n\n javaProcess.stderr.on('data', (data) => {\n const chunk = data.toString();\n if (options.debug) {\n process.stderr.write(chunk);\n }\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n if (code === 0) {\n resolve(stdout);\n } else {\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${stderr}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n 
});\n}\n"],"mappings":";AAAA,SAAS,aAAa;AACtB,YAAY,UAAU;AACtB,YAAY,QAAQ;AACpB,SAAS,qBAAqB;AAE9B,IAAM,aAAa,cAAc,YAAY,GAAG;AAChD,IAAM,YAAiB,aAAQ,UAAU;AAEzC,IAAM,WAAW;AAgBV,SAAS,IAAI,WAAmB,UAAsB,CAAC,GAAoB;AAChF,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,QAAI,CAAI,cAAW,SAAS,GAAG;AAC7B,aAAO,OAAO,IAAI,MAAM,mCAAmC,SAAS,EAAE,CAAC;AAAA,IACzE;AAEA,UAAM,OAAiB,CAAC;AACxB,QAAI,QAAQ,cAAc;AACxB,WAAK,KAAK,gBAAgB,QAAQ,YAAY;AAAA,IAChD;AACA,QAAI,QAAQ,UAAU;AACpB,WAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,IAC1C;AACA,QAAI,QAAQ,qBAAqB;AAC/B,WAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,IAClE;AACA,QAAI,QAAQ,kBAAkB;AAC5B,WAAK,KAAK,YAAY;AAAA,IACxB;AACA,QAAI,QAAQ,cAAc;AACxB,WAAK,KAAK,QAAQ;AAAA,IACpB;AACA,QAAI,QAAQ,sBAAsB;AAChC,WAAK,KAAK,OAAO;AAAA,IACnB;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,oBAAoB;AAAA,IAChC;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,kBAAkB;AAAA,IAC9B;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,sBAAsB;AAAA,IAClC;AACA,QAAI,QAAQ,oBAAoB;AAC9B,WAAK,KAAK,wBAAwB;AAAA,IACpC;AAEA,SAAK,KAAK,SAAS;AAEnB,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,QAAI,QAAQ,OAAO;AACjB,cAAQ,MAAM,oBAAoB,OAAO,IAAI,YAAY,KAAK,GAAG,CAAC,EAAE;AAAA,IACtE;AAEA,UAAM,cAAc,MAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,QAAQ,OAAO;AACjB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,QAAQ,OAAO;AACjB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAChC,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,MAAM;AAAA,QACnE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAQ;AAC/B,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;","names":[]}
Binary file
package/package.json ADDED
@@ -0,0 +1,63 @@
1
+ {
2
+ "name": "@opendataloader/pdf",
3
+ "version": "0.0.0",
4
+ "description": "A Node.js wrapper for the opendataloader-pdf Java CLI.",
5
+ "main": "./dist/index.cjs",
6
+ "module": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "type": "module",
9
+ "exports": {
10
+ ".": {
11
+ "import": "./dist/index.js",
12
+ "require": "./dist/index.cjs"
13
+ }
14
+ },
15
+ "repository": {
16
+ "type": "git",
17
+ "url": "git+https://github.com/opendataloader-project/opendataloader-pdf.git"
18
+ },
19
+ "keywords": [
20
+ "pdf",
21
+ "markdown",
22
+ "html",
23
+ "convert",
24
+ "pdf-convert",
25
+ "pdf-parser",
26
+ "pdf-parsing",
27
+ "pdf-to-json",
28
+ "pdf-to-markdown",
29
+ "pdf-to-html"
30
+ ],
31
+ "author": "opendataloader-project",
32
+ "license": "MPL-2.0",
33
+ "bugs": {
34
+ "url": "https://github.com/opendataloader-project/opendataloader-pdf/issues"
35
+ },
36
+ "homepage": "https://github.com/opendataloader-project/opendataloader-pdf#readme",
37
+ "publishConfig": {
38
+ "access": "public"
39
+ },
40
+ "devDependencies": {
41
+ "@types/glob": "^8.1.0",
42
+ "@types/node": "^24.3.3",
43
+ "glob": "^11.0.3",
44
+ "prettier": "^3.6.2",
45
+ "tsup": "^8.5.0",
46
+ "typescript": "^5.9.2",
47
+ "vitest": "^3.2.4"
48
+ },
49
+ "files": [
50
+ "dist",
51
+ "lib",
52
+ "LICENSE",
53
+ "NOTICE.md",
54
+ "README.md",
55
+ "THIRD_PARTY"
56
+ ],
57
+ "scripts": {
58
+ "setup": "node ./scripts/setup.cjs",
59
+ "build": "pnpm run setup && tsup",
60
+ "test": "vitest",
61
+ "format": "prettier --write \"**/*.{ts,js,json,md}\""
62
+ }
63
+ }