@opendataloader/pdf 1.1.1 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -45
- package/dist/cli.cjs +131 -163
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +131 -163
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +89 -53
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +11 -1
- package/dist/index.d.ts +11 -1
- package/dist/index.js +88 -53
- package/dist/index.js.map +1 -1
- package/lib/opendataloader-pdf-cli.jar +0 -0
- package/package.json +4 -1
package/dist/cli.js
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
|
+
// src/cli.ts
|
|
4
|
+
import { Command, CommanderError } from "commander";
|
|
5
|
+
|
|
3
6
|
// src/index.ts
|
|
4
7
|
import { spawn } from "child_process";
|
|
5
8
|
import * as path from "path";
|
|
@@ -8,54 +11,9 @@ import { fileURLToPath } from "url";
|
|
|
8
11
|
var __filename = fileURLToPath(import.meta.url);
|
|
9
12
|
var __dirname = path.dirname(__filename);
|
|
10
13
|
var JAR_NAME = "opendataloader-pdf-cli.jar";
|
|
11
|
-
function
|
|
12
|
-
const
|
|
13
|
-
const passwordIndex = commandArgsForLogging.indexOf("--password");
|
|
14
|
-
if (passwordIndex > -1 && passwordIndex + 1 < commandArgsForLogging.length) {
|
|
15
|
-
commandArgsForLogging[passwordIndex + 1] = "[REDACTED]";
|
|
16
|
-
}
|
|
17
|
-
return `${command} ${commandArgsForLogging.join(" ")}`;
|
|
18
|
-
}
|
|
19
|
-
function run(inputPath, options = {}) {
|
|
14
|
+
function executeJar(args, executionOptions = {}) {
|
|
15
|
+
const { debug = false, streamOutput = false } = executionOptions;
|
|
20
16
|
return new Promise((resolve, reject) => {
|
|
21
|
-
if (!fs.existsSync(inputPath)) {
|
|
22
|
-
return reject(new Error(`Input file or folder not found: ${inputPath}`));
|
|
23
|
-
}
|
|
24
|
-
const args = [];
|
|
25
|
-
if (options.outputFolder) {
|
|
26
|
-
args.push("--output-dir", options.outputFolder);
|
|
27
|
-
}
|
|
28
|
-
if (options.password) {
|
|
29
|
-
args.push("--password", options.password);
|
|
30
|
-
}
|
|
31
|
-
if (options.replaceInvalidChars) {
|
|
32
|
-
args.push("--replace-invalid-chars", options.replaceInvalidChars);
|
|
33
|
-
}
|
|
34
|
-
if (options.generateMarkdown) {
|
|
35
|
-
args.push("--markdown");
|
|
36
|
-
}
|
|
37
|
-
if (options.generateHtml) {
|
|
38
|
-
args.push("--html");
|
|
39
|
-
}
|
|
40
|
-
if (options.generateAnnotatedPdf) {
|
|
41
|
-
args.push("--pdf");
|
|
42
|
-
}
|
|
43
|
-
if (options.keepLineBreaks) {
|
|
44
|
-
args.push("--keep-line-breaks");
|
|
45
|
-
}
|
|
46
|
-
if (options.contentSafetyOff) {
|
|
47
|
-
args.push("--content-safety-off", options.contentSafetyOff);
|
|
48
|
-
}
|
|
49
|
-
if (options.htmlInMarkdown) {
|
|
50
|
-
args.push("--markdown-with-html");
|
|
51
|
-
}
|
|
52
|
-
if (options.addImageToMarkdown) {
|
|
53
|
-
args.push("--markdown-with-images");
|
|
54
|
-
}
|
|
55
|
-
if (options.noJson) {
|
|
56
|
-
args.push("--no-json");
|
|
57
|
-
}
|
|
58
|
-
args.push(inputPath);
|
|
59
17
|
const jarPath = path.join(__dirname, "..", "lib", JAR_NAME);
|
|
60
18
|
if (!fs.existsSync(jarPath)) {
|
|
61
19
|
return reject(
|
|
@@ -64,22 +22,19 @@ function run(inputPath, options = {}) {
|
|
|
64
22
|
}
|
|
65
23
|
const command = "java";
|
|
66
24
|
const commandArgs = ["-jar", jarPath, ...args];
|
|
67
|
-
if (options.debug) {
|
|
68
|
-
console.error(`Running command: ${getRedactedCommandString(command, commandArgs)}`);
|
|
69
|
-
}
|
|
70
25
|
const javaProcess = spawn(command, commandArgs);
|
|
71
26
|
let stdout = "";
|
|
72
27
|
let stderr = "";
|
|
73
28
|
javaProcess.stdout.on("data", (data) => {
|
|
74
29
|
const chunk = data.toString();
|
|
75
|
-
if (
|
|
30
|
+
if (streamOutput) {
|
|
76
31
|
process.stdout.write(chunk);
|
|
77
32
|
}
|
|
78
33
|
stdout += chunk;
|
|
79
34
|
});
|
|
80
35
|
javaProcess.stderr.on("data", (data) => {
|
|
81
36
|
const chunk = data.toString();
|
|
82
|
-
if (
|
|
37
|
+
if (streamOutput) {
|
|
83
38
|
process.stderr.write(chunk);
|
|
84
39
|
}
|
|
85
40
|
stderr += chunk;
|
|
@@ -88,10 +43,11 @@ function run(inputPath, options = {}) {
|
|
|
88
43
|
if (code === 0) {
|
|
89
44
|
resolve(stdout);
|
|
90
45
|
} else {
|
|
46
|
+
const errorOutput = stderr || stdout;
|
|
91
47
|
const error = new Error(
|
|
92
48
|
`The opendataloader-pdf CLI exited with code ${code}.
|
|
93
49
|
|
|
94
|
-
${
|
|
50
|
+
${errorOutput}`
|
|
95
51
|
);
|
|
96
52
|
reject(error);
|
|
97
53
|
}
|
|
@@ -109,133 +65,145 @@ ${stderr}`
|
|
|
109
65
|
});
|
|
110
66
|
});
|
|
111
67
|
}
|
|
68
|
+
function convert(inputPaths, options = {}) {
|
|
69
|
+
if (inputPaths.length === 0) {
|
|
70
|
+
return Promise.reject(new Error("At least one input path must be provided."));
|
|
71
|
+
}
|
|
72
|
+
for (const input of inputPaths) {
|
|
73
|
+
if (!fs.existsSync(input)) {
|
|
74
|
+
return Promise.reject(new Error(`Input file or folder not found: ${input}`));
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
const args = [...inputPaths];
|
|
78
|
+
if (options.outputDir) {
|
|
79
|
+
args.push("--output-dir", options.outputDir);
|
|
80
|
+
}
|
|
81
|
+
if (options.password) {
|
|
82
|
+
args.push("--password", options.password);
|
|
83
|
+
}
|
|
84
|
+
if (options.format && options.format.length > 0) {
|
|
85
|
+
args.push("--format", ...options.format);
|
|
86
|
+
}
|
|
87
|
+
if (options.quiet) {
|
|
88
|
+
args.push("--quiet");
|
|
89
|
+
}
|
|
90
|
+
if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {
|
|
91
|
+
args.push("--content-safety-off", ...options.contentSafetyOff);
|
|
92
|
+
}
|
|
93
|
+
if (options.keepLineBreaks) {
|
|
94
|
+
args.push("--keep-line-breaks");
|
|
95
|
+
}
|
|
96
|
+
if (options.replaceInvalidChars) {
|
|
97
|
+
args.push("--replace-invalid-chars", options.replaceInvalidChars);
|
|
98
|
+
}
|
|
99
|
+
return executeJar(args, {
|
|
100
|
+
streamOutput: !options.quiet
|
|
101
|
+
});
|
|
102
|
+
}
|
|
112
103
|
|
|
113
104
|
// src/cli.ts
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
};
|
|
143
|
-
for (let i = 0; i < argv.length; i += 1) {
|
|
144
|
-
const arg = argv[i];
|
|
145
|
-
switch (arg) {
|
|
146
|
-
case "--help":
|
|
147
|
-
case "-h":
|
|
148
|
-
showHelp = true;
|
|
149
|
-
i = argv.length;
|
|
150
|
-
break;
|
|
151
|
-
case "--output-dir":
|
|
152
|
-
case "-o": {
|
|
153
|
-
const { value, nextIndex } = readValue(i, arg);
|
|
154
|
-
options.outputFolder = value;
|
|
155
|
-
i = nextIndex;
|
|
156
|
-
break;
|
|
157
|
-
}
|
|
158
|
-
case "--password":
|
|
159
|
-
case "-p": {
|
|
160
|
-
const { value, nextIndex } = readValue(i, arg);
|
|
161
|
-
options.password = value;
|
|
162
|
-
i = nextIndex;
|
|
163
|
-
break;
|
|
164
|
-
}
|
|
165
|
-
case "--replace-invalid-chars": {
|
|
166
|
-
const { value, nextIndex } = readValue(i, arg);
|
|
167
|
-
options.replaceInvalidChars = value;
|
|
168
|
-
i = nextIndex;
|
|
169
|
-
break;
|
|
170
|
-
}
|
|
171
|
-
case "--content-safety-off": {
|
|
172
|
-
const { value, nextIndex } = readValue(i, arg);
|
|
173
|
-
options.contentSafetyOff = value;
|
|
174
|
-
i = nextIndex;
|
|
175
|
-
break;
|
|
176
|
-
}
|
|
177
|
-
case "--markdown":
|
|
178
|
-
options.generateMarkdown = true;
|
|
179
|
-
break;
|
|
180
|
-
case "--html":
|
|
181
|
-
options.generateHtml = true;
|
|
182
|
-
break;
|
|
183
|
-
case "--pdf":
|
|
184
|
-
options.generateAnnotatedPdf = true;
|
|
185
|
-
break;
|
|
186
|
-
case "--keep-line-breaks":
|
|
187
|
-
options.keepLineBreaks = true;
|
|
188
|
-
break;
|
|
189
|
-
case "--markdown-with-html":
|
|
190
|
-
options.htmlInMarkdown = true;
|
|
191
|
-
break;
|
|
192
|
-
case "--markdown-with-images":
|
|
193
|
-
options.addImageToMarkdown = true;
|
|
194
|
-
break;
|
|
195
|
-
case "--no-json":
|
|
196
|
-
options.noJson = true;
|
|
197
|
-
break;
|
|
198
|
-
case "--debug":
|
|
199
|
-
options.debug = true;
|
|
200
|
-
break;
|
|
201
|
-
default:
|
|
202
|
-
if (arg.startsWith("-")) {
|
|
203
|
-
throw new Error(`Unknown option: ${arg}`);
|
|
204
|
-
}
|
|
205
|
-
if (inputPath) {
|
|
206
|
-
throw new Error("Multiple input paths provided. Only one input path is allowed.");
|
|
207
|
-
}
|
|
208
|
-
inputPath = arg;
|
|
105
|
+
var VALID_FORMATS = /* @__PURE__ */ new Set([
|
|
106
|
+
"json",
|
|
107
|
+
"text",
|
|
108
|
+
"html",
|
|
109
|
+
"pdf",
|
|
110
|
+
"markdown",
|
|
111
|
+
"markdown-with-html",
|
|
112
|
+
"markdown-with-images"
|
|
113
|
+
]);
|
|
114
|
+
var VALID_CONTENT_SAFETY_MODES = /* @__PURE__ */ new Set([
|
|
115
|
+
"all",
|
|
116
|
+
"hidden-text",
|
|
117
|
+
"off-page",
|
|
118
|
+
"tiny",
|
|
119
|
+
"hidden-ocg"
|
|
120
|
+
]);
|
|
121
|
+
function createProgram() {
|
|
122
|
+
const program = new Command();
|
|
123
|
+
program.name("opendataloader-pdf").usage("[options] <input...>").description("Convert PDFs using the OpenDataLoader CLI.").showHelpAfterError("Use '--help' to see available options.").showSuggestionAfterError(false).argument("<input...>", "Input files or directories to convert").option("-o, --output-dir <path>", "Directory where outputs are written").option("-p, --password <password>", "Password for encrypted PDFs").option(
|
|
124
|
+
"-f, --format <value...>",
|
|
125
|
+
"Output formats to generate (json, text, html, pdf, markdown, markdown-with-html, markdown-with-images)"
|
|
126
|
+
).option("-q, --quiet", "Suppress CLI logging output").option("--content-safety-off <mode...>", "Disable one or more content safety filters").option("--keep-line-breaks", "Preserve line breaks in text output").option("--replace-invalid-chars <c>", "Replacement character for invalid characters");
|
|
127
|
+
program.configureOutput({
|
|
128
|
+
writeErr: (str) => {
|
|
129
|
+
console.error(str.trimEnd());
|
|
130
|
+
},
|
|
131
|
+
outputError: (str, write) => {
|
|
132
|
+
write(str);
|
|
209
133
|
}
|
|
134
|
+
});
|
|
135
|
+
return program;
|
|
136
|
+
}
|
|
137
|
+
function buildConvertOptions(options) {
|
|
138
|
+
const convertOptions = {};
|
|
139
|
+
if (options.outputDir) {
|
|
140
|
+
convertOptions.outputDir = options.outputDir;
|
|
141
|
+
}
|
|
142
|
+
if (options.password) {
|
|
143
|
+
convertOptions.password = options.password;
|
|
144
|
+
}
|
|
145
|
+
if (options.format && options.format.length > 0) {
|
|
146
|
+
convertOptions.format = options.format;
|
|
147
|
+
}
|
|
148
|
+
if (options.quiet) {
|
|
149
|
+
convertOptions.quiet = true;
|
|
210
150
|
}
|
|
211
|
-
|
|
151
|
+
if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {
|
|
152
|
+
convertOptions.contentSafetyOff = options.contentSafetyOff;
|
|
153
|
+
}
|
|
154
|
+
if (options.keepLineBreaks) {
|
|
155
|
+
convertOptions.keepLineBreaks = true;
|
|
156
|
+
}
|
|
157
|
+
if (options.replaceInvalidChars) {
|
|
158
|
+
convertOptions.replaceInvalidChars = options.replaceInvalidChars;
|
|
159
|
+
}
|
|
160
|
+
return convertOptions;
|
|
212
161
|
}
|
|
213
162
|
async function main() {
|
|
214
|
-
|
|
163
|
+
const program = createProgram();
|
|
164
|
+
program.exitOverride();
|
|
215
165
|
try {
|
|
216
|
-
|
|
166
|
+
program.parse(process.argv);
|
|
217
167
|
} catch (err) {
|
|
168
|
+
if (err instanceof CommanderError) {
|
|
169
|
+
if (err.code === "commander.helpDisplayed") {
|
|
170
|
+
return 0;
|
|
171
|
+
}
|
|
172
|
+
return err.exitCode ?? 1;
|
|
173
|
+
}
|
|
218
174
|
const message = err instanceof Error ? err.message : String(err);
|
|
219
175
|
console.error(message);
|
|
220
176
|
console.error("Use '--help' to see available options.");
|
|
221
177
|
return 1;
|
|
222
178
|
}
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
179
|
+
const cliOptions = program.opts();
|
|
180
|
+
const inputPaths = program.args;
|
|
181
|
+
if (cliOptions.format) {
|
|
182
|
+
for (const value of cliOptions.format) {
|
|
183
|
+
if (!VALID_FORMATS.has(value)) {
|
|
184
|
+
console.error(`Invalid format '${value}'. See '--help' for allowed values.`);
|
|
185
|
+
console.error("Use '--help' to see available options.");
|
|
186
|
+
return 1;
|
|
187
|
+
}
|
|
188
|
+
}
|
|
226
189
|
}
|
|
227
|
-
if (
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
190
|
+
if (cliOptions.contentSafetyOff) {
|
|
191
|
+
for (const value of cliOptions.contentSafetyOff) {
|
|
192
|
+
if (!VALID_CONTENT_SAFETY_MODES.has(value)) {
|
|
193
|
+
console.error(`Invalid content safety mode '${value}'. See '--help' for allowed values.`);
|
|
194
|
+
console.error("Use '--help' to see available options.");
|
|
195
|
+
return 1;
|
|
196
|
+
}
|
|
197
|
+
}
|
|
231
198
|
}
|
|
199
|
+
const convertOptions = buildConvertOptions(cliOptions);
|
|
232
200
|
try {
|
|
233
|
-
const output = await
|
|
234
|
-
if (output && !
|
|
201
|
+
const output = await convert(inputPaths, convertOptions);
|
|
202
|
+
if (output && !convertOptions.quiet) {
|
|
235
203
|
process.stdout.write(output);
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
204
|
+
if (!output.endsWith("\n")) {
|
|
205
|
+
process.stdout.write("\n");
|
|
206
|
+
}
|
|
239
207
|
}
|
|
240
208
|
return 0;
|
|
241
209
|
} catch (err) {
|
package/dist/cli.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts","../src/cli.ts"],"sourcesContent":["import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { fileURLToPath } from 'url';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\nfunction getRedactedCommandString(command: string, commandArgs: string[]): string {\n const commandArgsForLogging = [...commandArgs];\n const passwordIndex = commandArgsForLogging.indexOf('--password');\n if (passwordIndex > -1 && passwordIndex + 1 < commandArgsForLogging.length) {\n commandArgsForLogging[passwordIndex + 1] = '[REDACTED]';\n }\n return `${command} ${commandArgsForLogging.join(' ')}`;\n}\n\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n}\n\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n return new Promise((resolve, reject) => {\n if (!fs.existsSync(inputPath)) {\n return reject(new Error(`Input file or folder not found: ${inputPath}`));\n }\n\n const args: string[] = [];\n if (options.outputFolder) {\n args.push('--output-dir', options.outputFolder);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.generateMarkdown) {\n args.push('--markdown');\n }\n if (options.generateHtml) {\n args.push('--html');\n }\n if (options.generateAnnotatedPdf) {\n args.push('--pdf');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.contentSafetyOff) {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n if (options.htmlInMarkdown) {\n args.push('--markdown-with-html');\n }\n if (options.addImageToMarkdown) {\n args.push('--markdown-with-images');\n }\n if (options.noJson) {\n args.push('--no-json');\n }\n\n args.push(inputPath);\n\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n if (options.debug) {\n console.error(`Running command: ${getRedactedCommandString(command, commandArgs)}`);\n }\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n\n javaProcess.stdout.on('data', (data) => {\n const chunk = data.toString();\n if (options.debug) {\n process.stdout.write(chunk);\n }\n stdout += chunk;\n });\n\n javaProcess.stderr.on('data', (data) => {\n const chunk = data.toString();\n if (options.debug) {\n process.stderr.write(chunk);\n }\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n if (code === 0) {\n resolve(stdout);\n } else {\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${stderr}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n","#!/usr/bin/env node\nimport { run, RunOptions } from './index.js';\n\ninterface ParsedArgs {\n inputPath?: string;\n options: RunOptions;\n showHelp: boolean;\n}\n\nfunction printHelp(): void {\n console.log(`Usage: opendataloader-pdf [options] <input>`);\n console.log('');\n console.log('Options:');\n console.log(' -o, --output-dir <path> Directory where outputs are written');\n console.log(' -p, --password <password> Password for encrypted PDFs');\n console.log(' --replace-invalid-chars <c> Replacement character for invalid characters');\n console.log(' --content-safety-off <mode> Disable content safety filtering (provide mode)');\n console.log(' --markdown Generate Markdown output');\n console.log(' --html Generate HTML output');\n console.log(' --pdf Generate annotated PDF output');\n console.log(' --keep-line-breaks Preserve line breaks in text output');\n console.log(' --markdown-with-html Allow raw HTML within Markdown output');\n console.log(' --markdown-with-images Embed images in Markdown output');\n console.log(' --no-json Disable JSON output generation');\n console.log(' --debug Stream CLI logs directly to stdout/stderr');\n console.log(' -h, --help Show this message and exit');\n}\n\nfunction parseArgs(argv: string[]): ParsedArgs {\n const options: RunOptions = {};\n let inputPath: string | undefined;\n let showHelp = false;\n\n const readValue = (currentIndex: number, option: string): { value: string; nextIndex: number } => {\n const nextValue = argv[currentIndex + 1];\n if (!nextValue || nextValue.startsWith('-')) {\n throw new Error(`Option ${option} requires a value.`);\n }\n return { value: nextValue, nextIndex: currentIndex + 1 };\n };\n\n for (let i = 0; i < argv.length; i += 1) {\n const arg = argv[i];\n\n switch (arg) {\n case '--help':\n case '-h':\n showHelp = true;\n i = argv.length; // exit loop\n break;\n case '--output-dir':\n case '-o': {\n const { value, nextIndex } = readValue(i, arg);\n options.outputFolder = value;\n i = nextIndex;\n break;\n }\n case '--password':\n case '-p': {\n const { value, nextIndex } = readValue(i, arg);\n options.password = value;\n i = nextIndex;\n break;\n }\n case '--replace-invalid-chars': {\n const { value, nextIndex } = readValue(i, arg);\n options.replaceInvalidChars = value;\n i = nextIndex;\n break;\n }\n case '--content-safety-off': {\n const { value, nextIndex } = readValue(i, arg);\n options.contentSafetyOff = value;\n i = nextIndex;\n break;\n }\n case '--markdown':\n options.generateMarkdown = true;\n break;\n case '--html':\n options.generateHtml = true;\n break;\n case '--pdf':\n options.generateAnnotatedPdf = true;\n break;\n case '--keep-line-breaks':\n options.keepLineBreaks = true;\n break;\n case '--markdown-with-html':\n options.htmlInMarkdown = true;\n break;\n case '--markdown-with-images':\n options.addImageToMarkdown = true;\n break;\n case '--no-json':\n options.noJson = true;\n break;\n case '--debug':\n options.debug = true;\n break;\n default:\n if (arg.startsWith('-')) {\n throw new Error(`Unknown option: ${arg}`);\n }\n\n if (inputPath) {\n throw new Error('Multiple input paths provided. Only one input path is allowed.');\n }\n inputPath = arg;\n }\n }\n\n return { inputPath, options, showHelp };\n}\n\nasync function main(): Promise<number> {\n let parsed: ParsedArgs;\n\n try {\n parsed = parseArgs(process.argv.slice(2));\n } catch (err) {\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n\n if (parsed.showHelp) {\n printHelp();\n return 0;\n }\n\n if (!parsed.inputPath) {\n console.error('Missing required input path.');\n console.error(\"Use '--help' to see usage information.\");\n return 1;\n }\n\n try {\n const output = await run(parsed.inputPath, parsed.options);\n if (output && !parsed.options.debug) {\n process.stdout.write(output);\n }\n if (output && !output.endsWith('\\n') && !parsed.options.debug) {\n process.stdout.write('\\n');\n }\n return 0;\n } catch (err) {\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n return 1;\n }\n}\n\nmain().then((code) => {\n if (code !== 0) {\n process.exit(code);\n }\n});\n"],"mappings":";;;AAAA,SAAS,aAAa;AACtB,YAAY,UAAU;AACtB,YAAY,QAAQ;AACpB,SAAS,qBAAqB;AAE9B,IAAM,aAAa,cAAc,YAAY,GAAG;AAChD,IAAM,YAAiB,aAAQ,UAAU;AAEzC,IAAM,WAAW;AAEjB,SAAS,yBAAyB,SAAiB,aAA+B;AAChF,QAAM,wBAAwB,CAAC,GAAG,WAAW;AAC7C,QAAM,gBAAgB,sBAAsB,QAAQ,YAAY;AAChE,MAAI,gBAAgB,MAAM,gBAAgB,IAAI,sBAAsB,QAAQ;AAC1E,0BAAsB,gBAAgB,CAAC,IAAI;AAAA,EAC7C;AACA,SAAO,GAAG,OAAO,IAAI,sBAAsB,KAAK,GAAG,CAAC;AACtD;AAiBO,SAAS,IAAI,WAAmB,UAAsB,CAAC,GAAoB;AAChF,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,QAAI,CAAI,cAAW,SAAS,GAAG;AAC7B,aAAO,OAAO,IAAI,MAAM,mCAAmC,SAAS,EAAE,CAAC;AAAA,IACzE;AAEA,UAAM,OAAiB,CAAC;AACxB,QAAI,QAAQ,cAAc;AACxB,WAAK,KAAK,gBAAgB,QAAQ,YAAY;AAAA,IAChD;AACA,QAAI,QAAQ,UAAU;AACpB,WAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,IAC1C;AACA,QAAI,QAAQ,qBAAqB;AAC/B,WAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,IAClE;AACA,QAAI,QAAQ,kBAAkB;AAC5B,WAAK,KAAK,YAAY;AAAA,IACxB;AACA,QAAI,QAAQ,cAAc;AACxB,WAAK,KAAK,QAAQ;AAAA,IACpB;AACA,QAAI,QAAQ,sBAAsB;AAChC,WAAK,KAAK,OAAO;AAAA,IACnB;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,oBAAoB;AAAA,IAChC;AACA,QAAI,QAAQ,kBAAkB;AAC5B,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,sBAAsB;AAAA,IAClC;AACA,QAAI,QAAQ,oBAAoB;AAC9B,WAAK,KAAK,wBAAwB;AAAA,IACpC;AACA,QAAI,QAAQ,QAAQ;AAClB,WAAK,KAAK,WAAW;AAAA,IACvB;AAEA,SAAK,KAAK,SAAS;AAEnB,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,QAAI,QAAQ,OAAO;AACjB,cAAQ,MAAM,oBAAoB,yBAAyB,SAAS,WAAW,CAAC,EAAE;AAAA,IACpF;AAEA,UAAM,cAAc,MAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,QAAQ,OAAO;AACjB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,QAAQ,OAAO;AACjB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAChC,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,MAAM;AAAA,QACnE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAQ;AAC/B,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;;;AC/HA,SAAS,YAAkB;AACzB,UAAQ,IAAI,6CAA6C;AACzD,UAAQ,IAAI,EAAE;AACd,UAAQ,IAAI,UAAU;AACtB,UAAQ,IAAI,oEAAoE;AAChF,UAAQ,IAAI,4DAA4D;AACxE,UAAQ,IAAI,6EAA6E;AACzF,UAAQ,IAAI,gFAAgF;AAC5F,UAAQ,IAAI,yDAAyD;AACrE,UAAQ,IAAI,qDAAqD;AACjE,UAAQ,IAAI,8DAA8D;AAC1E,UAAQ,IAAI,oEAAoE;AAChF,UAAQ,IAAI,sEAAsE;AAClF,UAAQ,IAAI,gEAAgE;AAC5E,UAAQ,IAAI,+DAA+D;AAC3E,UAAQ,IAAI,0EAA0E;AACtF,UAAQ,IAAI,2DAA2D;AACzE;AAEA,SAAS,UAAU,MAA4B;AAC7C,QAAM,UAAsB,CAAC;AAC7B,MAAI;AACJ,MAAI,WAAW;AAEf,QAAM,YAAY,CAAC,cAAsB,WAAyD;AAChG,UAAM,YAAY,KAAK,eAAe,CAAC;AACvC,QAAI,CAAC,aAAa,UAAU,WAAW,GAAG,GAAG;AAC3C,YAAM,IAAI,MAAM,UAAU,MAAM,oBAAoB;AAAA,IACtD;AACA,WAAO,EAAE,OAAO,WAAW,WAAW,eAAe,EAAE;AAAA,EACzD;AAEA,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK,GAAG;AACvC,UAAM,MAAM,KAAK,CAAC;AAElB,YAAQ,KAAK;AAAA,MACX,KAAK;AAAA,MACL,KAAK;AACH,mBAAW;AACX,YAAI,KAAK;AACT;AAAA,MACF,KAAK;AAAA,MACL,KAAK,MAAM;AACT,cAAM,EAAE,OAAO,UAAU,IAAI,UAAU,GAAG,GAAG;AAC7C,gBAAQ,eAAe;AACvB,YAAI;AACJ;AAAA,MACF;AAAA,MACA,KAAK;AAAA,MACL,KAAK,MAAM;AACT,cAAM,EAAE,OAAO,UAAU,IAAI,UAAU,GAAG,GAAG;AAC7C,gBAAQ,WAAW;AACnB,YAAI;AACJ;AAAA,MACF;AAAA,MACA,KAAK,2BAA2B;AAC9B,cAAM,EAAE,OAAO,UAAU,IAAI,UAAU,GAAG,GAAG;AAC7C,gBAAQ,sBAAsB;AAC9B,YAAI;AACJ;AAAA,MACF;AAAA,MACA,KAAK,wBAAwB;AAC3B,cAAM,EAAE,OAAO,UAAU,IAAI,UAAU,GAAG,GAAG;AAC7C,gBAAQ,mBAAmB;AAC3B,YAAI;AACJ;AAAA,MACF;AAAA,MACA,KAAK;AACH,gBAAQ,mBAAmB;AAC3B;AAAA,MACF,KAAK;AACH,gBAAQ,eAAe;AACvB;AAAA,MACF,KAAK;AACH,gBAAQ,uBAAuB;AAC/B;AAAA,MACF,KAAK;AACH,gBAAQ,iBAAiB;AACzB;AAAA,MACF,KAAK;AACH,gBAAQ,iBAAiB;AACzB;AAAA,MACF,KAAK;AACH,gBAAQ,qBAAqB;AAC7B;AAAA,MACF,KAAK;AACH,gBAAQ,SAAS;AACjB;AAAA,MACF,KAAK;AACH,gBAAQ,QAAQ;AAChB;AAAA,MACF;AACE,YAAI,IAAI,WAAW,GAAG,GAAG;AACvB,gBAAM,IAAI,MAAM,mBAAmB,GAAG,EAAE;AAAA,QAC1C;AAEA,YAAI,WAAW;AACb,gBAAM,IAAI,MAAM,gEAAgE;AAAA,QAClF;AACA,oBAAY;AAAA,IAChB;AAAA,EACF;AAEA,SAAO,EAAE,WAAW,SAAS,SAAS;AACxC;AAEA,eAAe,OAAwB;AACrC,MAAI;AAEJ,MAAI;AACF,aAAS,UAAU,QAAQ,KAAK,MAAM,CAAC,CAAC;AAAA,EAC1C,SAAS,KAAK;AACZ,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,MAAI,OAAO,UAAU;AACnB,cAAU;AACV,WAAO;AAAA,EACT;AAEA,MAAI,CAAC,OAAO,WAAW;AACrB,YAAQ,MAAM,8BAA8B;AAC5C,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,MAAI;AACF,UAAM,SAAS,MAAM,IAAI,OAAO,WAAW,OAAO,OAAO;AACzD,QAAI,UAAU,CAAC,OAAO,QAAQ,OAAO;AACnC,cAAQ,OAAO,MAAM,MAAM;AAAA,IAC7B;AACA,QAAI,UAAU,CAAC,OAAO,SAAS,IAAI,KAAK,CAAC,OAAO,QAAQ,OAAO;AAC7D,cAAQ,OAAO,MAAM,IAAI;AAAA,IAC3B;AACA,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,KAAK,EAAE,KAAK,CAAC,SAAS;AACpB,MAAI,SAAS,GAAG;AACd,YAAQ,KAAK,IAAI;AAAA,EACnB;AACF,CAAC;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/cli.ts","../src/index.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { Command, CommanderError } from 'commander';\nimport { convert, ConvertOptions } from './index.js';\n\ninterface CliOptions {\n outputDir?: string;\n password?: string;\n format?: string[];\n quiet?: boolean;\n contentSafetyOff?: string[];\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n}\n\nconst VALID_FORMATS = new Set([\n 'json',\n 'text',\n 'html',\n 'pdf',\n 'markdown',\n 'markdown-with-html',\n 'markdown-with-images',\n]);\n\nconst VALID_CONTENT_SAFETY_MODES = new Set([\n 'all',\n 'hidden-text',\n 'off-page',\n 'tiny',\n 'hidden-ocg',\n]);\n\nfunction createProgram(): Command {\n const program = new Command();\n\n program\n .name('opendataloader-pdf')\n .usage('[options] <input...>')\n .description('Convert PDFs using the OpenDataLoader CLI.')\n .showHelpAfterError(\"Use '--help' to see available options.\")\n .showSuggestionAfterError(false)\n .argument('<input...>', 'Input files or directories to convert')\n .option('-o, --output-dir <path>', 'Directory where outputs are written')\n .option('-p, --password <password>', 'Password for encrypted PDFs')\n .option(\n '-f, --format <value...>',\n 'Output formats to generate (json, text, html, pdf, markdown, markdown-with-html, markdown-with-images)',\n )\n .option('-q, --quiet', 'Suppress CLI logging output')\n .option('--content-safety-off <mode...>', 'Disable one or more content safety filters')\n .option('--keep-line-breaks', 'Preserve line breaks in text output')\n .option('--replace-invalid-chars <c>', 'Replacement character for invalid characters');\n\n program.configureOutput({\n writeErr: (str) => {\n console.error(str.trimEnd());\n },\n outputError: (str, write) => {\n write(str);\n },\n });\n\n return program;\n}\n\nfunction buildConvertOptions(options: CliOptions): ConvertOptions {\n const convertOptions: ConvertOptions = {};\n\n if (options.outputDir) {\n convertOptions.outputDir = options.outputDir;\n }\n if (options.password) {\n convertOptions.password = options.password;\n }\n if (options.format && options.format.length > 0) {\n convertOptions.format = options.format;\n }\n if (options.quiet) {\n convertOptions.quiet = true;\n }\n if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {\n convertOptions.contentSafetyOff = options.contentSafetyOff;\n }\n if (options.keepLineBreaks) {\n convertOptions.keepLineBreaks = true;\n }\n if (options.replaceInvalidChars) {\n convertOptions.replaceInvalidChars = options.replaceInvalidChars;\n }\n\n return convertOptions;\n}\n\nasync function main(): Promise<number> {\n const program = createProgram();\n\n program.exitOverride();\n\n try {\n program.parse(process.argv);\n } catch (err) {\n if (err instanceof CommanderError) {\n if (err.code === 'commander.helpDisplayed') {\n return 0;\n }\n return err.exitCode ?? 1;\n }\n\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n\n const cliOptions = program.opts<CliOptions>();\n const inputPaths = program.args;\n\n if (cliOptions.format) {\n for (const value of cliOptions.format) {\n if (!VALID_FORMATS.has(value)) {\n console.error(`Invalid format '${value}'. See '--help' for allowed values.`);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n }\n }\n\n if (cliOptions.contentSafetyOff) {\n for (const value of cliOptions.contentSafetyOff) {\n if (!VALID_CONTENT_SAFETY_MODES.has(value)) {\n console.error(`Invalid content safety mode '${value}'. See '--help' for allowed values.`);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n }\n }\n\n const convertOptions = buildConvertOptions(cliOptions);\n\n try {\n const output = await convert(inputPaths, convertOptions);\n if (output && !convertOptions.quiet) {\n process.stdout.write(output);\n if (!output.endsWith('\\n')) {\n process.stdout.write('\\n');\n }\n }\n return 0;\n } catch (err) {\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n return 1;\n }\n}\n\nmain().then((code) => {\n if (code !== 0) {\n process.exit(code);\n }\n});\n","import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { fileURLToPath } from 'url';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n debug?: boolean;\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { debug = false, streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n\n javaProcess.stdout.on('data', (data) => {\n const chunk = data.toString();\n if (streamOutput) {\n process.stdout.write(chunk);\n }\n stdout += chunk;\n });\n\n javaProcess.stderr.on('data', (data) => {\n const chunk = data.toString();\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n}\n\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n return new Promise((resolve, reject) => {\n if (!fs.existsSync(inputPath)) {\n return reject(new Error(`Input file or folder not found: ${inputPath}`));\n }\n\n const args: string[] = [];\n if (options.outputFolder) {\n args.push('--output-dir', options.outputFolder);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.generateMarkdown) {\n args.push('--markdown');\n }\n if (options.generateHtml) {\n args.push('--html');\n }\n if (options.generateAnnotatedPdf) {\n args.push('--pdf');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.contentSafetyOff) {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n if (options.htmlInMarkdown) {\n args.push('--markdown-with-html');\n }\n if (options.addImageToMarkdown) {\n args.push('--markdown-with-images');\n }\n if (options.noJson) {\n args.push('--no-json');\n }\n\n args.push(inputPath);\n executeJar(args, {\n debug: options.debug,\n streamOutput: Boolean(options.debug),\n })\n .then(resolve)\n .catch(reject);\n });\n}\n\nexport interface ConvertOptions {\n outputDir?: string;\n password?: string;\n format?: string[];\n quiet?: boolean;\n contentSafetyOff?: string[];\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n}\n\nexport function convert(inputPaths: string[], options: ConvertOptions = {}): Promise<string> {\n if (inputPaths.length === 0) {\n return Promise.reject(new Error('At least one input path must be provided.'));\n }\n\n for (const input of inputPaths) {\n if (!fs.existsSync(input)) {\n return Promise.reject(new Error(`Input file or folder not found: ${input}`));\n }\n }\n\n const args: string[] = [...inputPaths];\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format && options.format.length > 0) {\n args.push('--format', ...options.format);\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {\n args.push('--content-safety-off', ...options.contentSafetyOff);\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n\n return executeJar(args, {\n streamOutput: !options.quiet,\n });\n}\n"],"mappings":";;;AACA,SAAS,SAAS,sBAAsB;;;ACDxC,SAAS,aAAa;AACtB,YAAY,UAAU;AACtB,YAAY,QAAQ;AACpB,SAAS,qBAAqB;AAE9B,IAAM,aAAa,cAAc,YAAY,GAAG;AAChD,IAAM,YAAiB,aAAQ,UAAU;AAEzC,IAAM,WAAW;AAOjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,QAAQ,OAAO,eAAe,MAAM,IAAI;AAEhD,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,UAAM,cAAc,MAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAChC,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AA8EO,SAAS,QAAQ,YAAsB,UAA0B,CAAC,GAAoB;AAC3F,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO,QAAQ,OAAO,IAAI,MAAM,2CAA2C,CAAC;AAAA,EAC9E;AAEA,aAAW,SAAS,YAAY;AAC9B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,QAAQ,OAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE,CAAC;AAAA,IAC7E;AAAA,EACF;AAEA,QAAM,OAAiB,CAAC,GAAG,UAAU;AACrC,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,UAAU,QAAQ,OAAO,SAAS,GAAG;AAC/C,SAAK,KAAK,YAAY,GAAG,QAAQ,MAAM;AAAA,EACzC;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,oBAAoB,QAAQ,iBAAiB,SAAS,GAAG;AACnE,SAAK,KAAK,wBAAwB,GAAG,QAAQ,gBAAgB;AAAA,EAC/D;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBAAoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AAEA,SAAO,WAAW,MAAM;AAAA,IACtB,cAAc,CAAC,QAAQ;AAAA,EACzB,CAAC;AACH;;;ADhLA,IAAM,gBAAgB,oBAAI,IAAI;AAAA,EAC5B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,IAAM,6BAA6B,oBAAI,IAAI;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,SAAS,gBAAyB;AAChC,QAAM,UAAU,IAAI,QAAQ;AAE5B,UACG,KAAK,oBAAoB,EACzB,MAAM,sBAAsB,EAC5B,YAAY,4CAA4C,EACxD,mBAAmB,wCAAwC,EAC3D,yBAAyB,KAAK,EAC9B,SAAS,cAAc,uCAAuC,EAC9D,OAAO,2BAA2B,qCAAqC,EACvE,OAAO,6BAA6B,6BAA6B,EACjE;AAAA,IACC;AAAA,IACA;AAAA,EACF,EACC,OAAO,eAAe,6BAA6B,EACnD,OAAO,kCAAkC,4CAA4C,EACrF,OAAO,sBAAsB,qCAAqC,EAClE,OAAO,+BAA+B,8CAA8C;AAEvF,UAAQ,gBAAgB;AAAA,IACtB,UAAU,CAAC,QAAQ;AACjB,cAAQ,MAAM,IAAI,QAAQ,CAAC;AAAA,IAC7B;AAAA,IACA,aAAa,CAAC,KAAK,UAAU;AAC3B,YAAM,GAAG;AAAA,IACX;AAAA,EACF,CAAC;AAED,SAAO;AACT;AAEA,SAAS,oBAAoB,SAAqC;AAChE,QAAM,iBAAiC,CAAC;AAExC,MAAI,QAAQ,WAAW;AACrB,mBAAe,YAAY,QAAQ;AAAA,EACrC;AACA,MAAI,QAAQ,UAAU;AACpB,mBAAe,WAAW,QAAQ;AAAA,EACpC;AACA,MAAI,QAAQ,UAAU,QAAQ,OAAO,SAAS,GAAG;AAC/C,mBAAe,SAAS,QAAQ;AAAA,EAClC;AACA,MAAI,QAAQ,OAAO;AACjB,mBAAe,QAAQ;AAAA,EACzB;AACA,MAAI,QAAQ,oBAAoB,QAAQ,iBAAiB,SAAS,GAAG;AACnE,mBAAe,mBAAmB,QAAQ;AAAA,EAC5C;AACA,MAAI,QAAQ,gBAAgB;AAC1B,mBAAe,iBAAiB;AAAA,EAClC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,mBAAe,sBAAsB,QAAQ;AAAA,EAC/C;AAEA,SAAO;AACT;AAEA,eAAe,OAAwB;AACrC,QAAM,UAAU,cAAc;AAE9B,UAAQ,aAAa;AAErB,MAAI;AACF,YAAQ,MAAM,QAAQ,IAAI;AAAA,EAC5B,SAAS,KAAK;AACZ,QAAI,eAAe,gBAAgB;AACjC,UAAI,IAAI,SAAS,2BAA2B;AAC1C,eAAO;AAAA,MACT;AACA,aAAO,IAAI,YAAY;AAAA,IACzB;AAEA,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,QAAQ,KAAiB;AAC5C,QAAM,aAAa,QAAQ;AAE3B,MAAI,WAAW,QAAQ;AACrB,eAAW,SAAS,WAAW,QAAQ;AACrC,UAAI,CAAC,cAAc,IAAI,KAAK,GAAG;AAC7B,gBAAQ,MAAM,mBAAmB,KAAK,qCAAqC;AAC3E,gBAAQ,MAAM,wCAAwC;AACtD,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAEA,MAAI,WAAW,kBAAkB;AAC/B,eAAW,SAAS,WAAW,kBAAkB;AAC/C,UAAI,CAAC,2BAA2B,IAAI,KAAK,GAAG;AAC1C,gBAAQ,MAAM,gCAAgC,KAAK,qCAAqC;AACxF,gBAAQ,MAAM,wCAAwC;AACtD,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAEA,QAAM,iBAAiB,oBAAoB,UAAU;AAErD,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ,YAAY,cAAc;AACvD,QAAI,UAAU,CAAC,eAAe,OAAO;AACnC,cAAQ,OAAO,MAAM,MAAM;AAC3B,UAAI,CAAC,OAAO,SAAS,IAAI,GAAG;AAC1B,gBAAQ,OAAO,MAAM,IAAI;AAAA,MAC3B;AAAA,IACF;AACA,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,KAAK,EAAE,KAAK,CAAC,SAAS;AACpB,MAAI,SAAS,GAAG;AACd,YAAQ,KAAK,IAAI;AAAA,EACnB;AACF,CAAC;","names":[]}
|
package/dist/index.cjs
CHANGED
|
@@ -30,6 +30,7 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
|
|
|
30
30
|
// src/index.ts
|
|
31
31
|
var index_exports = {};
|
|
32
32
|
__export(index_exports, {
|
|
33
|
+
convert: () => convert,
|
|
33
34
|
run: () => run
|
|
34
35
|
});
|
|
35
36
|
module.exports = __toCommonJS(index_exports);
|
|
@@ -41,54 +42,9 @@ var import_meta = {};
|
|
|
41
42
|
var __filename = (0, import_url.fileURLToPath)(import_meta.url);
|
|
42
43
|
var __dirname = path.dirname(__filename);
|
|
43
44
|
var JAR_NAME = "opendataloader-pdf-cli.jar";
|
|
44
|
-
function
|
|
45
|
-
const
|
|
46
|
-
const passwordIndex = commandArgsForLogging.indexOf("--password");
|
|
47
|
-
if (passwordIndex > -1 && passwordIndex + 1 < commandArgsForLogging.length) {
|
|
48
|
-
commandArgsForLogging[passwordIndex + 1] = "[REDACTED]";
|
|
49
|
-
}
|
|
50
|
-
return `${command} ${commandArgsForLogging.join(" ")}`;
|
|
51
|
-
}
|
|
52
|
-
function run(inputPath, options = {}) {
|
|
45
|
+
function executeJar(args, executionOptions = {}) {
|
|
46
|
+
const { debug = false, streamOutput = false } = executionOptions;
|
|
53
47
|
return new Promise((resolve, reject) => {
|
|
54
|
-
if (!fs.existsSync(inputPath)) {
|
|
55
|
-
return reject(new Error(`Input file or folder not found: ${inputPath}`));
|
|
56
|
-
}
|
|
57
|
-
const args = [];
|
|
58
|
-
if (options.outputFolder) {
|
|
59
|
-
args.push("--output-dir", options.outputFolder);
|
|
60
|
-
}
|
|
61
|
-
if (options.password) {
|
|
62
|
-
args.push("--password", options.password);
|
|
63
|
-
}
|
|
64
|
-
if (options.replaceInvalidChars) {
|
|
65
|
-
args.push("--replace-invalid-chars", options.replaceInvalidChars);
|
|
66
|
-
}
|
|
67
|
-
if (options.generateMarkdown) {
|
|
68
|
-
args.push("--markdown");
|
|
69
|
-
}
|
|
70
|
-
if (options.generateHtml) {
|
|
71
|
-
args.push("--html");
|
|
72
|
-
}
|
|
73
|
-
if (options.generateAnnotatedPdf) {
|
|
74
|
-
args.push("--pdf");
|
|
75
|
-
}
|
|
76
|
-
if (options.keepLineBreaks) {
|
|
77
|
-
args.push("--keep-line-breaks");
|
|
78
|
-
}
|
|
79
|
-
if (options.contentSafetyOff) {
|
|
80
|
-
args.push("--content-safety-off", options.contentSafetyOff);
|
|
81
|
-
}
|
|
82
|
-
if (options.htmlInMarkdown) {
|
|
83
|
-
args.push("--markdown-with-html");
|
|
84
|
-
}
|
|
85
|
-
if (options.addImageToMarkdown) {
|
|
86
|
-
args.push("--markdown-with-images");
|
|
87
|
-
}
|
|
88
|
-
if (options.noJson) {
|
|
89
|
-
args.push("--no-json");
|
|
90
|
-
}
|
|
91
|
-
args.push(inputPath);
|
|
92
48
|
const jarPath = path.join(__dirname, "..", "lib", JAR_NAME);
|
|
93
49
|
if (!fs.existsSync(jarPath)) {
|
|
94
50
|
return reject(
|
|
@@ -97,22 +53,19 @@ function run(inputPath, options = {}) {
|
|
|
97
53
|
}
|
|
98
54
|
const command = "java";
|
|
99
55
|
const commandArgs = ["-jar", jarPath, ...args];
|
|
100
|
-
if (options.debug) {
|
|
101
|
-
console.error(`Running command: ${getRedactedCommandString(command, commandArgs)}`);
|
|
102
|
-
}
|
|
103
56
|
const javaProcess = (0, import_child_process.spawn)(command, commandArgs);
|
|
104
57
|
let stdout = "";
|
|
105
58
|
let stderr = "";
|
|
106
59
|
javaProcess.stdout.on("data", (data) => {
|
|
107
60
|
const chunk = data.toString();
|
|
108
|
-
if (
|
|
61
|
+
if (streamOutput) {
|
|
109
62
|
process.stdout.write(chunk);
|
|
110
63
|
}
|
|
111
64
|
stdout += chunk;
|
|
112
65
|
});
|
|
113
66
|
javaProcess.stderr.on("data", (data) => {
|
|
114
67
|
const chunk = data.toString();
|
|
115
|
-
if (
|
|
68
|
+
if (streamOutput) {
|
|
116
69
|
process.stderr.write(chunk);
|
|
117
70
|
}
|
|
118
71
|
stderr += chunk;
|
|
@@ -121,10 +74,11 @@ function run(inputPath, options = {}) {
|
|
|
121
74
|
if (code === 0) {
|
|
122
75
|
resolve(stdout);
|
|
123
76
|
} else {
|
|
77
|
+
const errorOutput = stderr || stdout;
|
|
124
78
|
const error = new Error(
|
|
125
79
|
`The opendataloader-pdf CLI exited with code ${code}.
|
|
126
80
|
|
|
127
|
-
${
|
|
81
|
+
${errorOutput}`
|
|
128
82
|
);
|
|
129
83
|
reject(error);
|
|
130
84
|
}
|
|
@@ -142,8 +96,90 @@ ${stderr}`
|
|
|
142
96
|
});
|
|
143
97
|
});
|
|
144
98
|
}
|
|
99
|
+
function run(inputPath, options = {}) {
|
|
100
|
+
return new Promise((resolve, reject) => {
|
|
101
|
+
if (!fs.existsSync(inputPath)) {
|
|
102
|
+
return reject(new Error(`Input file or folder not found: ${inputPath}`));
|
|
103
|
+
}
|
|
104
|
+
const args = [];
|
|
105
|
+
if (options.outputFolder) {
|
|
106
|
+
args.push("--output-dir", options.outputFolder);
|
|
107
|
+
}
|
|
108
|
+
if (options.password) {
|
|
109
|
+
args.push("--password", options.password);
|
|
110
|
+
}
|
|
111
|
+
if (options.replaceInvalidChars) {
|
|
112
|
+
args.push("--replace-invalid-chars", options.replaceInvalidChars);
|
|
113
|
+
}
|
|
114
|
+
if (options.generateMarkdown) {
|
|
115
|
+
args.push("--markdown");
|
|
116
|
+
}
|
|
117
|
+
if (options.generateHtml) {
|
|
118
|
+
args.push("--html");
|
|
119
|
+
}
|
|
120
|
+
if (options.generateAnnotatedPdf) {
|
|
121
|
+
args.push("--pdf");
|
|
122
|
+
}
|
|
123
|
+
if (options.keepLineBreaks) {
|
|
124
|
+
args.push("--keep-line-breaks");
|
|
125
|
+
}
|
|
126
|
+
if (options.contentSafetyOff) {
|
|
127
|
+
args.push("--content-safety-off", options.contentSafetyOff);
|
|
128
|
+
}
|
|
129
|
+
if (options.htmlInMarkdown) {
|
|
130
|
+
args.push("--markdown-with-html");
|
|
131
|
+
}
|
|
132
|
+
if (options.addImageToMarkdown) {
|
|
133
|
+
args.push("--markdown-with-images");
|
|
134
|
+
}
|
|
135
|
+
if (options.noJson) {
|
|
136
|
+
args.push("--no-json");
|
|
137
|
+
}
|
|
138
|
+
args.push(inputPath);
|
|
139
|
+
executeJar(args, {
|
|
140
|
+
debug: options.debug,
|
|
141
|
+
streamOutput: Boolean(options.debug)
|
|
142
|
+
}).then(resolve).catch(reject);
|
|
143
|
+
});
|
|
144
|
+
}
|
|
145
|
+
function convert(inputPaths, options = {}) {
|
|
146
|
+
if (inputPaths.length === 0) {
|
|
147
|
+
return Promise.reject(new Error("At least one input path must be provided."));
|
|
148
|
+
}
|
|
149
|
+
for (const input of inputPaths) {
|
|
150
|
+
if (!fs.existsSync(input)) {
|
|
151
|
+
return Promise.reject(new Error(`Input file or folder not found: ${input}`));
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
const args = [...inputPaths];
|
|
155
|
+
if (options.outputDir) {
|
|
156
|
+
args.push("--output-dir", options.outputDir);
|
|
157
|
+
}
|
|
158
|
+
if (options.password) {
|
|
159
|
+
args.push("--password", options.password);
|
|
160
|
+
}
|
|
161
|
+
if (options.format && options.format.length > 0) {
|
|
162
|
+
args.push("--format", ...options.format);
|
|
163
|
+
}
|
|
164
|
+
if (options.quiet) {
|
|
165
|
+
args.push("--quiet");
|
|
166
|
+
}
|
|
167
|
+
if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {
|
|
168
|
+
args.push("--content-safety-off", ...options.contentSafetyOff);
|
|
169
|
+
}
|
|
170
|
+
if (options.keepLineBreaks) {
|
|
171
|
+
args.push("--keep-line-breaks");
|
|
172
|
+
}
|
|
173
|
+
if (options.replaceInvalidChars) {
|
|
174
|
+
args.push("--replace-invalid-chars", options.replaceInvalidChars);
|
|
175
|
+
}
|
|
176
|
+
return executeJar(args, {
|
|
177
|
+
streamOutput: !options.quiet
|
|
178
|
+
});
|
|
179
|
+
}
|
|
145
180
|
// Annotate the CommonJS export names for ESM import in node:
|
|
146
181
|
0 && (module.exports = {
|
|
182
|
+
convert,
|
|
147
183
|
run
|
|
148
184
|
});
|
|
149
185
|
//# sourceMappingURL=index.cjs.map
|