@opendataloader/pdf 1.1.1 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1,5 +1,8 @@
1
1
  #!/usr/bin/env node
2
2
 
3
+ // src/cli.ts
4
+ import { Command, CommanderError } from "commander";
5
+
3
6
  // src/index.ts
4
7
  import { spawn } from "child_process";
5
8
  import * as path from "path";
@@ -8,54 +11,9 @@ import { fileURLToPath } from "url";
8
11
  var __filename = fileURLToPath(import.meta.url);
9
12
  var __dirname = path.dirname(__filename);
10
13
  var JAR_NAME = "opendataloader-pdf-cli.jar";
11
- function getRedactedCommandString(command, commandArgs) {
12
- const commandArgsForLogging = [...commandArgs];
13
- const passwordIndex = commandArgsForLogging.indexOf("--password");
14
- if (passwordIndex > -1 && passwordIndex + 1 < commandArgsForLogging.length) {
15
- commandArgsForLogging[passwordIndex + 1] = "[REDACTED]";
16
- }
17
- return `${command} ${commandArgsForLogging.join(" ")}`;
18
- }
19
- function run(inputPath, options = {}) {
14
+ function executeJar(args, executionOptions = {}) {
15
+ const { debug = false, streamOutput = false } = executionOptions;
20
16
  return new Promise((resolve, reject) => {
21
- if (!fs.existsSync(inputPath)) {
22
- return reject(new Error(`Input file or folder not found: ${inputPath}`));
23
- }
24
- const args = [];
25
- if (options.outputFolder) {
26
- args.push("--output-dir", options.outputFolder);
27
- }
28
- if (options.password) {
29
- args.push("--password", options.password);
30
- }
31
- if (options.replaceInvalidChars) {
32
- args.push("--replace-invalid-chars", options.replaceInvalidChars);
33
- }
34
- if (options.generateMarkdown) {
35
- args.push("--markdown");
36
- }
37
- if (options.generateHtml) {
38
- args.push("--html");
39
- }
40
- if (options.generateAnnotatedPdf) {
41
- args.push("--pdf");
42
- }
43
- if (options.keepLineBreaks) {
44
- args.push("--keep-line-breaks");
45
- }
46
- if (options.contentSafetyOff) {
47
- args.push("--content-safety-off", options.contentSafetyOff);
48
- }
49
- if (options.htmlInMarkdown) {
50
- args.push("--markdown-with-html");
51
- }
52
- if (options.addImageToMarkdown) {
53
- args.push("--markdown-with-images");
54
- }
55
- if (options.noJson) {
56
- args.push("--no-json");
57
- }
58
- args.push(inputPath);
59
17
  const jarPath = path.join(__dirname, "..", "lib", JAR_NAME);
60
18
  if (!fs.existsSync(jarPath)) {
61
19
  return reject(
@@ -64,22 +22,19 @@ function run(inputPath, options = {}) {
64
22
  }
65
23
  const command = "java";
66
24
  const commandArgs = ["-jar", jarPath, ...args];
67
- if (options.debug) {
68
- console.error(`Running command: ${getRedactedCommandString(command, commandArgs)}`);
69
- }
70
25
  const javaProcess = spawn(command, commandArgs);
71
26
  let stdout = "";
72
27
  let stderr = "";
73
28
  javaProcess.stdout.on("data", (data) => {
74
29
  const chunk = data.toString();
75
- if (options.debug) {
30
+ if (streamOutput) {
76
31
  process.stdout.write(chunk);
77
32
  }
78
33
  stdout += chunk;
79
34
  });
80
35
  javaProcess.stderr.on("data", (data) => {
81
36
  const chunk = data.toString();
82
- if (options.debug) {
37
+ if (streamOutput) {
83
38
  process.stderr.write(chunk);
84
39
  }
85
40
  stderr += chunk;
@@ -88,10 +43,11 @@ function run(inputPath, options = {}) {
88
43
  if (code === 0) {
89
44
  resolve(stdout);
90
45
  } else {
46
+ const errorOutput = stderr || stdout;
91
47
  const error = new Error(
92
48
  `The opendataloader-pdf CLI exited with code ${code}.
93
49
 
94
- ${stderr}`
50
+ ${errorOutput}`
95
51
  );
96
52
  reject(error);
97
53
  }
@@ -109,133 +65,145 @@ ${stderr}`
109
65
  });
110
66
  });
111
67
  }
68
+ function convert(inputPaths, options = {}) {
69
+ if (inputPaths.length === 0) {
70
+ return Promise.reject(new Error("At least one input path must be provided."));
71
+ }
72
+ for (const input of inputPaths) {
73
+ if (!fs.existsSync(input)) {
74
+ return Promise.reject(new Error(`Input file or folder not found: ${input}`));
75
+ }
76
+ }
77
+ const args = [...inputPaths];
78
+ if (options.outputDir) {
79
+ args.push("--output-dir", options.outputDir);
80
+ }
81
+ if (options.password) {
82
+ args.push("--password", options.password);
83
+ }
84
+ if (options.format && options.format.length > 0) {
85
+ args.push("--format", ...options.format);
86
+ }
87
+ if (options.quiet) {
88
+ args.push("--quiet");
89
+ }
90
+ if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {
91
+ args.push("--content-safety-off", ...options.contentSafetyOff);
92
+ }
93
+ if (options.keepLineBreaks) {
94
+ args.push("--keep-line-breaks");
95
+ }
96
+ if (options.replaceInvalidChars) {
97
+ args.push("--replace-invalid-chars", options.replaceInvalidChars);
98
+ }
99
+ return executeJar(args, {
100
+ streamOutput: !options.quiet
101
+ });
102
+ }
112
103
 
113
104
  // src/cli.ts
114
- function printHelp() {
115
- console.log(`Usage: opendataloader-pdf [options] <input>`);
116
- console.log("");
117
- console.log("Options:");
118
- console.log(" -o, --output-dir <path> Directory where outputs are written");
119
- console.log(" -p, --password <password> Password for encrypted PDFs");
120
- console.log(" --replace-invalid-chars <c> Replacement character for invalid characters");
121
- console.log(" --content-safety-off <mode> Disable content safety filtering (provide mode)");
122
- console.log(" --markdown Generate Markdown output");
123
- console.log(" --html Generate HTML output");
124
- console.log(" --pdf Generate annotated PDF output");
125
- console.log(" --keep-line-breaks Preserve line breaks in text output");
126
- console.log(" --markdown-with-html Allow raw HTML within Markdown output");
127
- console.log(" --markdown-with-images Embed images in Markdown output");
128
- console.log(" --no-json Disable JSON output generation");
129
- console.log(" --debug Stream CLI logs directly to stdout/stderr");
130
- console.log(" -h, --help Show this message and exit");
131
- }
132
- function parseArgs(argv) {
133
- const options = {};
134
- let inputPath;
135
- let showHelp = false;
136
- const readValue = (currentIndex, option) => {
137
- const nextValue = argv[currentIndex + 1];
138
- if (!nextValue || nextValue.startsWith("-")) {
139
- throw new Error(`Option ${option} requires a value.`);
140
- }
141
- return { value: nextValue, nextIndex: currentIndex + 1 };
142
- };
143
- for (let i = 0; i < argv.length; i += 1) {
144
- const arg = argv[i];
145
- switch (arg) {
146
- case "--help":
147
- case "-h":
148
- showHelp = true;
149
- i = argv.length;
150
- break;
151
- case "--output-dir":
152
- case "-o": {
153
- const { value, nextIndex } = readValue(i, arg);
154
- options.outputFolder = value;
155
- i = nextIndex;
156
- break;
157
- }
158
- case "--password":
159
- case "-p": {
160
- const { value, nextIndex } = readValue(i, arg);
161
- options.password = value;
162
- i = nextIndex;
163
- break;
164
- }
165
- case "--replace-invalid-chars": {
166
- const { value, nextIndex } = readValue(i, arg);
167
- options.replaceInvalidChars = value;
168
- i = nextIndex;
169
- break;
170
- }
171
- case "--content-safety-off": {
172
- const { value, nextIndex } = readValue(i, arg);
173
- options.contentSafetyOff = value;
174
- i = nextIndex;
175
- break;
176
- }
177
- case "--markdown":
178
- options.generateMarkdown = true;
179
- break;
180
- case "--html":
181
- options.generateHtml = true;
182
- break;
183
- case "--pdf":
184
- options.generateAnnotatedPdf = true;
185
- break;
186
- case "--keep-line-breaks":
187
- options.keepLineBreaks = true;
188
- break;
189
- case "--markdown-with-html":
190
- options.htmlInMarkdown = true;
191
- break;
192
- case "--markdown-with-images":
193
- options.addImageToMarkdown = true;
194
- break;
195
- case "--no-json":
196
- options.noJson = true;
197
- break;
198
- case "--debug":
199
- options.debug = true;
200
- break;
201
- default:
202
- if (arg.startsWith("-")) {
203
- throw new Error(`Unknown option: ${arg}`);
204
- }
205
- if (inputPath) {
206
- throw new Error("Multiple input paths provided. Only one input path is allowed.");
207
- }
208
- inputPath = arg;
105
+ var VALID_FORMATS = /* @__PURE__ */ new Set([
106
+ "json",
107
+ "text",
108
+ "html",
109
+ "pdf",
110
+ "markdown",
111
+ "markdown-with-html",
112
+ "markdown-with-images"
113
+ ]);
114
+ var VALID_CONTENT_SAFETY_MODES = /* @__PURE__ */ new Set([
115
+ "all",
116
+ "hidden-text",
117
+ "off-page",
118
+ "tiny",
119
+ "hidden-ocg"
120
+ ]);
121
+ function createProgram() {
122
+ const program = new Command();
123
+ program.name("opendataloader-pdf").usage("[options] <input...>").description("Convert PDFs using the OpenDataLoader CLI.").showHelpAfterError("Use '--help' to see available options.").showSuggestionAfterError(false).argument("<input...>", "Input files or directories to convert").option("-o, --output-dir <path>", "Directory where outputs are written").option("-p, --password <password>", "Password for encrypted PDFs").option(
124
+ "-f, --format <value...>",
125
+ "Output formats to generate (json, text, html, pdf, markdown, markdown-with-html, markdown-with-images)"
126
+ ).option("-q, --quiet", "Suppress CLI logging output").option("--content-safety-off <mode...>", "Disable one or more content safety filters").option("--keep-line-breaks", "Preserve line breaks in text output").option("--replace-invalid-chars <c>", "Replacement character for invalid characters");
127
+ program.configureOutput({
128
+ writeErr: (str) => {
129
+ console.error(str.trimEnd());
130
+ },
131
+ outputError: (str, write) => {
132
+ write(str);
209
133
  }
134
+ });
135
+ return program;
136
+ }
137
+ function buildConvertOptions(options) {
138
+ const convertOptions = {};
139
+ if (options.outputDir) {
140
+ convertOptions.outputDir = options.outputDir;
141
+ }
142
+ if (options.password) {
143
+ convertOptions.password = options.password;
144
+ }
145
+ if (options.format && options.format.length > 0) {
146
+ convertOptions.format = options.format;
147
+ }
148
+ if (options.quiet) {
149
+ convertOptions.quiet = true;
210
150
  }
211
- return { inputPath, options, showHelp };
151
+ if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {
152
+ convertOptions.contentSafetyOff = options.contentSafetyOff;
153
+ }
154
+ if (options.keepLineBreaks) {
155
+ convertOptions.keepLineBreaks = true;
156
+ }
157
+ if (options.replaceInvalidChars) {
158
+ convertOptions.replaceInvalidChars = options.replaceInvalidChars;
159
+ }
160
+ return convertOptions;
212
161
  }
213
162
  async function main() {
214
- let parsed;
163
+ const program = createProgram();
164
+ program.exitOverride();
215
165
  try {
216
- parsed = parseArgs(process.argv.slice(2));
166
+ program.parse(process.argv);
217
167
  } catch (err) {
168
+ if (err instanceof CommanderError) {
169
+ if (err.code === "commander.helpDisplayed") {
170
+ return 0;
171
+ }
172
+ return err.exitCode ?? 1;
173
+ }
218
174
  const message = err instanceof Error ? err.message : String(err);
219
175
  console.error(message);
220
176
  console.error("Use '--help' to see available options.");
221
177
  return 1;
222
178
  }
223
- if (parsed.showHelp) {
224
- printHelp();
225
- return 0;
179
+ const cliOptions = program.opts();
180
+ const inputPaths = program.args;
181
+ if (cliOptions.format) {
182
+ for (const value of cliOptions.format) {
183
+ if (!VALID_FORMATS.has(value)) {
184
+ console.error(`Invalid format '${value}'. See '--help' for allowed values.`);
185
+ console.error("Use '--help' to see available options.");
186
+ return 1;
187
+ }
188
+ }
226
189
  }
227
- if (!parsed.inputPath) {
228
- console.error("Missing required input path.");
229
- console.error("Use '--help' to see usage information.");
230
- return 1;
190
+ if (cliOptions.contentSafetyOff) {
191
+ for (const value of cliOptions.contentSafetyOff) {
192
+ if (!VALID_CONTENT_SAFETY_MODES.has(value)) {
193
+ console.error(`Invalid content safety mode '${value}'. See '--help' for allowed values.`);
194
+ console.error("Use '--help' to see available options.");
195
+ return 1;
196
+ }
197
+ }
231
198
  }
199
+ const convertOptions = buildConvertOptions(cliOptions);
232
200
  try {
233
- const output = await run(parsed.inputPath, parsed.options);
234
- if (output && !parsed.options.debug) {
201
+ const output = await convert(inputPaths, convertOptions);
202
+ if (output && !convertOptions.quiet) {
235
203
  process.stdout.write(output);
236
- }
237
- if (output && !output.endsWith("\n") && !parsed.options.debug) {
238
- process.stdout.write("\n");
204
+ if (!output.endsWith("\n")) {
205
+ process.stdout.write("\n");
206
+ }
239
207
  }
240
208
  return 0;
241
209
  } catch (err) {
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../src/cli.ts"],"sourcesContent":["import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { fileURLToPath } from 'url';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\nfunction getRedactedCommandString(command: string, commandArgs: string[]): string {\n const commandArgsForLogging = [...commandArgs];\n const passwordIndex = commandArgsForLogging.indexOf('--password');\n if (passwordIndex > -1 && passwordIndex + 1 < commandArgsForLogging.length) {\n commandArgsForLogging[passwordIndex + 1] = '[REDACTED]';\n }\n return `${command} ${commandArgsForLogging.join(' ')}`;\n}\n\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n}\n\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n return new Promise((resolve, reject) => {\n if (!fs.existsSync(inputPath)) {\n return reject(new Error(`Input file or folder not found: ${inputPath}`));\n }\n\n const args: string[] = [];\n if (options.outputFolder) {\n args.push('--output-dir', options.outputFolder);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.generateMarkdown) {\n args.push('--markdown');\n }\n if (options.generateHtml) {\n args.push('--html');\n }\n if (options.generateAnnotatedPdf) {\n args.push('--pdf');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.contentSafetyOff) {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n if (options.htmlInMarkdown) {\n args.push('--markdown-with-html');\n }\n if (options.addImageToMarkdown) {\n args.push('--markdown-with-images');\n }\n if (options.noJson) {\n args.push('--no-json');\n }\n\n args.push(inputPath);\n\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n if (options.debug) {\n console.error(`Running command: ${getRedactedCommandString(command, commandArgs)}`);\n }\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n\n javaProcess.stdout.on('data', (data) => {\n const chunk = data.toString();\n if (options.debug) {\n process.stdout.write(chunk);\n }\n stdout += chunk;\n });\n\n javaProcess.stderr.on('data', (data) => {\n const chunk = data.toString();\n if (options.debug) {\n process.stderr.write(chunk);\n }\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n if (code === 0) {\n resolve(stdout);\n } else {\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${stderr}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n","#!/usr/bin/env node\nimport { run, RunOptions } from './index.js';\n\ninterface ParsedArgs {\n inputPath?: string;\n options: RunOptions;\n showHelp: boolean;\n}\n\nfunction printHelp(): void {\n console.log(`Usage: opendataloader-pdf [options] <input>`);\n console.log('');\n console.log('Options:');\n console.log(' -o, --output-dir <path> Directory where outputs are written');\n console.log(' -p, --password <password> Password for encrypted PDFs');\n console.log(' --replace-invalid-chars <c> Replacement character for invalid characters');\n console.log(' --content-safety-off <mode> Disable content safety filtering (provide mode)');\n console.log(' --markdown Generate Markdown output');\n console.log(' --html Generate HTML output');\n console.log(' --pdf Generate annotated PDF output');\n console.log(' --keep-line-breaks Preserve line breaks in text output');\n console.log(' --markdown-with-html Allow raw HTML within Markdown output');\n console.log(' --markdown-with-images Embed images in Markdown output');\n console.log(' --no-json Disable JSON output generation');\n console.log(' --debug Stream CLI logs directly to stdout/stderr');\n console.log(' -h, --help Show this message and exit');\n}\n\nfunction parseArgs(argv: string[]): ParsedArgs {\n const options: RunOptions = {};\n let inputPath: string | undefined;\n let showHelp = false;\n\n const readValue = (currentIndex: number, option: string): { value: string; nextIndex: number } => {\n const nextValue = argv[currentIndex + 1];\n if (!nextValue || nextValue.startsWith('-')) {\n throw new Error(`Option ${option} requires a value.`);\n }\n return { value: nextValue, nextIndex: currentIndex + 1 };\n };\n\n for (let i = 0; i < argv.length; i += 1) {\n const arg = argv[i];\n\n switch (arg) {\n case '--help':\n case '-h':\n showHelp = true;\n i = argv.length; // exit loop\n break;\n case '--output-dir':\n case '-o': {\n const { value, nextIndex } = readValue(i, arg);\n options.outputFolder = value;\n i = nextIndex;\n break;\n }\n case '--password':\n case '-p': {\n const { value, nextIndex } = readValue(i, arg);\n options.password = value;\n i = nextIndex;\n break;\n }\n case '--replace-invalid-chars': {\n const { value, nextIndex } = readValue(i, arg);\n options.replaceInvalidChars = value;\n i = nextIndex;\n break;\n }\n case '--content-safety-off': {\n const { value, nextIndex } = readValue(i, arg);\n options.contentSafetyOff = value;\n i = nextIndex;\n break;\n }\n case '--markdown':\n options.generateMarkdown = true;\n break;\n case '--html':\n options.generateHtml = true;\n break;\n case '--pdf':\n options.generateAnnotatedPdf = true;\n break;\n case '--keep-line-breaks':\n options.keepLineBreaks = true;\n break;\n case '--markdown-with-html':\n options.htmlInMarkdown = true;\n break;\n case '--markdown-with-images':\n options.addImageToMarkdown = true;\n break;\n case '--no-json':\n options.noJson = true;\n break;\n case '--debug':\n options.debug = true;\n break;\n default:\n if (arg.startsWith('-')) {\n throw new Error(`Unknown option: ${arg}`);\n }\n\n if (inputPath) {\n throw new Error('Multiple input paths provided. Only one input path is allowed.');\n }\n inputPath = arg;\n }\n }\n\n return { inputPath, options, showHelp };\n}\n\nasync function main(): Promise<number> {\n let parsed: ParsedArgs;\n\n try {\n parsed = parseArgs(process.argv.slice(2));\n } catch (err) {\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n\n if (parsed.showHelp) {\n printHelp();\n return 0;\n }\n\n if (!parsed.inputPath) {\n console.error('Missing required input path.');\n console.error(\"Use '--help' to see usage information.\");\n return 1;\n }\n\n try {\n const output = await run(parsed.inputPath, parsed.options);\n if (output && !parsed.options.debug) {\n process.stdout.write(output);\n }\n if (output && !output.endsWith('\\n') && !parsed.options.debug) {\n process.stdout.write('\\n');\n }\n return 0;\n } catch (err) {\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n return 1;\n }\n}\n\nmain().then((code) => {\n if (code !== 0) {\n process.exit(code);\n }\n});\n"],"mappings":";;;AAAA,SAAS,aAAa;AACtB,YAAY,UAAU;AACtB,YAAY,QAAQ;AACpB,SAAS,qBAAqB;AAE9B,IAAM,aAAa,cAAc,YAAY,GAAG;AAChD,IAAM,YAAiB,aAAQ,UAAU;AAEzC,IAAM,WAAW;AAEjB,SAAS,yBAAyB,SAAiB,aAA+B;AAChF,QAAM,wBAAwB,CAAC,GAAG,WAAW;AAC7C,QAAM,gBAAgB,sBAAsB,QAAQ,YAAY;AAChE,MAAI,gBAAgB,MAAM,gBAAgB,IAAI,sBAAsB,QAAQ;AAC1E,0BAAsB,gBAAgB,CAAC,IAAI;AAAA,EAC7C;AACA,SAAO,GAAG,OAAO,IAAI,sBAAsB,KAAK,GAAG,CAAC;AACtD;AAiBO,SAAS,IAAI,WAAmB,UAAsB,CAAC,GAAoB;AAChF,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,QAAI,CAAI,cAAW,SAAS,GAAG;AAC7B,aAAO,OAAO,IAAI,MAAM,mCAAmC,SAAS,EAAE,CAAC;AAAA,IACzE;AAEA,UAAM,OAAiB,CAAC;AACxB,QAAI,QAAQ,cAAc;AACxB,WAAK,KAAK,gBAAgB,QAAQ,YAAY;AAAA,IAChD;AACA,QAAI,QAAQ,UAAU;AACpB,WAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,IAC1C;AACA,QAAI,QAAQ,qBAAqB;AAC/B,WAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,IAClE;AACA,QAAI,QAAQ,kBAAkB;AAC5B,WAAK,KAAK,YAAY;AAAA,IACxB;AACA,QAAI,QAAQ,cAAc;AACxB,WAAK,KAAK,QAAQ;AAAA,IACpB;AACA,QAAI,QAAQ,sBAAsB;AAChC,WAAK,KAAK,OAAO;AAAA,IACnB;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,oBAAoB;AAAA,IAChC;AACA,QAAI,QAAQ,kBAAkB;AAC5B,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,sBAAsB;AAAA,IAClC;AACA,QAAI,QAAQ,oBAAoB;AAC9B,WAAK,KAAK,wBAAwB;AAAA,IACpC;AACA,QAAI,QAAQ,QAAQ;AAClB,WAAK,KAAK,WAAW;AAAA,IACvB;AAEA,SAAK,KAAK,SAAS;AAEnB,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,QAAI,QAAQ,OAAO;AACjB,cAAQ,MAAM,oBAAoB,yBAAyB,SAAS,WAAW,CAAC,EAAE;AAAA,IACpF;AAEA,UAAM,cAAc,MAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,QAAQ,OAAO;AACjB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,QAAQ,OAAO;AACjB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAChC,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,MAAM;AAAA,QACnE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAQ;AAC/B,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;;;AC/HA,SAAS,YAAkB;AACzB,UAAQ,IAAI,6CAA6C;AACzD,UAAQ,IAAI,EAAE;AACd,UAAQ,IAAI,UAAU;AACtB,UAAQ,IAAI,oEAAoE;AAChF,UAAQ,IAAI,4DAA4D;AACxE,UAAQ,IAAI,6EAA6E;AACzF,UAAQ,IAAI,gFAAgF;AAC5F,UAAQ,IAAI,yDAAyD;AACrE,UAAQ,IAAI,qDAAqD;AACjE,UAAQ,IAAI,8DAA8D;AAC1E,UAAQ,IAAI,oEAAoE;AAChF,UAAQ,IAAI,sEAAsE;AAClF,UAAQ,IAAI,gEAAgE;AAC5E,UAAQ,IAAI,+DAA+D;AAC3E,UAAQ,IAAI,0EAA0E;AACtF,UAAQ,IAAI,2DAA2D;AACzE;AAEA,SAAS,UAAU,MAA4B;AAC7C,QAAM,UAAsB,CAAC;AAC7B,MAAI;AACJ,MAAI,WAAW;AAEf,QAAM,YAAY,CAAC,cAAsB,WAAyD;AAChG,UAAM,YAAY,KAAK,eAAe,CAAC;AACvC,QAAI,CAAC,aAAa,UAAU,WAAW,GAAG,GAAG;AAC3C,YAAM,IAAI,MAAM,UAAU,MAAM,oBAAoB;AAAA,IACtD;AACA,WAAO,EAAE,OAAO,WAAW,WAAW,eAAe,EAAE;AAAA,EACzD;AAEA,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK,GAAG;AACvC,UAAM,MAAM,KAAK,CAAC;AAElB,YAAQ,KAAK;AAAA,MACX,KAAK;AAAA,MACL,KAAK;AACH,mBAAW;AACX,YAAI,KAAK;AACT;AAAA,MACF,KAAK;AAAA,MACL,KAAK,MAAM;AACT,cAAM,EAAE,OAAO,UAAU,IAAI,UAAU,GAAG,GAAG;AAC7C,gBAAQ,eAAe;AACvB,YAAI;AACJ;AAAA,MACF;AAAA,MACA,KAAK;AAAA,MACL,KAAK,MAAM;AACT,cAAM,EAAE,OAAO,UAAU,IAAI,UAAU,GAAG,GAAG;AAC7C,gBAAQ,WAAW;AACnB,YAAI;AACJ;AAAA,MACF;AAAA,MACA,KAAK,2BAA2B;AAC9B,cAAM,EAAE,OAAO,UAAU,IAAI,UAAU,GAAG,GAAG;AAC7C,gBAAQ,sBAAsB;AAC9B,YAAI;AACJ;AAAA,MACF;AAAA,MACA,KAAK,wBAAwB;AAC3B,cAAM,EAAE,OAAO,UAAU,IAAI,UAAU,GAAG,GAAG;AAC7C,gBAAQ,mBAAmB;AAC3B,YAAI;AACJ;AAAA,MACF;AAAA,MACA,KAAK;AACH,gBAAQ,mBAAmB;AAC3B;AAAA,MACF,KAAK;AACH,gBAAQ,eAAe;AACvB;AAAA,MACF,KAAK;AACH,gBAAQ,uBAAuB;AAC/B;AAAA,MACF,KAAK;AACH,gBAAQ,iBAAiB;AACzB;AAAA,MACF,KAAK;AACH,gBAAQ,iBAAiB;AACzB;AAAA,MACF,KAAK;AACH,gBAAQ,qBAAqB;AAC7B;AAAA,MACF,KAAK;AACH,gBAAQ,SAAS;AACjB;AAAA,MACF,KAAK;AACH,gBAAQ,QAAQ;AAChB;AAAA,MACF;AACE,YAAI,IAAI,WAAW,GAAG,GAAG;AACvB,gBAAM,IAAI,MAAM,mBAAmB,GAAG,EAAE;AAAA,QAC1C;AAEA,YAAI,WAAW;AACb,gBAAM,IAAI,MAAM,gEAAgE;AAAA,QAClF;AACA,oBAAY;AAAA,IAChB;AAAA,EACF;AAEA,SAAO,EAAE,WAAW,SAAS,SAAS;AACxC;AAEA,eAAe,OAAwB;AACrC,MAAI;AAEJ,MAAI;AACF,aAAS,UAAU,QAAQ,KAAK,MAAM,CAAC,CAAC;AAAA,EAC1C,SAAS,KAAK;AACZ,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,MAAI,OAAO,UAAU;AACnB,cAAU;AACV,WAAO;AAAA,EACT;AAEA,MAAI,CAAC,OAAO,WAAW;AACrB,YAAQ,MAAM,8BAA8B;AAC5C,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,MAAI;AACF,UAAM,SAAS,MAAM,IAAI,OAAO,WAAW,OAAO,OAAO;AACzD,QAAI,UAAU,CAAC,OAAO,QAAQ,OAAO;AACnC,cAAQ,OAAO,MAAM,MAAM;AAAA,IAC7B;AACA,QAAI,UAAU,CAAC,OAAO,SAAS,IAAI,KAAK,CAAC,OAAO,QAAQ,OAAO;AAC7D,cAAQ,OAAO,MAAM,IAAI;AAAA,IAC3B;AACA,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,KAAK,EAAE,KAAK,CAAC,SAAS;AACpB,MAAI,SAAS,GAAG;AACd,YAAQ,KAAK,IAAI;AAAA,EACnB;AACF,CAAC;","names":[]}
1
+ {"version":3,"sources":["../src/cli.ts","../src/index.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { Command, CommanderError } from 'commander';\nimport { convert, ConvertOptions } from './index.js';\n\ninterface CliOptions {\n outputDir?: string;\n password?: string;\n format?: string[];\n quiet?: boolean;\n contentSafetyOff?: string[];\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n}\n\nconst VALID_FORMATS = new Set([\n 'json',\n 'text',\n 'html',\n 'pdf',\n 'markdown',\n 'markdown-with-html',\n 'markdown-with-images',\n]);\n\nconst VALID_CONTENT_SAFETY_MODES = new Set([\n 'all',\n 'hidden-text',\n 'off-page',\n 'tiny',\n 'hidden-ocg',\n]);\n\nfunction createProgram(): Command {\n const program = new Command();\n\n program\n .name('opendataloader-pdf')\n .usage('[options] <input...>')\n .description('Convert PDFs using the OpenDataLoader CLI.')\n .showHelpAfterError(\"Use '--help' to see available options.\")\n .showSuggestionAfterError(false)\n .argument('<input...>', 'Input files or directories to convert')\n .option('-o, --output-dir <path>', 'Directory where outputs are written')\n .option('-p, --password <password>', 'Password for encrypted PDFs')\n .option(\n '-f, --format <value...>',\n 'Output formats to generate (json, text, html, pdf, markdown, markdown-with-html, markdown-with-images)',\n )\n .option('-q, --quiet', 'Suppress CLI logging output')\n .option('--content-safety-off <mode...>', 'Disable one or more content safety filters')\n .option('--keep-line-breaks', 'Preserve line breaks in text output')\n .option('--replace-invalid-chars <c>', 'Replacement character for invalid characters');\n\n program.configureOutput({\n writeErr: (str) => {\n console.error(str.trimEnd());\n },\n outputError: (str, write) => {\n write(str);\n },\n });\n\n return program;\n}\n\nfunction buildConvertOptions(options: CliOptions): ConvertOptions {\n const convertOptions: ConvertOptions = {};\n\n if (options.outputDir) {\n convertOptions.outputDir = options.outputDir;\n }\n if (options.password) {\n convertOptions.password = options.password;\n }\n if (options.format && options.format.length > 0) {\n convertOptions.format = options.format;\n }\n if (options.quiet) {\n convertOptions.quiet = true;\n }\n if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {\n convertOptions.contentSafetyOff = options.contentSafetyOff;\n }\n if (options.keepLineBreaks) {\n convertOptions.keepLineBreaks = true;\n }\n if (options.replaceInvalidChars) {\n convertOptions.replaceInvalidChars = options.replaceInvalidChars;\n }\n\n return convertOptions;\n}\n\nasync function main(): Promise<number> {\n const program = createProgram();\n\n program.exitOverride();\n\n try {\n program.parse(process.argv);\n } catch (err) {\n if (err instanceof CommanderError) {\n if (err.code === 'commander.helpDisplayed') {\n return 0;\n }\n return err.exitCode ?? 1;\n }\n\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n\n const cliOptions = program.opts<CliOptions>();\n const inputPaths = program.args;\n\n if (cliOptions.format) {\n for (const value of cliOptions.format) {\n if (!VALID_FORMATS.has(value)) {\n console.error(`Invalid format '${value}'. See '--help' for allowed values.`);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n }\n }\n\n if (cliOptions.contentSafetyOff) {\n for (const value of cliOptions.contentSafetyOff) {\n if (!VALID_CONTENT_SAFETY_MODES.has(value)) {\n console.error(`Invalid content safety mode '${value}'. See '--help' for allowed values.`);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n }\n }\n\n const convertOptions = buildConvertOptions(cliOptions);\n\n try {\n const output = await convert(inputPaths, convertOptions);\n if (output && !convertOptions.quiet) {\n process.stdout.write(output);\n if (!output.endsWith('\\n')) {\n process.stdout.write('\\n');\n }\n }\n return 0;\n } catch (err) {\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n return 1;\n }\n}\n\nmain().then((code) => {\n if (code !== 0) {\n process.exit(code);\n }\n});\n","import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { fileURLToPath } from 'url';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n debug?: boolean;\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { debug = false, streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n\n javaProcess.stdout.on('data', (data) => {\n const chunk = data.toString();\n if (streamOutput) {\n process.stdout.write(chunk);\n }\n stdout += chunk;\n });\n\n javaProcess.stderr.on('data', (data) => {\n const chunk = data.toString();\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n}\n\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n return new Promise((resolve, reject) => {\n if (!fs.existsSync(inputPath)) {\n return reject(new Error(`Input file or folder not found: ${inputPath}`));\n }\n\n const args: string[] = [];\n if (options.outputFolder) {\n args.push('--output-dir', options.outputFolder);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.generateMarkdown) {\n args.push('--markdown');\n }\n if (options.generateHtml) {\n args.push('--html');\n }\n if (options.generateAnnotatedPdf) {\n args.push('--pdf');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.contentSafetyOff) {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n if (options.htmlInMarkdown) {\n args.push('--markdown-with-html');\n }\n if (options.addImageToMarkdown) {\n args.push('--markdown-with-images');\n }\n if (options.noJson) {\n args.push('--no-json');\n }\n\n args.push(inputPath);\n executeJar(args, {\n debug: options.debug,\n streamOutput: Boolean(options.debug),\n })\n .then(resolve)\n .catch(reject);\n });\n}\n\nexport interface ConvertOptions {\n outputDir?: string;\n password?: string;\n format?: string[];\n quiet?: boolean;\n contentSafetyOff?: string[];\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n}\n\nexport function convert(inputPaths: string[], options: ConvertOptions = {}): Promise<string> {\n if (inputPaths.length === 0) {\n return Promise.reject(new Error('At least one input path must be provided.'));\n }\n\n for (const input of inputPaths) {\n if (!fs.existsSync(input)) {\n return Promise.reject(new Error(`Input file or folder not found: ${input}`));\n }\n }\n\n const args: string[] = [...inputPaths];\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format && options.format.length > 0) {\n args.push('--format', ...options.format);\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {\n args.push('--content-safety-off', ...options.contentSafetyOff);\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n\n return executeJar(args, {\n streamOutput: !options.quiet,\n });\n}\n"],"mappings":";;;AACA,SAAS,SAAS,sBAAsB;;;ACDxC,SAAS,aAAa;AACtB,YAAY,UAAU;AACtB,YAAY,QAAQ;AACpB,SAAS,qBAAqB;AAE9B,IAAM,aAAa,cAAc,YAAY,GAAG;AAChD,IAAM,YAAiB,aAAQ,UAAU;AAEzC,IAAM,WAAW;AAOjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,QAAQ,OAAO,eAAe,MAAM,IAAI;AAEhD,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,UAAM,cAAc,MAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAChC,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AA8EO,SAAS,QAAQ,YAAsB,UAA0B,CAAC,GAAoB;AAC3F,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO,QAAQ,OAAO,IAAI,MAAM,2CAA2C,CAAC;AAAA,EAC9E;AAEA,aAAW,SAAS,YAAY;AAC9B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,QAAQ,OAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE,CAAC;AAAA,IAC7E;AAAA,EACF;AAEA,QAAM,OAAiB,CAAC,GAAG,UAAU;AACrC,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,UAAU,QAAQ,OAAO,SAAS,GAAG;AAC/C,SAAK,KAAK,YAAY,GAAG,QAAQ,MAAM;AAAA,EACzC;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,oBAAoB,QAAQ,iBAAiB,SAAS,GAAG;AACnE,SAAK,KAAK,wBAAwB,GAAG,QAAQ,gBAAgB;AAAA,EAC/D;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBAAoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AAEA,SAAO,WAAW,MAAM;AAAA,IACtB,cAAc,CAAC,QAAQ;AAAA,EACzB,CAAC;AACH;;;ADhLA,IAAM,gBAAgB,oBAAI,IAAI;AAAA,EAC5B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,IAAM,6BAA6B,oBAAI,IAAI;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,SAAS,gBAAyB;AAChC,QAAM,UAAU,IAAI,QAAQ;AAE5B,UACG,KAAK,oBAAoB,EACzB,MAAM,sBAAsB,EAC5B,YAAY,4CAA4C,EACxD,mBAAmB,wCAAwC,EAC3D,yBAAyB,KAAK,EAC9B,SAAS,cAAc,uCAAuC,EAC9D,OAAO,2BAA2B,qCAAqC,EACvE,OAAO,6BAA6B,6BAA6B,EACjE;AAAA,IACC;AAAA,IACA;AAAA,EACF,EACC,OAAO,eAAe,6BAA6B,EACnD,OAAO,kCAAkC,4CAA4C,EACrF,OAAO,sBAAsB,qCAAqC,EAClE,OAAO,+BAA+B,8CAA8C;AAEvF,UAAQ,gBAAgB;AAAA,IACtB,UAAU,CAAC,QAAQ;AACjB,cAAQ,MAAM,IAAI,QAAQ,CAAC;AAAA,IAC7B;AAAA,IACA,aAAa,CAAC,KAAK,UAAU;AAC3B,YAAM,GAAG;AAAA,IACX;AAAA,EACF,CAAC;AAED,SAAO;AACT;AAEA,SAAS,oBAAoB,SAAqC;AAChE,QAAM,iBAAiC,CAAC;AAExC,MAAI,QAAQ,WAAW;AACrB,mBAAe,YAAY,QAAQ;AAAA,EACrC;AACA,MAAI,QAAQ,UAAU;AACpB,mBAAe,WAAW,QAAQ;AAAA,EACpC;AACA,MAAI,QAAQ,UAAU,QAAQ,OAAO,SAAS,GAAG;AAC/C,mBAAe,SAAS,QAAQ;AAAA,EAClC;AACA,MAAI,QAAQ,OAAO;AACjB,mBAAe,QAAQ;AAAA,EACzB;AACA,MAAI,QAAQ,oBAAoB,QAAQ,iBAAiB,SAAS,GAAG;AACnE,mBAAe,mBAAmB,QAAQ;AAAA,EAC5C;AACA,MAAI,QAAQ,gBAAgB;AAC1B,mBAAe,iBAAiB;AAAA,EAClC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,mBAAe,sBAAsB,QAAQ;AAAA,EAC/C;AAEA,SAAO;AACT;AAEA,eAAe,OAAwB;AACrC,QAAM,UAAU,cAAc;AAE9B,UAAQ,aAAa;AAErB,MAAI;AACF,YAAQ,MAAM,QAAQ,IAAI;AAAA,EAC5B,SAAS,KAAK;AACZ,QAAI,eAAe,gBAAgB;AACjC,UAAI,IAAI,SAAS,2BAA2B;AAC1C,eAAO;AAAA,MACT;AACA,aAAO,IAAI,YAAY;AAAA,IACzB;AAEA,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,QAAQ,KAAiB;AAC5C,QAAM,aAAa,QAAQ;AAE3B,MAAI,WAAW,QAAQ;AACrB,eAAW,SAAS,WAAW,QAAQ;AACrC,UAAI,CAAC,cAAc,IAAI,KAAK,GAAG;AAC7B,gBAAQ,MAAM,mBAAmB,KAAK,qCAAqC;AAC3E,gBAAQ,MAAM,wCAAwC;AACtD,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAEA,MAAI,WAAW,kBAAkB;AAC/B,eAAW,SAAS,WAAW,kBAAkB;AAC/C,UAAI,CAAC,2BAA2B,IAAI,KAAK,GAAG;AAC1C,gBAAQ,MAAM,gCAAgC,KAAK,qCAAqC;AACxF,gBAAQ,MAAM,wCAAwC;AACtD,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAEA,QAAM,iBAAiB,oBAAoB,UAAU;AAErD,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ,YAAY,cAAc;AACvD,QAAI,UAAU,CAAC,eAAe,OAAO;AACnC,cAAQ,OAAO,MAAM,MAAM;AAC3B,UAAI,CAAC,OAAO,SAAS,IAAI,GAAG;AAC1B,gBAAQ,OAAO,MAAM,IAAI;AAAA,MAC3B;AAAA,IACF;AACA,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,KAAK,EAAE,KAAK,CAAC,SAAS;AACpB,MAAI,SAAS,GAAG;AACd,YAAQ,KAAK,IAAI;AAAA,EACnB;AACF,CAAC;","names":[]}
package/dist/index.cjs CHANGED
@@ -30,6 +30,7 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
30
30
  // src/index.ts
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
+ convert: () => convert,
33
34
  run: () => run
34
35
  });
35
36
  module.exports = __toCommonJS(index_exports);
@@ -41,54 +42,9 @@ var import_meta = {};
41
42
  var __filename = (0, import_url.fileURLToPath)(import_meta.url);
42
43
  var __dirname = path.dirname(__filename);
43
44
  var JAR_NAME = "opendataloader-pdf-cli.jar";
44
- function getRedactedCommandString(command, commandArgs) {
45
- const commandArgsForLogging = [...commandArgs];
46
- const passwordIndex = commandArgsForLogging.indexOf("--password");
47
- if (passwordIndex > -1 && passwordIndex + 1 < commandArgsForLogging.length) {
48
- commandArgsForLogging[passwordIndex + 1] = "[REDACTED]";
49
- }
50
- return `${command} ${commandArgsForLogging.join(" ")}`;
51
- }
52
- function run(inputPath, options = {}) {
45
+ function executeJar(args, executionOptions = {}) {
46
+ const { debug = false, streamOutput = false } = executionOptions;
53
47
  return new Promise((resolve, reject) => {
54
- if (!fs.existsSync(inputPath)) {
55
- return reject(new Error(`Input file or folder not found: ${inputPath}`));
56
- }
57
- const args = [];
58
- if (options.outputFolder) {
59
- args.push("--output-dir", options.outputFolder);
60
- }
61
- if (options.password) {
62
- args.push("--password", options.password);
63
- }
64
- if (options.replaceInvalidChars) {
65
- args.push("--replace-invalid-chars", options.replaceInvalidChars);
66
- }
67
- if (options.generateMarkdown) {
68
- args.push("--markdown");
69
- }
70
- if (options.generateHtml) {
71
- args.push("--html");
72
- }
73
- if (options.generateAnnotatedPdf) {
74
- args.push("--pdf");
75
- }
76
- if (options.keepLineBreaks) {
77
- args.push("--keep-line-breaks");
78
- }
79
- if (options.contentSafetyOff) {
80
- args.push("--content-safety-off", options.contentSafetyOff);
81
- }
82
- if (options.htmlInMarkdown) {
83
- args.push("--markdown-with-html");
84
- }
85
- if (options.addImageToMarkdown) {
86
- args.push("--markdown-with-images");
87
- }
88
- if (options.noJson) {
89
- args.push("--no-json");
90
- }
91
- args.push(inputPath);
92
48
  const jarPath = path.join(__dirname, "..", "lib", JAR_NAME);
93
49
  if (!fs.existsSync(jarPath)) {
94
50
  return reject(
@@ -97,22 +53,19 @@ function run(inputPath, options = {}) {
97
53
  }
98
54
  const command = "java";
99
55
  const commandArgs = ["-jar", jarPath, ...args];
100
- if (options.debug) {
101
- console.error(`Running command: ${getRedactedCommandString(command, commandArgs)}`);
102
- }
103
56
  const javaProcess = (0, import_child_process.spawn)(command, commandArgs);
104
57
  let stdout = "";
105
58
  let stderr = "";
106
59
  javaProcess.stdout.on("data", (data) => {
107
60
  const chunk = data.toString();
108
- if (options.debug) {
61
+ if (streamOutput) {
109
62
  process.stdout.write(chunk);
110
63
  }
111
64
  stdout += chunk;
112
65
  });
113
66
  javaProcess.stderr.on("data", (data) => {
114
67
  const chunk = data.toString();
115
- if (options.debug) {
68
+ if (streamOutput) {
116
69
  process.stderr.write(chunk);
117
70
  }
118
71
  stderr += chunk;
@@ -121,10 +74,11 @@ function run(inputPath, options = {}) {
121
74
  if (code === 0) {
122
75
  resolve(stdout);
123
76
  } else {
77
+ const errorOutput = stderr || stdout;
124
78
  const error = new Error(
125
79
  `The opendataloader-pdf CLI exited with code ${code}.
126
80
 
127
- ${stderr}`
81
+ ${errorOutput}`
128
82
  );
129
83
  reject(error);
130
84
  }
@@ -142,8 +96,90 @@ ${stderr}`
142
96
  });
143
97
  });
144
98
  }
99
+ function run(inputPath, options = {}) {
100
+ return new Promise((resolve, reject) => {
101
+ if (!fs.existsSync(inputPath)) {
102
+ return reject(new Error(`Input file or folder not found: ${inputPath}`));
103
+ }
104
+ const args = [];
105
+ if (options.outputFolder) {
106
+ args.push("--output-dir", options.outputFolder);
107
+ }
108
+ if (options.password) {
109
+ args.push("--password", options.password);
110
+ }
111
+ if (options.replaceInvalidChars) {
112
+ args.push("--replace-invalid-chars", options.replaceInvalidChars);
113
+ }
114
+ if (options.generateMarkdown) {
115
+ args.push("--markdown");
116
+ }
117
+ if (options.generateHtml) {
118
+ args.push("--html");
119
+ }
120
+ if (options.generateAnnotatedPdf) {
121
+ args.push("--pdf");
122
+ }
123
+ if (options.keepLineBreaks) {
124
+ args.push("--keep-line-breaks");
125
+ }
126
+ if (options.contentSafetyOff) {
127
+ args.push("--content-safety-off", options.contentSafetyOff);
128
+ }
129
+ if (options.htmlInMarkdown) {
130
+ args.push("--markdown-with-html");
131
+ }
132
+ if (options.addImageToMarkdown) {
133
+ args.push("--markdown-with-images");
134
+ }
135
+ if (options.noJson) {
136
+ args.push("--no-json");
137
+ }
138
+ args.push(inputPath);
139
+ executeJar(args, {
140
+ debug: options.debug,
141
+ streamOutput: Boolean(options.debug)
142
+ }).then(resolve).catch(reject);
143
+ });
144
+ }
145
+ function convert(inputPaths, options = {}) {
146
+ if (inputPaths.length === 0) {
147
+ return Promise.reject(new Error("At least one input path must be provided."));
148
+ }
149
+ for (const input of inputPaths) {
150
+ if (!fs.existsSync(input)) {
151
+ return Promise.reject(new Error(`Input file or folder not found: ${input}`));
152
+ }
153
+ }
154
+ const args = [...inputPaths];
155
+ if (options.outputDir) {
156
+ args.push("--output-dir", options.outputDir);
157
+ }
158
+ if (options.password) {
159
+ args.push("--password", options.password);
160
+ }
161
+ if (options.format && options.format.length > 0) {
162
+ args.push("--format", ...options.format);
163
+ }
164
+ if (options.quiet) {
165
+ args.push("--quiet");
166
+ }
167
+ if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {
168
+ args.push("--content-safety-off", ...options.contentSafetyOff);
169
+ }
170
+ if (options.keepLineBreaks) {
171
+ args.push("--keep-line-breaks");
172
+ }
173
+ if (options.replaceInvalidChars) {
174
+ args.push("--replace-invalid-chars", options.replaceInvalidChars);
175
+ }
176
+ return executeJar(args, {
177
+ streamOutput: !options.quiet
178
+ });
179
+ }
145
180
  // Annotate the CommonJS export names for ESM import in node:
146
181
  0 && (module.exports = {
182
+ convert,
147
183
  run
148
184
  });
149
185
  //# sourceMappingURL=index.cjs.map