@opendataloader/pdf 1.1.1 → 1.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -43
- package/dist/cli.cjs +131 -163
- package/dist/cli.cjs.map +1 -1
- package/dist/cli.js +131 -163
- package/dist/cli.js.map +1 -1
- package/dist/index.cjs +89 -53
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +11 -1
- package/dist/index.d.ts +11 -1
- package/dist/index.js +88 -53
- package/dist/index.js.map +1 -1
- package/lib/opendataloader-pdf-cli.jar +0 -0
- package/package.json +4 -1
package/README.md
CHANGED
|
@@ -130,22 +130,18 @@ npm install @opendataloader/pdf
|
|
|
130
130
|
|
|
131
131
|
### Usage
|
|
132
132
|
|
|
133
|
-
|
|
134
|
-
- If you don’t specify an `outputFolder`, the output data will be saved in the same directory as the input document.
|
|
133
|
+
Each entry in `inputPaths` can be either the path to a single document or the path to a folder.
|
|
135
134
|
|
|
136
135
|
```typescript
|
|
137
|
-
import {
|
|
136
|
+
import { convert } from '@opendataloader/pdf';
|
|
138
137
|
|
|
139
138
|
async function main() {
|
|
140
139
|
try {
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
generateHtml: true,
|
|
145
|
-
generateAnnotatedPdf: true,
|
|
146
|
-
debug: true,
|
|
140
|
+
await convert(['path/to/document.pdf', 'path/to/folder'], {
|
|
141
|
+
outputDir: 'path/to/output',
|
|
142
|
+
format: ['json', 'html', 'pdf', 'markdown'],
|
|
147
143
|
});
|
|
148
|
-
console.log('
|
|
144
|
+
console.log('convert() complete');
|
|
149
145
|
} catch (error) {
|
|
150
146
|
console.error('Error processing PDF:', error);
|
|
151
147
|
}
|
|
@@ -153,54 +149,57 @@ async function main() {
|
|
|
153
149
|
|
|
154
150
|
main();
|
|
155
151
|
```
|
|
152
|
+
### Function: convert()
|
|
153
|
+
|
|
154
|
+
`convert(inputPaths: string[], options?: ConvertOptions): Promise<string>`
|
|
155
|
+
|
|
156
|
+
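Converts one or more files or directories in a single call; this multi-input helper matches the Python wrapper. The returned promise resolves with the CLI's standard output.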
|
|
157
|
+
|
|
158
|
+
| Property | Type | Default | Description |
|
|
159
|
+
| --------------------------------| ---------- | ----------- | ----------------------------------------------------------------------------------------------------------------------------------- |
|
|
160
|
+
| `inputPaths` | `string[]` | — | One or more file paths or directories to process. |
|
|
161
|
+
| `options.outputDir` | `string` | `undefined` | Directory where outputs are written. |
|
|
162
|
+
| `options.password` | `string` | `undefined` | Password for encrypted PDFs. |
|
|
163
|
+
| `options.format` | `string[]` | `undefined` | Output formats (any combination of `json`, `text`, `html`, `pdf`, `markdown`, `markdown-with-html`, `markdown-with-images`). |
|
|
164
|
+
| `options.quiet` | `boolean` | `false` | Suppress CLI logging output and prevent streaming. |
|
|
165
|
+
| `options.contentSafetyOff` | `string[]` | `undefined` | Disable one or more content safety filters (`all`, `hidden-text`, `off-page`, `tiny`, `hidden-ocg`). |
|
|
166
|
+
| `options.keepLineBreaks` | `boolean` | `false` | Preserve line breaks in text output. |
|
|
167
|
+
| `options.replaceInvalidChars` | `string` | `undefined` | Replacement character for invalid or unrecognized characters. |
|
|
168
|
+
|
|
169
|
+
### Function: run()
|
|
170
|
+
|
|
171
|
+
Deprecated. Use `convert()` instead.
|
|
156
172
|
|
|
157
|
-
|
|
173
|
+
### CLI
|
|
158
174
|
|
|
159
175
|
```bash
|
|
160
|
-
npx @opendataloader/pdf path/to/document.pdf -o path/to/output
|
|
176
|
+
npx @opendataloader/pdf path/to/document.pdf path/to/folder -o path/to/output -f json html pdf markdown
|
|
161
177
|
```
|
|
162
178
|
|
|
163
|
-
|
|
179
|
+
Or install globally:
|
|
164
180
|
|
|
165
181
|
```bash
|
|
166
182
|
npm install -g @opendataloader/pdf
|
|
167
183
|
```
|
|
168
184
|
|
|
169
|
-
|
|
185
|
+
Then run:
|
|
170
186
|
|
|
171
187
|
```bash
|
|
172
|
-
opendataloader-pdf path/to/document.pdf -o path/to/output
|
|
188
|
+
opendataloader-pdf path/to/document.pdf path/to/folder -o path/to/output -f json html pdf markdown
|
|
173
189
|
```
|
|
174
190
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
`run(inputPath: string, options?: RunOptions): Promise<string>`
|
|
191
|
+
#### Available options
|
|
178
192
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
| Property | Type | Default | Description |
|
|
191
|
-
| ----------------------- | --------- | ------------- |-------------------------------------------------------------------------------------------------------------------------------------------------------|
|
|
192
|
-
| `outputFolder` | `string` | `undefined` | Path to the output folder. If not set, output is saved next to the input. |
|
|
193
|
-
| `password` | `string` | `undefined` | Password for the PDF file. |
|
|
194
|
-
| `replaceInvalidChars` | `string` | `" "` | Character to replace invalid or unrecognized characters (e.g., , \u0000). |
|
|
195
|
-
| `contentSafetyOff` | `string` | `undefined` | Disables one or more content safety filters. Accepts a comma-separated list of filter names. Arguments: all, hidden-text, off-page, tiny, hidden-ocg. |
|
|
196
|
-
| `generateMarkdown` | `boolean` | `false` | If `true`, generates a Markdown output file. |
|
|
197
|
-
| `generateHtml` | `boolean` | `false` | If `true`, generates an HTML output file. |
|
|
198
|
-
| `generateAnnotatedPdf` | `boolean` | `false` | If `true`, generates an annotated PDF output file. |
|
|
199
|
-
| `keepLineBreaks` | `boolean` | `false` | If `true`, keeps line breaks in the output. |
|
|
200
|
-
| `htmlInMarkdown` | `boolean` | `false` | If `true`, uses HTML in the Markdown output. |
|
|
201
|
-
| `addImageToMarkdown` | `boolean` | `false` | If `true`, adds images to the Markdown output. |
|
|
202
|
-
| `noJson` | `boolean` | `false` | If `true`, disables the JSON output. |
|
|
203
|
-
| `debug` | `boolean` | `false` | If `true`, prints CLI messages to the console during execution. |
|
|
193
|
+
```
|
|
194
|
+
-o, --output-dir <path> Directory where outputs are written
|
|
195
|
+
-p, --password <password> Password for encrypted PDFs
|
|
196
|
+
-f, --format <value...> Output formats to generate (json, text, html, pdf, markdown, markdown-with-html, markdown-with-images)
|
|
197
|
+
-q, --quiet Suppress CLI logging output
|
|
198
|
+
--content-safety-off <mode...> Disable one or more content safety filters (all, hidden-text, off-page, tiny, hidden-ocg)
|
|
199
|
+
--keep-line-breaks Preserve line breaks in text output
|
|
200
|
+
--replace-invalid-chars <c> Replacement character for invalid or unrecognized characters
|
|
201
|
+
-h, --help Show usage information
|
|
202
|
+
```
|
|
204
203
|
|
|
205
204
|
<br/>
|
|
206
205
|
|
package/dist/cli.cjs
CHANGED
|
@@ -23,6 +23,9 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
|
|
|
23
23
|
mod
|
|
24
24
|
));
|
|
25
25
|
|
|
26
|
+
// src/cli.ts
|
|
27
|
+
var import_commander = require("commander");
|
|
28
|
+
|
|
26
29
|
// src/index.ts
|
|
27
30
|
var import_child_process = require("child_process");
|
|
28
31
|
var path = __toESM(require("path"), 1);
|
|
@@ -32,54 +35,9 @@ var import_meta = {};
|
|
|
32
35
|
var __filename = (0, import_url.fileURLToPath)(import_meta.url);
|
|
33
36
|
var __dirname = path.dirname(__filename);
|
|
34
37
|
var JAR_NAME = "opendataloader-pdf-cli.jar";
|
|
35
|
-
function
|
|
36
|
-
const
|
|
37
|
-
const passwordIndex = commandArgsForLogging.indexOf("--password");
|
|
38
|
-
if (passwordIndex > -1 && passwordIndex + 1 < commandArgsForLogging.length) {
|
|
39
|
-
commandArgsForLogging[passwordIndex + 1] = "[REDACTED]";
|
|
40
|
-
}
|
|
41
|
-
return `${command} ${commandArgsForLogging.join(" ")}`;
|
|
42
|
-
}
|
|
43
|
-
function run(inputPath, options = {}) {
|
|
38
|
+
function executeJar(args, executionOptions = {}) {
|
|
39
|
+
const { debug = false, streamOutput = false } = executionOptions;
|
|
44
40
|
return new Promise((resolve, reject) => {
|
|
45
|
-
if (!fs.existsSync(inputPath)) {
|
|
46
|
-
return reject(new Error(`Input file or folder not found: ${inputPath}`));
|
|
47
|
-
}
|
|
48
|
-
const args = [];
|
|
49
|
-
if (options.outputFolder) {
|
|
50
|
-
args.push("--output-dir", options.outputFolder);
|
|
51
|
-
}
|
|
52
|
-
if (options.password) {
|
|
53
|
-
args.push("--password", options.password);
|
|
54
|
-
}
|
|
55
|
-
if (options.replaceInvalidChars) {
|
|
56
|
-
args.push("--replace-invalid-chars", options.replaceInvalidChars);
|
|
57
|
-
}
|
|
58
|
-
if (options.generateMarkdown) {
|
|
59
|
-
args.push("--markdown");
|
|
60
|
-
}
|
|
61
|
-
if (options.generateHtml) {
|
|
62
|
-
args.push("--html");
|
|
63
|
-
}
|
|
64
|
-
if (options.generateAnnotatedPdf) {
|
|
65
|
-
args.push("--pdf");
|
|
66
|
-
}
|
|
67
|
-
if (options.keepLineBreaks) {
|
|
68
|
-
args.push("--keep-line-breaks");
|
|
69
|
-
}
|
|
70
|
-
if (options.contentSafetyOff) {
|
|
71
|
-
args.push("--content-safety-off", options.contentSafetyOff);
|
|
72
|
-
}
|
|
73
|
-
if (options.htmlInMarkdown) {
|
|
74
|
-
args.push("--markdown-with-html");
|
|
75
|
-
}
|
|
76
|
-
if (options.addImageToMarkdown) {
|
|
77
|
-
args.push("--markdown-with-images");
|
|
78
|
-
}
|
|
79
|
-
if (options.noJson) {
|
|
80
|
-
args.push("--no-json");
|
|
81
|
-
}
|
|
82
|
-
args.push(inputPath);
|
|
83
41
|
const jarPath = path.join(__dirname, "..", "lib", JAR_NAME);
|
|
84
42
|
if (!fs.existsSync(jarPath)) {
|
|
85
43
|
return reject(
|
|
@@ -88,22 +46,19 @@ function run(inputPath, options = {}) {
|
|
|
88
46
|
}
|
|
89
47
|
const command = "java";
|
|
90
48
|
const commandArgs = ["-jar", jarPath, ...args];
|
|
91
|
-
if (options.debug) {
|
|
92
|
-
console.error(`Running command: ${getRedactedCommandString(command, commandArgs)}`);
|
|
93
|
-
}
|
|
94
49
|
const javaProcess = (0, import_child_process.spawn)(command, commandArgs);
|
|
95
50
|
let stdout = "";
|
|
96
51
|
let stderr = "";
|
|
97
52
|
javaProcess.stdout.on("data", (data) => {
|
|
98
53
|
const chunk = data.toString();
|
|
99
|
-
if (
|
|
54
|
+
if (streamOutput) {
|
|
100
55
|
process.stdout.write(chunk);
|
|
101
56
|
}
|
|
102
57
|
stdout += chunk;
|
|
103
58
|
});
|
|
104
59
|
javaProcess.stderr.on("data", (data) => {
|
|
105
60
|
const chunk = data.toString();
|
|
106
|
-
if (
|
|
61
|
+
if (streamOutput) {
|
|
107
62
|
process.stderr.write(chunk);
|
|
108
63
|
}
|
|
109
64
|
stderr += chunk;
|
|
@@ -112,10 +67,11 @@ function run(inputPath, options = {}) {
|
|
|
112
67
|
if (code === 0) {
|
|
113
68
|
resolve(stdout);
|
|
114
69
|
} else {
|
|
70
|
+
const errorOutput = stderr || stdout;
|
|
115
71
|
const error = new Error(
|
|
116
72
|
`The opendataloader-pdf CLI exited with code ${code}.
|
|
117
73
|
|
|
118
|
-
${
|
|
74
|
+
${errorOutput}`
|
|
119
75
|
);
|
|
120
76
|
reject(error);
|
|
121
77
|
}
|
|
@@ -133,133 +89,145 @@ ${stderr}`
|
|
|
133
89
|
});
|
|
134
90
|
});
|
|
135
91
|
}
|
|
92
|
+
function convert(inputPaths, options = {}) {
|
|
93
|
+
if (inputPaths.length === 0) {
|
|
94
|
+
return Promise.reject(new Error("At least one input path must be provided."));
|
|
95
|
+
}
|
|
96
|
+
for (const input of inputPaths) {
|
|
97
|
+
if (!fs.existsSync(input)) {
|
|
98
|
+
return Promise.reject(new Error(`Input file or folder not found: ${input}`));
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
const args = [...inputPaths];
|
|
102
|
+
if (options.outputDir) {
|
|
103
|
+
args.push("--output-dir", options.outputDir);
|
|
104
|
+
}
|
|
105
|
+
if (options.password) {
|
|
106
|
+
args.push("--password", options.password);
|
|
107
|
+
}
|
|
108
|
+
if (options.format && options.format.length > 0) {
|
|
109
|
+
args.push("--format", ...options.format);
|
|
110
|
+
}
|
|
111
|
+
if (options.quiet) {
|
|
112
|
+
args.push("--quiet");
|
|
113
|
+
}
|
|
114
|
+
if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {
|
|
115
|
+
args.push("--content-safety-off", ...options.contentSafetyOff);
|
|
116
|
+
}
|
|
117
|
+
if (options.keepLineBreaks) {
|
|
118
|
+
args.push("--keep-line-breaks");
|
|
119
|
+
}
|
|
120
|
+
if (options.replaceInvalidChars) {
|
|
121
|
+
args.push("--replace-invalid-chars", options.replaceInvalidChars);
|
|
122
|
+
}
|
|
123
|
+
return executeJar(args, {
|
|
124
|
+
streamOutput: !options.quiet
|
|
125
|
+
});
|
|
126
|
+
}
|
|
136
127
|
|
|
137
128
|
// src/cli.ts
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
};
|
|
167
|
-
for (let i = 0; i < argv.length; i += 1) {
|
|
168
|
-
const arg = argv[i];
|
|
169
|
-
switch (arg) {
|
|
170
|
-
case "--help":
|
|
171
|
-
case "-h":
|
|
172
|
-
showHelp = true;
|
|
173
|
-
i = argv.length;
|
|
174
|
-
break;
|
|
175
|
-
case "--output-dir":
|
|
176
|
-
case "-o": {
|
|
177
|
-
const { value, nextIndex } = readValue(i, arg);
|
|
178
|
-
options.outputFolder = value;
|
|
179
|
-
i = nextIndex;
|
|
180
|
-
break;
|
|
181
|
-
}
|
|
182
|
-
case "--password":
|
|
183
|
-
case "-p": {
|
|
184
|
-
const { value, nextIndex } = readValue(i, arg);
|
|
185
|
-
options.password = value;
|
|
186
|
-
i = nextIndex;
|
|
187
|
-
break;
|
|
188
|
-
}
|
|
189
|
-
case "--replace-invalid-chars": {
|
|
190
|
-
const { value, nextIndex } = readValue(i, arg);
|
|
191
|
-
options.replaceInvalidChars = value;
|
|
192
|
-
i = nextIndex;
|
|
193
|
-
break;
|
|
194
|
-
}
|
|
195
|
-
case "--content-safety-off": {
|
|
196
|
-
const { value, nextIndex } = readValue(i, arg);
|
|
197
|
-
options.contentSafetyOff = value;
|
|
198
|
-
i = nextIndex;
|
|
199
|
-
break;
|
|
200
|
-
}
|
|
201
|
-
case "--markdown":
|
|
202
|
-
options.generateMarkdown = true;
|
|
203
|
-
break;
|
|
204
|
-
case "--html":
|
|
205
|
-
options.generateHtml = true;
|
|
206
|
-
break;
|
|
207
|
-
case "--pdf":
|
|
208
|
-
options.generateAnnotatedPdf = true;
|
|
209
|
-
break;
|
|
210
|
-
case "--keep-line-breaks":
|
|
211
|
-
options.keepLineBreaks = true;
|
|
212
|
-
break;
|
|
213
|
-
case "--markdown-with-html":
|
|
214
|
-
options.htmlInMarkdown = true;
|
|
215
|
-
break;
|
|
216
|
-
case "--markdown-with-images":
|
|
217
|
-
options.addImageToMarkdown = true;
|
|
218
|
-
break;
|
|
219
|
-
case "--no-json":
|
|
220
|
-
options.noJson = true;
|
|
221
|
-
break;
|
|
222
|
-
case "--debug":
|
|
223
|
-
options.debug = true;
|
|
224
|
-
break;
|
|
225
|
-
default:
|
|
226
|
-
if (arg.startsWith("-")) {
|
|
227
|
-
throw new Error(`Unknown option: ${arg}`);
|
|
228
|
-
}
|
|
229
|
-
if (inputPath) {
|
|
230
|
-
throw new Error("Multiple input paths provided. Only one input path is allowed.");
|
|
231
|
-
}
|
|
232
|
-
inputPath = arg;
|
|
129
|
+
var VALID_FORMATS = /* @__PURE__ */ new Set([
|
|
130
|
+
"json",
|
|
131
|
+
"text",
|
|
132
|
+
"html",
|
|
133
|
+
"pdf",
|
|
134
|
+
"markdown",
|
|
135
|
+
"markdown-with-html",
|
|
136
|
+
"markdown-with-images"
|
|
137
|
+
]);
|
|
138
|
+
var VALID_CONTENT_SAFETY_MODES = /* @__PURE__ */ new Set([
|
|
139
|
+
"all",
|
|
140
|
+
"hidden-text",
|
|
141
|
+
"off-page",
|
|
142
|
+
"tiny",
|
|
143
|
+
"hidden-ocg"
|
|
144
|
+
]);
|
|
145
|
+
function createProgram() {
|
|
146
|
+
const program = new import_commander.Command();
|
|
147
|
+
program.name("opendataloader-pdf").usage("[options] <input...>").description("Convert PDFs using the OpenDataLoader CLI.").showHelpAfterError("Use '--help' to see available options.").showSuggestionAfterError(false).argument("<input...>", "Input files or directories to convert").option("-o, --output-dir <path>", "Directory where outputs are written").option("-p, --password <password>", "Password for encrypted PDFs").option(
|
|
148
|
+
"-f, --format <value...>",
|
|
149
|
+
"Output formats to generate (json, text, html, pdf, markdown, markdown-with-html, markdown-with-images)"
|
|
150
|
+
).option("-q, --quiet", "Suppress CLI logging output").option("--content-safety-off <mode...>", "Disable one or more content safety filters").option("--keep-line-breaks", "Preserve line breaks in text output").option("--replace-invalid-chars <c>", "Replacement character for invalid characters");
|
|
151
|
+
program.configureOutput({
|
|
152
|
+
writeErr: (str) => {
|
|
153
|
+
console.error(str.trimEnd());
|
|
154
|
+
},
|
|
155
|
+
outputError: (str, write) => {
|
|
156
|
+
write(str);
|
|
233
157
|
}
|
|
158
|
+
});
|
|
159
|
+
return program;
|
|
160
|
+
}
|
|
161
|
+
function buildConvertOptions(options) {
|
|
162
|
+
const convertOptions = {};
|
|
163
|
+
if (options.outputDir) {
|
|
164
|
+
convertOptions.outputDir = options.outputDir;
|
|
165
|
+
}
|
|
166
|
+
if (options.password) {
|
|
167
|
+
convertOptions.password = options.password;
|
|
168
|
+
}
|
|
169
|
+
if (options.format && options.format.length > 0) {
|
|
170
|
+
convertOptions.format = options.format;
|
|
171
|
+
}
|
|
172
|
+
if (options.quiet) {
|
|
173
|
+
convertOptions.quiet = true;
|
|
234
174
|
}
|
|
235
|
-
|
|
175
|
+
if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {
|
|
176
|
+
convertOptions.contentSafetyOff = options.contentSafetyOff;
|
|
177
|
+
}
|
|
178
|
+
if (options.keepLineBreaks) {
|
|
179
|
+
convertOptions.keepLineBreaks = true;
|
|
180
|
+
}
|
|
181
|
+
if (options.replaceInvalidChars) {
|
|
182
|
+
convertOptions.replaceInvalidChars = options.replaceInvalidChars;
|
|
183
|
+
}
|
|
184
|
+
return convertOptions;
|
|
236
185
|
}
|
|
237
186
|
async function main() {
|
|
238
|
-
|
|
187
|
+
const program = createProgram();
|
|
188
|
+
program.exitOverride();
|
|
239
189
|
try {
|
|
240
|
-
|
|
190
|
+
program.parse(process.argv);
|
|
241
191
|
} catch (err) {
|
|
192
|
+
if (err instanceof import_commander.CommanderError) {
|
|
193
|
+
if (err.code === "commander.helpDisplayed") {
|
|
194
|
+
return 0;
|
|
195
|
+
}
|
|
196
|
+
return err.exitCode ?? 1;
|
|
197
|
+
}
|
|
242
198
|
const message = err instanceof Error ? err.message : String(err);
|
|
243
199
|
console.error(message);
|
|
244
200
|
console.error("Use '--help' to see available options.");
|
|
245
201
|
return 1;
|
|
246
202
|
}
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
203
|
+
const cliOptions = program.opts();
|
|
204
|
+
const inputPaths = program.args;
|
|
205
|
+
if (cliOptions.format) {
|
|
206
|
+
for (const value of cliOptions.format) {
|
|
207
|
+
if (!VALID_FORMATS.has(value)) {
|
|
208
|
+
console.error(`Invalid format '${value}'. See '--help' for allowed values.`);
|
|
209
|
+
console.error("Use '--help' to see available options.");
|
|
210
|
+
return 1;
|
|
211
|
+
}
|
|
212
|
+
}
|
|
250
213
|
}
|
|
251
|
-
if (
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
214
|
+
if (cliOptions.contentSafetyOff) {
|
|
215
|
+
for (const value of cliOptions.contentSafetyOff) {
|
|
216
|
+
if (!VALID_CONTENT_SAFETY_MODES.has(value)) {
|
|
217
|
+
console.error(`Invalid content safety mode '${value}'. See '--help' for allowed values.`);
|
|
218
|
+
console.error("Use '--help' to see available options.");
|
|
219
|
+
return 1;
|
|
220
|
+
}
|
|
221
|
+
}
|
|
255
222
|
}
|
|
223
|
+
const convertOptions = buildConvertOptions(cliOptions);
|
|
256
224
|
try {
|
|
257
|
-
const output = await
|
|
258
|
-
if (output && !
|
|
225
|
+
const output = await convert(inputPaths, convertOptions);
|
|
226
|
+
if (output && !convertOptions.quiet) {
|
|
259
227
|
process.stdout.write(output);
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
228
|
+
if (!output.endsWith("\n")) {
|
|
229
|
+
process.stdout.write("\n");
|
|
230
|
+
}
|
|
263
231
|
}
|
|
264
232
|
return 0;
|
|
265
233
|
} catch (err) {
|
package/dist/cli.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/index.ts","../src/cli.ts"],"sourcesContent":["import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { fileURLToPath } from 'url';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\nfunction getRedactedCommandString(command: string, commandArgs: string[]): string {\n const commandArgsForLogging = [...commandArgs];\n const passwordIndex = commandArgsForLogging.indexOf('--password');\n if (passwordIndex > -1 && passwordIndex + 1 < commandArgsForLogging.length) {\n commandArgsForLogging[passwordIndex + 1] = '[REDACTED]';\n }\n return `${command} ${commandArgsForLogging.join(' ')}`;\n}\n\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n}\n\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n return new Promise((resolve, reject) => {\n if (!fs.existsSync(inputPath)) {\n return reject(new Error(`Input file or folder not found: ${inputPath}`));\n }\n\n const args: string[] = [];\n if (options.outputFolder) {\n args.push('--output-dir', options.outputFolder);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.generateMarkdown) {\n args.push('--markdown');\n }\n if (options.generateHtml) {\n args.push('--html');\n }\n if (options.generateAnnotatedPdf) {\n args.push('--pdf');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.contentSafetyOff) {\n 
args.push('--content-safety-off', options.contentSafetyOff);\n }\n if (options.htmlInMarkdown) {\n args.push('--markdown-with-html');\n }\n if (options.addImageToMarkdown) {\n args.push('--markdown-with-images');\n }\n if (options.noJson) {\n args.push('--no-json');\n }\n\n args.push(inputPath);\n\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n if (options.debug) {\n console.error(`Running command: ${getRedactedCommandString(command, commandArgs)}`);\n }\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n\n javaProcess.stdout.on('data', (data) => {\n const chunk = data.toString();\n if (options.debug) {\n process.stdout.write(chunk);\n }\n stdout += chunk;\n });\n\n javaProcess.stderr.on('data', (data) => {\n const chunk = data.toString();\n if (options.debug) {\n process.stderr.write(chunk);\n }\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n if (code === 0) {\n resolve(stdout);\n } else {\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${stderr}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. 
Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n","#!/usr/bin/env node\nimport { run, RunOptions } from './index.js';\n\ninterface ParsedArgs {\n inputPath?: string;\n options: RunOptions;\n showHelp: boolean;\n}\n\nfunction printHelp(): void {\n console.log(`Usage: opendataloader-pdf [options] <input>`);\n console.log('');\n console.log('Options:');\n console.log(' -o, --output-dir <path> Directory where outputs are written');\n console.log(' -p, --password <password> Password for encrypted PDFs');\n console.log(' --replace-invalid-chars <c> Replacement character for invalid characters');\n console.log(' --content-safety-off <mode> Disable content safety filtering (provide mode)');\n console.log(' --markdown Generate Markdown output');\n console.log(' --html Generate HTML output');\n console.log(' --pdf Generate annotated PDF output');\n console.log(' --keep-line-breaks Preserve line breaks in text output');\n console.log(' --markdown-with-html Allow raw HTML within Markdown output');\n console.log(' --markdown-with-images Embed images in Markdown output');\n console.log(' --no-json Disable JSON output generation');\n console.log(' --debug Stream CLI logs directly to stdout/stderr');\n console.log(' -h, --help Show this message and exit');\n}\n\nfunction parseArgs(argv: string[]): ParsedArgs {\n const options: RunOptions = {};\n let inputPath: string | undefined;\n let showHelp = false;\n\n const readValue = (currentIndex: number, option: string): { value: string; nextIndex: number } => {\n const nextValue = argv[currentIndex + 1];\n if (!nextValue || nextValue.startsWith('-')) {\n throw new Error(`Option ${option} requires a value.`);\n }\n return { value: nextValue, nextIndex: currentIndex + 1 };\n };\n\n for (let i = 0; i < argv.length; i += 1) {\n const arg = argv[i];\n\n switch (arg) {\n case '--help':\n case '-h':\n showHelp = true;\n i = argv.length; // exit loop\n break;\n case 
'--output-dir':\n case '-o': {\n const { value, nextIndex } = readValue(i, arg);\n options.outputFolder = value;\n i = nextIndex;\n break;\n }\n case '--password':\n case '-p': {\n const { value, nextIndex } = readValue(i, arg);\n options.password = value;\n i = nextIndex;\n break;\n }\n case '--replace-invalid-chars': {\n const { value, nextIndex } = readValue(i, arg);\n options.replaceInvalidChars = value;\n i = nextIndex;\n break;\n }\n case '--content-safety-off': {\n const { value, nextIndex } = readValue(i, arg);\n options.contentSafetyOff = value;\n i = nextIndex;\n break;\n }\n case '--markdown':\n options.generateMarkdown = true;\n break;\n case '--html':\n options.generateHtml = true;\n break;\n case '--pdf':\n options.generateAnnotatedPdf = true;\n break;\n case '--keep-line-breaks':\n options.keepLineBreaks = true;\n break;\n case '--markdown-with-html':\n options.htmlInMarkdown = true;\n break;\n case '--markdown-with-images':\n options.addImageToMarkdown = true;\n break;\n case '--no-json':\n options.noJson = true;\n break;\n case '--debug':\n options.debug = true;\n break;\n default:\n if (arg.startsWith('-')) {\n throw new Error(`Unknown option: ${arg}`);\n }\n\n if (inputPath) {\n throw new Error('Multiple input paths provided. Only one input path is allowed.');\n }\n inputPath = arg;\n }\n }\n\n return { inputPath, options, showHelp };\n}\n\nasync function main(): Promise<number> {\n let parsed: ParsedArgs;\n\n try {\n parsed = parseArgs(process.argv.slice(2));\n } catch (err) {\n const message = err instanceof Error ? 
err.message : String(err);\n console.error(message);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n\n if (parsed.showHelp) {\n printHelp();\n return 0;\n }\n\n if (!parsed.inputPath) {\n console.error('Missing required input path.');\n console.error(\"Use '--help' to see usage information.\");\n return 1;\n }\n\n try {\n const output = await run(parsed.inputPath, parsed.options);\n if (output && !parsed.options.debug) {\n process.stdout.write(output);\n }\n if (output && !output.endsWith('\\n') && !parsed.options.debug) {\n process.stdout.write('\\n');\n }\n return 0;\n } catch (err) {\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n return 1;\n }\n}\n\nmain().then((code) => {\n if (code !== 0) {\n process.exit(code);\n }\n});\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,2BAAsB;AACtB,WAAsB;AACtB,SAAoB;AACpB,iBAA8B;AAH9B;AAKA,IAAM,iBAAa,0BAAc,YAAY,GAAG;AAChD,IAAM,YAAiB,aAAQ,UAAU;AAEzC,IAAM,WAAW;AAEjB,SAAS,yBAAyB,SAAiB,aAA+B;AAChF,QAAM,wBAAwB,CAAC,GAAG,WAAW;AAC7C,QAAM,gBAAgB,sBAAsB,QAAQ,YAAY;AAChE,MAAI,gBAAgB,MAAM,gBAAgB,IAAI,sBAAsB,QAAQ;AAC1E,0BAAsB,gBAAgB,CAAC,IAAI;AAAA,EAC7C;AACA,SAAO,GAAG,OAAO,IAAI,sBAAsB,KAAK,GAAG,CAAC;AACtD;AAiBO,SAAS,IAAI,WAAmB,UAAsB,CAAC,GAAoB;AAChF,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,QAAI,CAAI,cAAW,SAAS,GAAG;AAC7B,aAAO,OAAO,IAAI,MAAM,mCAAmC,SAAS,EAAE,CAAC;AAAA,IACzE;AAEA,UAAM,OAAiB,CAAC;AACxB,QAAI,QAAQ,cAAc;AACxB,WAAK,KAAK,gBAAgB,QAAQ,YAAY;AAAA,IAChD;AACA,QAAI,QAAQ,UAAU;AACpB,WAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,IAC1C;AACA,QAAI,QAAQ,qBAAqB;AAC/B,WAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,IAClE;AACA,QAAI,QAAQ,kBAAkB;AAC5B,WAAK,KAAK,YAAY;AAAA,IACxB;AACA,QAAI,QAAQ,cAAc;AACxB,WAAK,KAAK,QAAQ;AAAA,IACpB;AACA,QAAI,QAAQ,sBAAsB;AAChC,WAAK,KAAK,OAAO;AAAA,IACnB;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,oBAAoB;AAAA,IAChC;AACA,QAAI,QAAQ,kBAAkB;AAC5B,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,sBAAsB;AAAA,IAClC;AACA,QAAI,QAAQ,oBAAoB;AAC9B,WAAK,KAAK,wBAAwB;AAAA,IA
CpC;AACA,QAAI,QAAQ,QAAQ;AAClB,WAAK,KAAK,WAAW;AAAA,IACvB;AAEA,SAAK,KAAK,SAAS;AAEnB,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,QAAI,QAAQ,OAAO;AACjB,cAAQ,MAAM,oBAAoB,yBAAyB,SAAS,WAAW,CAAC,EAAE;AAAA,IACpF;AAEA,UAAM,kBAAc,4BAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,QAAQ,OAAO;AACjB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,QAAQ,OAAO;AACjB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAChC,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,MAAM;AAAA,QACnE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAQ;AAC/B,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;;;AC/HA,SAAS,YAAkB;AACzB,UAAQ,IAAI,6CAA6C;AACzD,UAAQ,IAAI,EAAE;AACd,UAAQ,IAAI,UAAU;AACtB,UAAQ,IAAI,oEAAoE;AAChF,UAAQ,IAAI,4DAA4D;AACxE,UAAQ,IAAI,6EAA6E;AACzF,UAAQ,IAAI,gFAAgF;AAC5F,UAAQ,IAAI,yDAAyD;AACrE,UAAQ,IAAI,qDAAqD;AACjE,UAAQ,IAAI,8DAA8D;AAC1E,UAAQ,IAAI,oEAAoE;AAChF,UAAQ,IAAI,sEAAsE;AAClF,UAAQ,IAAI,gEAAgE;AAC5E,UAAQ,IAAI,+DAA+D;AAC3E,UAAQ,IAAI,0EAA0E;AACtF,UAAQ,IAAI,2DAA2D;AACzE;AAEA,SAAS,UAAU,MAA4B;AAC7C,QAAM,UAAsB,CAAC;AAC7B,MAAI;AACJ,MAAI,WAAW;AAEf,QAAM,YAAY,CAAC,cAAsB,WAAyD;AAChG,UAAM,YAAY,KAAK,eAAe,CAAC;AACvC,QAAI,CAAC,aAAa,UAAU,WAAW,GAAG,GAAG;AAC3C,YAAM,IAAI,MAAM,UAAU,MAAM,oBAAoB;AAAA,IACtD;AACA,WAAO,EAAE,OAAO,WAAW,WAAW,eAAe,EAAE;AAAA,EACzD;AAEA,WAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK,GAAG;AACvC,UAAM,MAAM,KAAK,CAAC;AAElB,YAAQ,KAAK;AAAA,MACX,KAAK;AAAA,MACL,KAAK;AACH,mBAAW;AACX,YAAI,KAAK;AACT;AAAA,MACF,KAAK;AAAA,MACL,KAAK,MAAM;AACT,cAAM,EAAE,OAAO,UAAU,IAAI,UAAU,GAAG,GAAG;AAC7C,
gBAAQ,eAAe;AACvB,YAAI;AACJ;AAAA,MACF;AAAA,MACA,KAAK;AAAA,MACL,KAAK,MAAM;AACT,cAAM,EAAE,OAAO,UAAU,IAAI,UAAU,GAAG,GAAG;AAC7C,gBAAQ,WAAW;AACnB,YAAI;AACJ;AAAA,MACF;AAAA,MACA,KAAK,2BAA2B;AAC9B,cAAM,EAAE,OAAO,UAAU,IAAI,UAAU,GAAG,GAAG;AAC7C,gBAAQ,sBAAsB;AAC9B,YAAI;AACJ;AAAA,MACF;AAAA,MACA,KAAK,wBAAwB;AAC3B,cAAM,EAAE,OAAO,UAAU,IAAI,UAAU,GAAG,GAAG;AAC7C,gBAAQ,mBAAmB;AAC3B,YAAI;AACJ;AAAA,MACF;AAAA,MACA,KAAK;AACH,gBAAQ,mBAAmB;AAC3B;AAAA,MACF,KAAK;AACH,gBAAQ,eAAe;AACvB;AAAA,MACF,KAAK;AACH,gBAAQ,uBAAuB;AAC/B;AAAA,MACF,KAAK;AACH,gBAAQ,iBAAiB;AACzB;AAAA,MACF,KAAK;AACH,gBAAQ,iBAAiB;AACzB;AAAA,MACF,KAAK;AACH,gBAAQ,qBAAqB;AAC7B;AAAA,MACF,KAAK;AACH,gBAAQ,SAAS;AACjB;AAAA,MACF,KAAK;AACH,gBAAQ,QAAQ;AAChB;AAAA,MACF;AACE,YAAI,IAAI,WAAW,GAAG,GAAG;AACvB,gBAAM,IAAI,MAAM,mBAAmB,GAAG,EAAE;AAAA,QAC1C;AAEA,YAAI,WAAW;AACb,gBAAM,IAAI,MAAM,gEAAgE;AAAA,QAClF;AACA,oBAAY;AAAA,IAChB;AAAA,EACF;AAEA,SAAO,EAAE,WAAW,SAAS,SAAS;AACxC;AAEA,eAAe,OAAwB;AACrC,MAAI;AAEJ,MAAI;AACF,aAAS,UAAU,QAAQ,KAAK,MAAM,CAAC,CAAC;AAAA,EAC1C,SAAS,KAAK;AACZ,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,MAAI,OAAO,UAAU;AACnB,cAAU;AACV,WAAO;AAAA,EACT;AAEA,MAAI,CAAC,OAAO,WAAW;AACrB,YAAQ,MAAM,8BAA8B;AAC5C,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,MAAI;AACF,UAAM,SAAS,MAAM,IAAI,OAAO,WAAW,OAAO,OAAO;AACzD,QAAI,UAAU,CAAC,OAAO,QAAQ,OAAO;AACnC,cAAQ,OAAO,MAAM,MAAM;AAAA,IAC7B;AACA,QAAI,UAAU,CAAC,OAAO,SAAS,IAAI,KAAK,CAAC,OAAO,QAAQ,OAAO;AAC7D,cAAQ,OAAO,MAAM,IAAI;AAAA,IAC3B;AACA,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,KAAK,EAAE,KAAK,CAAC,SAAS;AACpB,MAAI,SAAS,GAAG;AACd,YAAQ,KAAK,IAAI;AAAA,EACnB;AACF,CAAC;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/cli.ts","../src/index.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { Command, CommanderError } from 'commander';\nimport { convert, ConvertOptions } from './index.js';\n\ninterface CliOptions {\n outputDir?: string;\n password?: string;\n format?: string[];\n quiet?: boolean;\n contentSafetyOff?: string[];\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n}\n\nconst VALID_FORMATS = new Set([\n 'json',\n 'text',\n 'html',\n 'pdf',\n 'markdown',\n 'markdown-with-html',\n 'markdown-with-images',\n]);\n\nconst VALID_CONTENT_SAFETY_MODES = new Set([\n 'all',\n 'hidden-text',\n 'off-page',\n 'tiny',\n 'hidden-ocg',\n]);\n\nfunction createProgram(): Command {\n const program = new Command();\n\n program\n .name('opendataloader-pdf')\n .usage('[options] <input...>')\n .description('Convert PDFs using the OpenDataLoader CLI.')\n .showHelpAfterError(\"Use '--help' to see available options.\")\n .showSuggestionAfterError(false)\n .argument('<input...>', 'Input files or directories to convert')\n .option('-o, --output-dir <path>', 'Directory where outputs are written')\n .option('-p, --password <password>', 'Password for encrypted PDFs')\n .option(\n '-f, --format <value...>',\n 'Output formats to generate (json, text, html, pdf, markdown, markdown-with-html, markdown-with-images)',\n )\n .option('-q, --quiet', 'Suppress CLI logging output')\n .option('--content-safety-off <mode...>', 'Disable one or more content safety filters')\n .option('--keep-line-breaks', 'Preserve line breaks in text output')\n .option('--replace-invalid-chars <c>', 'Replacement character for invalid characters');\n\n program.configureOutput({\n writeErr: (str) => {\n console.error(str.trimEnd());\n },\n outputError: (str, write) => {\n write(str);\n },\n });\n\n return program;\n}\n\nfunction buildConvertOptions(options: CliOptions): ConvertOptions {\n const convertOptions: ConvertOptions = {};\n\n if (options.outputDir) {\n convertOptions.outputDir = 
options.outputDir;\n }\n if (options.password) {\n convertOptions.password = options.password;\n }\n if (options.format && options.format.length > 0) {\n convertOptions.format = options.format;\n }\n if (options.quiet) {\n convertOptions.quiet = true;\n }\n if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {\n convertOptions.contentSafetyOff = options.contentSafetyOff;\n }\n if (options.keepLineBreaks) {\n convertOptions.keepLineBreaks = true;\n }\n if (options.replaceInvalidChars) {\n convertOptions.replaceInvalidChars = options.replaceInvalidChars;\n }\n\n return convertOptions;\n}\n\nasync function main(): Promise<number> {\n const program = createProgram();\n\n program.exitOverride();\n\n try {\n program.parse(process.argv);\n } catch (err) {\n if (err instanceof CommanderError) {\n if (err.code === 'commander.helpDisplayed') {\n return 0;\n }\n return err.exitCode ?? 1;\n }\n\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n\n const cliOptions = program.opts<CliOptions>();\n const inputPaths = program.args;\n\n if (cliOptions.format) {\n for (const value of cliOptions.format) {\n if (!VALID_FORMATS.has(value)) {\n console.error(`Invalid format '${value}'. See '--help' for allowed values.`);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n }\n }\n\n if (cliOptions.contentSafetyOff) {\n for (const value of cliOptions.contentSafetyOff) {\n if (!VALID_CONTENT_SAFETY_MODES.has(value)) {\n console.error(`Invalid content safety mode '${value}'. 
See '--help' for allowed values.`);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n }\n }\n\n const convertOptions = buildConvertOptions(cliOptions);\n\n try {\n const output = await convert(inputPaths, convertOptions);\n if (output && !convertOptions.quiet) {\n process.stdout.write(output);\n if (!output.endsWith('\\n')) {\n process.stdout.write('\\n');\n }\n }\n return 0;\n } catch (err) {\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n return 1;\n }\n}\n\nmain().then((code) => {\n if (code !== 0) {\n process.exit(code);\n }\n});\n","import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { fileURLToPath } from 'url';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n debug?: boolean;\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { debug = false, streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. 
Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n\n javaProcess.stdout.on('data', (data) => {\n const chunk = data.toString();\n if (streamOutput) {\n process.stdout.write(chunk);\n }\n stdout += chunk;\n });\n\n javaProcess.stderr.on('data', (data) => {\n const chunk = data.toString();\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n}\n\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n return new Promise((resolve, reject) => {\n if (!fs.existsSync(inputPath)) {\n return reject(new Error(`Input file or folder not found: ${inputPath}`));\n }\n\n const args: string[] = [];\n if (options.outputFolder) {\n args.push('--output-dir', options.outputFolder);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if 
(options.generateMarkdown) {\n args.push('--markdown');\n }\n if (options.generateHtml) {\n args.push('--html');\n }\n if (options.generateAnnotatedPdf) {\n args.push('--pdf');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.contentSafetyOff) {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n if (options.htmlInMarkdown) {\n args.push('--markdown-with-html');\n }\n if (options.addImageToMarkdown) {\n args.push('--markdown-with-images');\n }\n if (options.noJson) {\n args.push('--no-json');\n }\n\n args.push(inputPath);\n executeJar(args, {\n debug: options.debug,\n streamOutput: Boolean(options.debug),\n })\n .then(resolve)\n .catch(reject);\n });\n}\n\nexport interface ConvertOptions {\n outputDir?: string;\n password?: string;\n format?: string[];\n quiet?: boolean;\n contentSafetyOff?: string[];\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n}\n\nexport function convert(inputPaths: string[], options: ConvertOptions = {}): Promise<string> {\n if (inputPaths.length === 0) {\n return Promise.reject(new Error('At least one input path must be provided.'));\n }\n\n for (const input of inputPaths) {\n if (!fs.existsSync(input)) {\n return Promise.reject(new Error(`Input file or folder not found: ${input}`));\n }\n }\n\n const args: string[] = [...inputPaths];\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format && options.format.length > 0) {\n args.push('--format', ...options.format);\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff && options.contentSafetyOff.length > 0) {\n args.push('--content-safety-off', ...options.contentSafetyOff);\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n\n return 
executeJar(args, {\n streamOutput: !options.quiet,\n });\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AACA,uBAAwC;;;ACDxC,2BAAsB;AACtB,WAAsB;AACtB,SAAoB;AACpB,iBAA8B;AAH9B;AAKA,IAAM,iBAAa,0BAAc,YAAY,GAAG;AAChD,IAAM,YAAiB,aAAQ,UAAU;AAEzC,IAAM,WAAW;AAOjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,QAAQ,OAAO,eAAe,MAAM,IAAI;AAEhD,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,UAAM,kBAAc,4BAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAChC,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AA8EO,SAAS,QAAQ,YAAsB,UAA0B,CAAC,GAAoB;AAC3F,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO,QAAQ,OAAO,IAAI,MAAM,2CAA2C,CAAC;AAAA,EAC9E;AAEA,aAAW,SAAS,YAAY;AAC9B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,QAAQ,OAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE,CAAC;AAAA,IAC7E;AAAA,EACF;AAEA,QAAM,OAAiB,CAAC,GAAG,UAAU;AACrC,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,UAAU,QAAQ,OAAO,SAAS,GAAG;AAC/C,SAAK,KAAK,YAAY,GAAG,QAAQ,MAAM;AAAA,EACzC;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,oBAAoB,QAAQ,iBAAiB,SAAS,GAAG;AACnE,SAAK,KAAK,wBAAwB,GAAG,QAAQ,gBAAgB;AAAA,EAC/D;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBA
AoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AAEA,SAAO,WAAW,MAAM;AAAA,IACtB,cAAc,CAAC,QAAQ;AAAA,EACzB,CAAC;AACH;;;ADhLA,IAAM,gBAAgB,oBAAI,IAAI;AAAA,EAC5B;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,IAAM,6BAA6B,oBAAI,IAAI;AAAA,EACzC;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AACF,CAAC;AAED,SAAS,gBAAyB;AAChC,QAAM,UAAU,IAAI,yBAAQ;AAE5B,UACG,KAAK,oBAAoB,EACzB,MAAM,sBAAsB,EAC5B,YAAY,4CAA4C,EACxD,mBAAmB,wCAAwC,EAC3D,yBAAyB,KAAK,EAC9B,SAAS,cAAc,uCAAuC,EAC9D,OAAO,2BAA2B,qCAAqC,EACvE,OAAO,6BAA6B,6BAA6B,EACjE;AAAA,IACC;AAAA,IACA;AAAA,EACF,EACC,OAAO,eAAe,6BAA6B,EACnD,OAAO,kCAAkC,4CAA4C,EACrF,OAAO,sBAAsB,qCAAqC,EAClE,OAAO,+BAA+B,8CAA8C;AAEvF,UAAQ,gBAAgB;AAAA,IACtB,UAAU,CAAC,QAAQ;AACjB,cAAQ,MAAM,IAAI,QAAQ,CAAC;AAAA,IAC7B;AAAA,IACA,aAAa,CAAC,KAAK,UAAU;AAC3B,YAAM,GAAG;AAAA,IACX;AAAA,EACF,CAAC;AAED,SAAO;AACT;AAEA,SAAS,oBAAoB,SAAqC;AAChE,QAAM,iBAAiC,CAAC;AAExC,MAAI,QAAQ,WAAW;AACrB,mBAAe,YAAY,QAAQ;AAAA,EACrC;AACA,MAAI,QAAQ,UAAU;AACpB,mBAAe,WAAW,QAAQ;AAAA,EACpC;AACA,MAAI,QAAQ,UAAU,QAAQ,OAAO,SAAS,GAAG;AAC/C,mBAAe,SAAS,QAAQ;AAAA,EAClC;AACA,MAAI,QAAQ,OAAO;AACjB,mBAAe,QAAQ;AAAA,EACzB;AACA,MAAI,QAAQ,oBAAoB,QAAQ,iBAAiB,SAAS,GAAG;AACnE,mBAAe,mBAAmB,QAAQ;AAAA,EAC5C;AACA,MAAI,QAAQ,gBAAgB;AAC1B,mBAAe,iBAAiB;AAAA,EAClC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,mBAAe,sBAAsB,QAAQ;AAAA,EAC/C;AAEA,SAAO;AACT;AAEA,eAAe,OAAwB;AACrC,QAAM,UAAU,cAAc;AAE9B,UAAQ,aAAa;AAErB,MAAI;AACF,YAAQ,MAAM,QAAQ,IAAI;AAAA,EAC5B,SAAS,KAAK;AACZ,QAAI,eAAe,iCAAgB;AACjC,UAAI,IAAI,SAAS,2BAA2B;AAC1C,eAAO;AAAA,MACT;AACA,aAAO,IAAI,YAAY;AAAA,IACzB;AAEA,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,QAAQ,KAAiB;AAC5C,QAAM,aAAa,QAAQ;AAE3B,MAAI,WAAW,QAAQ;AACrB,eAAW,SAAS,WAAW,QAAQ;AACrC,UAAI,CAAC,cAAc,IAAI,KAAK,GAAG;AAC7B,gBAAQ,MAAM,mBAAmB,KAAK,qCAAqC;AAC3E,gBAAQ,MAAM,wCAAwC;AACtD,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAEA,MAAI,WAAW,kBAAkB;AAC/B,eAAW,SAAS,WAAW,kBAAkB;AAC/C,UAAI,CAAC,2BAA2B,IAAI,KAAK,GAAG;AAC1C,gBAAQ,
MAAM,gCAAgC,KAAK,qCAAqC;AACxF,gBAAQ,MAAM,wCAAwC;AACtD,eAAO;AAAA,MACT;AAAA,IACF;AAAA,EACF;AAEA,QAAM,iBAAiB,oBAAoB,UAAU;AAErD,MAAI;AACF,UAAM,SAAS,MAAM,QAAQ,YAAY,cAAc;AACvD,QAAI,UAAU,CAAC,eAAe,OAAO;AACnC,cAAQ,OAAO,MAAM,MAAM;AAC3B,UAAI,CAAC,OAAO,SAAS,IAAI,GAAG;AAC1B,gBAAQ,OAAO,MAAM,IAAI;AAAA,MAC3B;AAAA,IACF;AACA,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,KAAK,EAAE,KAAK,CAAC,SAAS;AACpB,MAAI,SAAS,GAAG;AACd,YAAQ,KAAK,IAAI;AAAA,EACnB;AACF,CAAC;","names":[]}
|