@opendataloader/pdf 1.3.0 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -30,6 +30,7 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
30
30
  // src/index.ts
31
31
  var index_exports = {};
32
32
  __export(index_exports, {
33
+ buildArgs: () => buildArgs,
33
34
  convert: () => convert,
34
35
  run: () => run
35
36
  });
@@ -38,12 +39,77 @@ var import_child_process = require("child_process");
38
39
  var path = __toESM(require("path"), 1);
39
40
  var fs = __toESM(require("fs"), 1);
40
41
  var import_url = require("url");
42
+
43
+ // src/convert-options.generated.ts
44
+ function buildArgs(options) {
45
+ const args = [];
46
+ if (options.outputDir) {
47
+ args.push("--output-dir", options.outputDir);
48
+ }
49
+ if (options.password) {
50
+ args.push("--password", options.password);
51
+ }
52
+ if (options.format) {
53
+ if (Array.isArray(options.format)) {
54
+ if (options.format.length > 0) {
55
+ args.push("--format", options.format.join(","));
56
+ }
57
+ } else {
58
+ args.push("--format", options.format);
59
+ }
60
+ }
61
+ if (options.quiet) {
62
+ args.push("--quiet");
63
+ }
64
+ if (options.contentSafetyOff) {
65
+ if (Array.isArray(options.contentSafetyOff)) {
66
+ if (options.contentSafetyOff.length > 0) {
67
+ args.push("--content-safety-off", options.contentSafetyOff.join(","));
68
+ }
69
+ } else {
70
+ args.push("--content-safety-off", options.contentSafetyOff);
71
+ }
72
+ }
73
+ if (options.keepLineBreaks) {
74
+ args.push("--keep-line-breaks");
75
+ }
76
+ if (options.replaceInvalidChars) {
77
+ args.push("--replace-invalid-chars", options.replaceInvalidChars);
78
+ }
79
+ if (options.useStructTree) {
80
+ args.push("--use-struct-tree");
81
+ }
82
+ if (options.tableMethod) {
83
+ args.push("--table-method", options.tableMethod);
84
+ }
85
+ if (options.readingOrder) {
86
+ args.push("--reading-order", options.readingOrder);
87
+ }
88
+ if (options.markdownPageSeparator) {
89
+ args.push("--markdown-page-separator", options.markdownPageSeparator);
90
+ }
91
+ if (options.textPageSeparator) {
92
+ args.push("--text-page-separator", options.textPageSeparator);
93
+ }
94
+ if (options.htmlPageSeparator) {
95
+ args.push("--html-page-separator", options.htmlPageSeparator);
96
+ }
97
+ if (options.embedImages) {
98
+ args.push("--embed-images");
99
+ }
100
+ if (options.imageFormat) {
101
+ args.push("--image-format", options.imageFormat);
102
+ }
103
+ return args;
104
+ }
105
+
106
+ // src/index.ts
41
107
  var import_meta = {};
42
108
  var __filename = (0, import_url.fileURLToPath)(import_meta.url);
43
109
  var __dirname = path.dirname(__filename);
44
110
  var JAR_NAME = "opendataloader-pdf-cli.jar";
45
111
  function executeJar(args, executionOptions = {}) {
46
- const { debug = false, streamOutput = false } = executionOptions;
112
+ const { streamOutput = false } = executionOptions;
47
113
  return new Promise((resolve, reject) => {
48
114
  const jarPath = path.join(__dirname, "..", "lib", JAR_NAME);
49
115
  if (!fs.existsSync(jarPath)) {
@@ -96,103 +162,58 @@ ${errorOutput}`
96
162
  });
97
163
  });
98
164
  }
99
- function run(inputPath, options = {}) {
100
- return new Promise((resolve, reject) => {
101
- if (!fs.existsSync(inputPath)) {
102
- return reject(new Error(`Input file or folder not found: ${inputPath}`));
103
- }
104
- const args = [];
105
- if (options.outputFolder) {
106
- args.push("--output-dir", options.outputFolder);
107
- }
108
- if (options.password) {
109
- args.push("--password", options.password);
110
- }
111
- if (options.replaceInvalidChars) {
112
- args.push("--replace-invalid-chars", options.replaceInvalidChars);
113
- }
114
- if (options.generateMarkdown) {
115
- args.push("--markdown");
116
- }
117
- if (options.generateHtml) {
118
- args.push("--html");
119
- }
120
- if (options.generateAnnotatedPdf) {
121
- args.push("--pdf");
122
- }
123
- if (options.keepLineBreaks) {
124
- args.push("--keep-line-breaks");
125
- }
126
- if (options.contentSafetyOff) {
127
- args.push("--content-safety-off", options.contentSafetyOff);
128
- }
129
- if (options.htmlInMarkdown) {
130
- args.push("--markdown-with-html");
131
- }
132
- if (options.addImageToMarkdown) {
133
- args.push("--markdown-with-images");
134
- }
135
- if (options.noJson) {
136
- args.push("--no-json");
137
- }
138
- if (options.useStructTree) {
139
- args.push("--use-struct-tree");
140
- }
141
- args.push(inputPath);
142
- executeJar(args, {
143
- debug: options.debug,
144
- streamOutput: Boolean(options.debug)
145
- }).then(resolve).catch(reject);
146
- });
147
- }
148
165
  function convert(inputPaths, options = {}) {
149
- if (inputPaths.length === 0) {
166
+ const inputList = Array.isArray(inputPaths) ? inputPaths : [inputPaths];
167
+ if (inputList.length === 0) {
150
168
  return Promise.reject(new Error("At least one input path must be provided."));
151
169
  }
152
- for (const input of inputPaths) {
170
+ for (const input of inputList) {
153
171
  if (!fs.existsSync(input)) {
154
172
  return Promise.reject(new Error(`Input file or folder not found: ${input}`));
155
173
  }
156
174
  }
157
- const args = [...inputPaths];
158
- if (options.outputDir) {
159
- args.push("--output-dir", options.outputDir);
160
- }
161
- if (options.password) {
162
- args.push("--password", options.password);
163
- }
164
- if (options.format) {
165
- if (Array.isArray(options.format)) {
166
- args.push("--format", options.format.join(","));
167
- } else {
168
- args.push("--format", options.format);
169
- }
170
- }
171
- if (options.quiet) {
172
- args.push("--quiet");
175
+ const args = [...inputList, ...buildArgs(options)];
176
+ return executeJar(args, {
177
+ streamOutput: !options.quiet
178
+ });
179
+ }
180
+ function run(inputPath, options = {}) {
181
+ console.warn(
182
+ "Warning: run() is deprecated and will be removed in a future version. Use convert() instead."
183
+ );
184
+ const formats = [];
185
+ if (!options.noJson) {
186
+ formats.push("json");
173
187
  }
174
- if (options.contentSafetyOff) {
175
- if (Array.isArray(options.contentSafetyOff)) {
176
- args.push("--content-safety-off", options.contentSafetyOff.join(","));
188
+ if (options.generateMarkdown) {
189
+ if (options.addImageToMarkdown) {
190
+ formats.push("markdown-with-images");
191
+ } else if (options.htmlInMarkdown) {
192
+ formats.push("markdown-with-html");
177
193
  } else {
178
- args.push("--content-safety-off", options.contentSafetyOff);
194
+ formats.push("markdown");
179
195
  }
180
196
  }
181
- if (options.keepLineBreaks) {
182
- args.push("--keep-line-breaks");
197
+ if (options.generateHtml) {
198
+ formats.push("html");
183
199
  }
184
- if (options.replaceInvalidChars) {
185
- args.push("--replace-invalid-chars", options.replaceInvalidChars);
200
+ if (options.generateAnnotatedPdf) {
201
+ formats.push("pdf");
186
202
  }
187
- if (options.useStructTree) {
188
- args.push("--use-struct-tree");
189
- }
190
- return executeJar(args, {
191
- streamOutput: !options.quiet
203
+ return convert(inputPath, {
204
+ outputDir: options.outputFolder,
205
+ password: options.password,
206
+ replaceInvalidChars: options.replaceInvalidChars,
207
+ keepLineBreaks: options.keepLineBreaks,
208
+ contentSafetyOff: options.contentSafetyOff,
209
+ useStructTree: options.useStructTree,
210
+ format: formats.length > 0 ? formats : void 0,
211
+ quiet: !options.debug
192
212
  });
193
213
  }
194
214
  // Annotate the CommonJS export names for ESM import in node:
195
215
  0 && (module.exports = {
216
+ buildArgs,
196
217
  convert,
197
218
  run
198
219
  });
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts"],"sourcesContent":["import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { fileURLToPath } from 'url';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n debug?: boolean;\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { debug = false, streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n\n javaProcess.stdout.on('data', (data) => {\n const chunk = data.toString();\n if (streamOutput) {\n process.stdout.write(chunk);\n }\n stdout += chunk;\n });\n\n javaProcess.stderr.on('data', (data) => {\n const chunk = data.toString();\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. 
Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n useStructTree?: boolean;\n}\n\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n return new Promise((resolve, reject) => {\n if (!fs.existsSync(inputPath)) {\n return reject(new Error(`Input file or folder not found: ${inputPath}`));\n }\n\n const args: string[] = [];\n if (options.outputFolder) {\n args.push('--output-dir', options.outputFolder);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.generateMarkdown) {\n args.push('--markdown');\n }\n if (options.generateHtml) {\n args.push('--html');\n }\n if (options.generateAnnotatedPdf) {\n args.push('--pdf');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.contentSafetyOff) {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n if (options.htmlInMarkdown) {\n args.push('--markdown-with-html');\n }\n if (options.addImageToMarkdown) {\n args.push('--markdown-with-images');\n }\n if (options.noJson) {\n args.push('--no-json');\n }\n if (options.useStructTree) {\n args.push('--use-struct-tree')\n }\n\n args.push(inputPath);\n executeJar(args, {\n debug: options.debug,\n streamOutput: Boolean(options.debug),\n })\n .then(resolve)\n .catch(reject);\n });\n}\n\nexport interface ConvertOptions {\n outputDir?: string;\n password?: string;\n format?: string | string[];\n quiet?: boolean;\n contentSafetyOff?: string 
| string[];\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n useStructTree?: boolean;\n}\n\nexport function convert(inputPaths: string[], options: ConvertOptions = {}): Promise<string> {\n if (inputPaths.length === 0) {\n return Promise.reject(new Error('At least one input path must be provided.'));\n }\n\n for (const input of inputPaths) {\n if (!fs.existsSync(input)) {\n return Promise.reject(new Error(`Input file or folder not found: ${input}`));\n }\n }\n\n const args: string[] = [...inputPaths];\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format) {\n if (Array.isArray(options.format)) {\n args.push('--format', options.format.join(','));\n } else {\n args.push('--format', options.format);\n }\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff) {\n if (Array.isArray(options.contentSafetyOff)) {\n args.push('--content-safety-off', options.contentSafetyOff.join(','));\n } else {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.useStructTree) {\n args.push('--use-struct-tree')\n }\n\n return executeJar(args, {\n streamOutput: !options.quiet,\n 
});\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,2BAAsB;AACtB,WAAsB;AACtB,SAAoB;AACpB,iBAA8B;AAH9B;AAKA,IAAM,iBAAa,0BAAc,YAAY,GAAG;AAChD,IAAM,YAAiB,aAAQ,UAAU;AAEzC,IAAM,WAAW;AAOjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,QAAQ,OAAO,eAAe,MAAM,IAAI;AAEhD,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,UAAM,kBAAc,4BAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAChC,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AAkBO,SAAS,IAAI,WAAmB,UAAsB,CAAC,GAAoB;AAChF,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,QAAI,CAAI,cAAW,SAAS,GAAG;AAC7B,aAAO,OAAO,IAAI,MAAM,mCAAmC,SAAS,EAAE,CAAC;AAAA,IACzE;AAEA,UAAM,OAAiB,CAAC;AACxB,QAAI,QAAQ,cAAc;AACxB,WAAK,KAAK,gBAAgB,QAAQ,YAAY;AAAA,IAChD;AACA,QAAI,QAAQ,UAAU;AACpB,WAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,IAC1C;AACA,QAAI,QAAQ,qBAAqB;AAC/B,WAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,IAClE;AACA,QAAI,QAAQ,kBAAkB;AAC5B,WAAK,KAAK,YAAY;AAAA,IACxB;AACA,QAAI,QAAQ,cAAc;AACxB,WAAK,KAAK,QAAQ;AAAA,IACpB;AACA,QAAI,QAAQ,sBAAsB;AAChC,WAAK,KAAK,OAAO;AAAA,IACnB;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,oBAAoB;AAAA,IAChC;AACA,QAAI,QAAQ,kBAAkB;AAC5B,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AACA,QAAI,QAAQ,gBAAgB;AAC1B,WAAK,KAAK,sBAAsB;A
AAA,IAClC;AACA,QAAI,QAAQ,oBAAoB;AAC9B,WAAK,KAAK,wBAAwB;AAAA,IACpC;AACA,QAAI,QAAQ,QAAQ;AAClB,WAAK,KAAK,WAAW;AAAA,IACvB;AACA,QAAI,QAAQ,eAAe;AACzB,WAAK,KAAK,mBAAmB;AAAA,IAC/B;AAEA,SAAK,KAAK,SAAS;AACnB,eAAW,MAAM;AAAA,MACf,OAAO,QAAQ;AAAA,MACf,cAAc,QAAQ,QAAQ,KAAK;AAAA,IACrC,CAAC,EACE,KAAK,OAAO,EACZ,MAAM,MAAM;AAAA,EACjB,CAAC;AACH;AAaO,SAAS,QAAQ,YAAsB,UAA0B,CAAC,GAAoB;AAC3F,MAAI,WAAW,WAAW,GAAG;AAC3B,WAAO,QAAQ,OAAO,IAAI,MAAM,2CAA2C,CAAC;AAAA,EAC9E;AAEA,aAAW,SAAS,YAAY;AAC9B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,QAAQ,OAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE,CAAC;AAAA,IAC7E;AAAA,EACF;AAEA,QAAM,OAAiB,CAAC,GAAG,UAAU;AACrC,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,QAAQ;AAClB,QAAI,MAAM,QAAQ,QAAQ,MAAM,GAAG;AACjC,WAAK,KAAK,YAAY,QAAQ,OAAO,KAAK,GAAG,CAAC;AAAA,IAChD,OAAO;AACL,WAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,IACtC;AAAA,EACF;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,MAAM,QAAQ,QAAQ,gBAAgB,GAAG;AAC3C,WAAK,KAAK,wBAAwB,QAAQ,iBAAiB,KAAK,GAAG,CAAC;AAAA,IACtE,OAAO;AACL,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AAAA,EACF;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBAAoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AAEA,SAAO,WAAW,MAAM;AAAA,IACtB,cAAc,CAAC,QAAQ;AAAA,EACzB,CAAC;AACH;","names":[]}
1
+ {"version":3,"sources":["../src/index.ts","../src/convert-options.generated.ts"],"sourcesContent":["import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { fileURLToPath } from 'url';\n\n// Re-export types and utilities from auto-generated file\nexport type { ConvertOptions } from './convert-options.generated.js';\nexport { buildArgs } from './convert-options.generated.js';\nimport type { ConvertOptions } from './convert-options.generated.js';\nimport { buildArgs } from './convert-options.generated.js';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. 
Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n\n javaProcess.stdout.on('data', (data) => {\n const chunk = data.toString();\n if (streamOutput) {\n process.stdout.write(chunk);\n }\n stdout += chunk;\n });\n\n javaProcess.stderr.on('data', (data) => {\n const chunk = data.toString();\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nexport function convert(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<string> {\n const inputList = Array.isArray(inputPaths) ? inputPaths : [inputPaths];\n if (inputList.length === 0) {\n return Promise.reject(new Error('At least one input path must be provided.'));\n }\n\n for (const input of inputList) {\n if (!fs.existsSync(input)) {\n return Promise.reject(new Error(`Input file or folder not found: ${input}`));\n }\n }\n\n const args: string[] = [...inputList, ...buildArgs(options)];\n\n return executeJar(args, {\n streamOutput: !options.quiet,\n });\n}\n\n/**\n * @deprecated Use `convert()` and `ConvertOptions` instead. 
This function will be removed in a future version.\n */\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n useStructTree?: boolean;\n}\n\n/**\n * @deprecated Use `convert()` instead. This function will be removed in a future version.\n */\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n console.warn(\n 'Warning: run() is deprecated and will be removed in a future version. Use convert() instead.',\n );\n\n // Build format array based on legacy boolean options\n const formats: string[] = [];\n if (!options.noJson) {\n formats.push('json');\n }\n if (options.generateMarkdown) {\n if (options.addImageToMarkdown) {\n formats.push('markdown-with-images');\n } else if (options.htmlInMarkdown) {\n formats.push('markdown-with-html');\n } else {\n formats.push('markdown');\n }\n }\n if (options.generateHtml) {\n formats.push('html');\n }\n if (options.generateAnnotatedPdf) {\n formats.push('pdf');\n }\n\n return convert(inputPath, {\n outputDir: options.outputFolder,\n password: options.password,\n replaceInvalidChars: options.replaceInvalidChars,\n keepLineBreaks: options.keepLineBreaks,\n contentSafetyOff: options.contentSafetyOff,\n useStructTree: options.useStructTree,\n format: formats.length > 0 ? formats : undefined,\n quiet: !options.debug,\n });\n}\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\n/**\n * Options for the convert function.\n */\nexport interface ConvertOptions {\n /** Directory where output files are written. 
Default: input file directory */\n outputDir?: string;\n /** Password for encrypted PDF files */\n password?: string;\n /** Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images. Default: json */\n format?: string | string[];\n /** Suppress console logging output */\n quiet?: boolean;\n /** Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg */\n contentSafetyOff?: string | string[];\n /** Preserve original line breaks in extracted text */\n keepLineBreaks?: boolean;\n /** Replacement character for invalid/unrecognized characters. Default: space */\n replaceInvalidChars?: string;\n /** Use PDF structure tree (tagged PDF) for reading order and semantic structure */\n useStructTree?: boolean;\n /** Table detection method. Values: cluster */\n tableMethod?: string;\n /** Reading order algorithm. Values: none, xycut. Default: none */\n readingOrder?: string;\n /** Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none */\n markdownPageSeparator?: string;\n /** Separator between pages in text output. Use %page-number% for page numbers. Default: none */\n textPageSeparator?: string;\n /** Separator between pages in HTML output. Use %page-number% for page numbers. Default: none */\n htmlPageSeparator?: string;\n /** Embed images as Base64 data URIs instead of file path references */\n embedImages?: boolean;\n /** Output format for extracted images. Values: png, jpeg. 
Default: png */\n imageFormat?: string;\n}\n\n/**\n * Options as parsed from CLI (all values are strings from commander).\n */\nexport interface CliOptions {\n outputDir?: string;\n password?: string;\n format?: string;\n quiet?: boolean;\n contentSafetyOff?: string;\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n useStructTree?: boolean;\n tableMethod?: string;\n readingOrder?: string;\n markdownPageSeparator?: string;\n textPageSeparator?: string;\n htmlPageSeparator?: string;\n embedImages?: boolean;\n imageFormat?: string;\n}\n\n/**\n * Convert CLI options to ConvertOptions.\n */\nexport function buildConvertOptions(cliOptions: CliOptions): ConvertOptions {\n const convertOptions: ConvertOptions = {};\n\n if (cliOptions.outputDir) {\n convertOptions.outputDir = cliOptions.outputDir;\n }\n if (cliOptions.password) {\n convertOptions.password = cliOptions.password;\n }\n if (cliOptions.format) {\n convertOptions.format = cliOptions.format;\n }\n if (cliOptions.quiet) {\n convertOptions.quiet = true;\n }\n if (cliOptions.contentSafetyOff) {\n convertOptions.contentSafetyOff = cliOptions.contentSafetyOff;\n }\n if (cliOptions.keepLineBreaks) {\n convertOptions.keepLineBreaks = true;\n }\n if (cliOptions.replaceInvalidChars) {\n convertOptions.replaceInvalidChars = cliOptions.replaceInvalidChars;\n }\n if (cliOptions.useStructTree) {\n convertOptions.useStructTree = true;\n }\n if (cliOptions.tableMethod) {\n convertOptions.tableMethod = cliOptions.tableMethod;\n }\n if (cliOptions.readingOrder) {\n convertOptions.readingOrder = cliOptions.readingOrder;\n }\n if (cliOptions.markdownPageSeparator) {\n convertOptions.markdownPageSeparator = cliOptions.markdownPageSeparator;\n }\n if (cliOptions.textPageSeparator) {\n convertOptions.textPageSeparator = cliOptions.textPageSeparator;\n }\n if (cliOptions.htmlPageSeparator) {\n convertOptions.htmlPageSeparator = cliOptions.htmlPageSeparator;\n }\n if (cliOptions.embedImages) {\n convertOptions.embedImages = 
true;\n }\n if (cliOptions.imageFormat) {\n convertOptions.imageFormat = cliOptions.imageFormat;\n }\n\n return convertOptions;\n}\n\n/**\n * Build CLI arguments array from ConvertOptions.\n */\nexport function buildArgs(options: ConvertOptions): string[] {\n const args: string[] = [];\n\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format) {\n if (Array.isArray(options.format)) {\n if (options.format.length > 0) {\n args.push('--format', options.format.join(','));\n }\n } else {\n args.push('--format', options.format);\n }\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff) {\n if (Array.isArray(options.contentSafetyOff)) {\n if (options.contentSafetyOff.length > 0) {\n args.push('--content-safety-off', options.contentSafetyOff.join(','));\n }\n } else {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.useStructTree) {\n args.push('--use-struct-tree');\n }\n if (options.tableMethod) {\n args.push('--table-method', options.tableMethod);\n }\n if (options.readingOrder) {\n args.push('--reading-order', options.readingOrder);\n }\n if (options.markdownPageSeparator) {\n args.push('--markdown-page-separator', options.markdownPageSeparator);\n }\n if (options.textPageSeparator) {\n args.push('--text-page-separator', options.textPageSeparator);\n }\n if (options.htmlPageSeparator) {\n args.push('--html-page-separator', options.htmlPageSeparator);\n }\n if (options.embedImages) {\n args.push('--embed-images');\n }\n if (options.imageFormat) {\n args.push('--image-format', options.imageFormat);\n }\n\n return 
args;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,2BAAsB;AACtB,WAAsB;AACtB,SAAoB;AACpB,iBAA8B;;;ACmHvB,SAAS,UAAU,SAAmC;AAC3D,QAAM,OAAiB,CAAC;AAExB,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,QAAQ;AAClB,QAAI,MAAM,QAAQ,QAAQ,MAAM,GAAG;AACjC,UAAI,QAAQ,OAAO,SAAS,GAAG;AAC7B,aAAK,KAAK,YAAY,QAAQ,OAAO,KAAK,GAAG,CAAC;AAAA,MAChD;AAAA,IACF,OAAO;AACL,WAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,IACtC;AAAA,EACF;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,MAAM,QAAQ,QAAQ,gBAAgB,GAAG;AAC3C,UAAI,QAAQ,iBAAiB,SAAS,GAAG;AACvC,aAAK,KAAK,wBAAwB,QAAQ,iBAAiB,KAAK,GAAG,CAAC;AAAA,MACtE;AAAA,IACF,OAAO;AACL,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AAAA,EACF;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBAAoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,cAAc;AACxB,SAAK,KAAK,mBAAmB,QAAQ,YAAY;AAAA,EACnD;AACA,MAAI,QAAQ,uBAAuB;AACjC,SAAK,KAAK,6BAA6B,QAAQ,qBAAqB;AAAA,EACtE;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,gBAAgB;AAAA,EAC5B;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AAEA,SAAO;AACT;;;ADpLA;AAWA,IAAM,iBAAa,0BAAc,YAAY,GAAG;AAChD,IAAM,YAAiB,aAAQ,UAAU;AAEzC,IAAM,WAAW;AAMjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,eAAe,MAAM,IAAI;AAEjC,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,UAAM,kBAAc,4BAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;
AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAS;AACtC,YAAM,QAAQ,KAAK,SAAS;AAC5B,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AACA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAChC,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AAEO,SAAS,QACd,YACA,UAA0B,CAAC,GACV;AACjB,QAAM,YAAY,MAAM,QAAQ,UAAU,IAAI,aAAa,CAAC,UAAU;AACtE,MAAI,UAAU,WAAW,GAAG;AAC1B,WAAO,QAAQ,OAAO,IAAI,MAAM,2CAA2C,CAAC;AAAA,EAC9E;AAEA,aAAW,SAAS,WAAW;AAC7B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,QAAQ,OAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE,CAAC;AAAA,IAC7E;AAAA,EACF;AAEA,QAAM,OAAiB,CAAC,GAAG,WAAW,GAAG,UAAU,OAAO,CAAC;AAE3D,SAAO,WAAW,MAAM;AAAA,IACtB,cAAc,CAAC,QAAQ;AAAA,EACzB,CAAC;AACH;AAwBO,SAAS,IAAI,WAAmB,UAAsB,CAAC,GAAoB;AAChF,UAAQ;AAAA,IACN;AAAA,EACF;AAGA,QAAM,UAAoB,CAAC;AAC3B,MAAI,CAAC,QAAQ,QAAQ;AACnB,YAAQ,KAAK,MAAM;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,QAAQ,oBAAoB;AAC9B,cAAQ,KAAK,sBAAsB;AAAA,IACrC,WAAW,QAAQ,gBAAgB;AACjC,cAAQ,KAAK,oBAAoB;AAAA,IACnC,OAAO;AACL,cAAQ,KAAK,UAAU;AAAA,IACzB;AAAA,EACF;AACA,MAAI,QAAQ,cAAc;AACxB,YAAQ,KAAK,MAAM;AAAA,EACrB;AACA,MAAI,QAAQ,sBAAsB;AAChC,YAAQ,KAAK,KAAK;AAAA,EACpB;AAEA,SAAO,QAAQ,WAAW;AAAA,IACxB,WAAW,QAAQ;AAAA,IACnB,UAAU,QAAQ;AAAA,IAClB,qBAAqB,QAAQ;AAAA,IAC7B,gBAAgB,QAAQ;AAAA,IACxB,kBAAkB,QAAQ;AAAA,IAC1B,eAAe,QAAQ;AAAA,IACvB,QAAQ,QAAQ,SAAS,IAAI,UAAU;AAAA,IACvC,OAAO,CAAC,QAAQ;AAAA,EAClB,CAAC;AACH;","names":[]}
package/dist/index.d.cts CHANGED
@@ -1,3 +1,47 @@
1
+ /**
2
+ * Options for the convert function.
3
+ */
4
+ interface ConvertOptions {
5
+ /** Directory where output files are written. Default: input file directory */
6
+ outputDir?: string;
7
+ /** Password for encrypted PDF files */
8
+ password?: string;
9
+ /** Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images. Default: json */
10
+ format?: string | string[];
11
+ /** Suppress console logging output */
12
+ quiet?: boolean;
13
+ /** Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg */
14
+ contentSafetyOff?: string | string[];
15
+ /** Preserve original line breaks in extracted text */
16
+ keepLineBreaks?: boolean;
17
+ /** Replacement character for invalid/unrecognized characters. Default: space */
18
+ replaceInvalidChars?: string;
19
+ /** Use PDF structure tree (tagged PDF) for reading order and semantic structure */
20
+ useStructTree?: boolean;
21
+ /** Table detection method. Values: cluster */
22
+ tableMethod?: string;
23
+ /** Reading order algorithm. Values: none, xycut. Default: none */
24
+ readingOrder?: string;
25
+ /** Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none */
26
+ markdownPageSeparator?: string;
27
+ /** Separator between pages in text output. Use %page-number% for page numbers. Default: none */
28
+ textPageSeparator?: string;
29
+ /** Separator between pages in HTML output. Use %page-number% for page numbers. Default: none */
30
+ htmlPageSeparator?: string;
31
+ /** Embed images as Base64 data URIs instead of file path references */
32
+ embedImages?: boolean;
33
+ /** Output format for extracted images. Values: png, jpeg. Default: png */
34
+ imageFormat?: string;
35
+ }
36
+ /**
37
+ * Build CLI arguments array from ConvertOptions.
38
+ */
39
+ declare function buildArgs(options: ConvertOptions): string[];
40
+
41
+ declare function convert(inputPaths: string | string[], options?: ConvertOptions): Promise<string>;
42
+ /**
43
+ * @deprecated Use `convert()` and `ConvertOptions` instead. This function will be removed in a future version.
44
+ */
1
45
  interface RunOptions {
2
46
  outputFolder?: string;
3
47
  password?: string;
@@ -13,17 +57,9 @@ interface RunOptions {
13
57
  debug?: boolean;
14
58
  useStructTree?: boolean;
15
59
  }
60
+ /**
61
+ * @deprecated Use `convert()` instead. This function will be removed in a future version.
62
+ */
16
63
  declare function run(inputPath: string, options?: RunOptions): Promise<string>;
17
- interface ConvertOptions {
18
- outputDir?: string;
19
- password?: string;
20
- format?: string | string[];
21
- quiet?: boolean;
22
- contentSafetyOff?: string | string[];
23
- keepLineBreaks?: boolean;
24
- replaceInvalidChars?: string;
25
- useStructTree?: boolean;
26
- }
27
- declare function convert(inputPaths: string[], options?: ConvertOptions): Promise<string>;
28
64
 
29
- export { type ConvertOptions, type RunOptions, convert, run };
65
+ export { type ConvertOptions, type RunOptions, buildArgs, convert, run };
package/dist/index.d.ts CHANGED
@@ -1,3 +1,47 @@
1
+ /**
2
+ * Options for the convert function.
3
+ */
4
+ interface ConvertOptions {
5
+ /** Directory where output files are written. Default: input file directory */
6
+ outputDir?: string;
7
+ /** Password for encrypted PDF files */
8
+ password?: string;
9
+ /** Output format(s): one string (comma-separated to request several) or an array of format names. Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images. Default: json */
10
+ format?: string | string[];
11
+ /** Suppress console logging output */
12
+ quiet?: boolean;
13
+ /** Content safety filter(s) to disable: one string or an array of filter names. Values: all, hidden-text, off-page, tiny, hidden-ocg */
14
+ contentSafetyOff?: string | string[];
15
+ /** Preserve original line breaks in extracted text */
16
+ keepLineBreaks?: boolean;
17
+ /** Replacement character for invalid/unrecognized characters. Default: space */
18
+ replaceInvalidChars?: string;
19
+ /** Use PDF structure tree (tagged PDF) for reading order and semantic structure */
20
+ useStructTree?: boolean;
21
+ /** Table detection method. Values: cluster */
22
+ tableMethod?: string;
23
+ /** Reading order algorithm. Values: none, xycut. Default: none */
24
+ readingOrder?: string;
25
+ /** Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none */
26
+ markdownPageSeparator?: string;
27
+ /** Separator between pages in text output. Use %page-number% for page numbers. Default: none */
28
+ textPageSeparator?: string;
29
+ /** Separator between pages in HTML output. Use %page-number% for page numbers. Default: none */
30
+ htmlPageSeparator?: string;
31
+ /** Embed images as Base64 data URIs instead of file path references */
32
+ embedImages?: boolean;
33
+ /** Output format for extracted images. Values: png, jpeg. Default: png */
34
+ imageFormat?: string;
35
+ }
36
+ /**
37
+ * Build CLI arguments array from ConvertOptions.
38
+ */
39
+ declare function buildArgs(options: ConvertOptions): string[];
40
+
41
+ declare function convert(inputPaths: string | string[], options?: ConvertOptions): Promise<string>;
42
+ /**
43
+ * @deprecated Use `convert()` and `ConvertOptions` instead. This function will be removed in a future version.
44
+ */
1
45
  interface RunOptions {
2
46
  outputFolder?: string;
3
47
  password?: string;
@@ -13,17 +57,9 @@ interface RunOptions {
13
57
  debug?: boolean;
14
58
  useStructTree?: boolean;
15
59
  }
60
+ /**
61
+ * @deprecated Use `convert()` instead. This function will be removed in a future version.
62
+ */
16
63
  declare function run(inputPath: string, options?: RunOptions): Promise<string>;
17
- interface ConvertOptions {
18
- outputDir?: string;
19
- password?: string;
20
- format?: string | string[];
21
- quiet?: boolean;
22
- contentSafetyOff?: string | string[];
23
- keepLineBreaks?: boolean;
24
- replaceInvalidChars?: string;
25
- useStructTree?: boolean;
26
- }
27
- declare function convert(inputPaths: string[], options?: ConvertOptions): Promise<string>;
28
64
 
29
- export { type ConvertOptions, type RunOptions, convert, run };
65
+ export { type ConvertOptions, type RunOptions, buildArgs, convert, run };
package/dist/index.js CHANGED
@@ -3,11 +3,76 @@ import { spawn } from "child_process";
3
3
  import * as path from "path";
4
4
  import * as fs from "fs";
5
5
  import { fileURLToPath } from "url";
6
+
7
+ // src/convert-options.generated.ts
8
/**
 * Flag table used by buildArgs(), listed in the order the flags are emitted.
 *
 * Each entry is [optionKey, cliFlag, kind]:
 *  - "value":  emits `cliFlag <value>` when the option is truthy.
 *  - "list":   like "value", but an array is joined with commas and an
 *              empty array emits nothing.
 *  - "switch": emits the bare `cliFlag` when the option is truthy.
 */
const CONVERT_OPTION_FLAGS = [
  ["outputDir", "--output-dir", "value"],
  ["password", "--password", "value"],
  ["format", "--format", "list"],
  ["quiet", "--quiet", "switch"],
  ["contentSafetyOff", "--content-safety-off", "list"],
  ["keepLineBreaks", "--keep-line-breaks", "switch"],
  ["replaceInvalidChars", "--replace-invalid-chars", "value"],
  ["useStructTree", "--use-struct-tree", "switch"],
  ["tableMethod", "--table-method", "value"],
  ["readingOrder", "--reading-order", "value"],
  ["markdownPageSeparator", "--markdown-page-separator", "value"],
  ["textPageSeparator", "--text-page-separator", "value"],
  ["htmlPageSeparator", "--html-page-separator", "value"],
  ["embedImages", "--embed-images", "switch"],
  ["imageFormat", "--image-format", "value"],
];

/**
 * Build the CLI arguments array from a ConvertOptions object.
 *
 * Options that are unset, falsy, or empty arrays contribute nothing. The
 * result contains only option flags; input paths are not part of it.
 *
 * @param {object} [options={}] - ConvertOptions. `undefined` and `null` are
 *   treated as "no options" instead of throwing.
 * @returns {string[]} Flags (and their values) in a fixed, key-order-independent order.
 */
function buildArgs(options = {}) {
  // The default parameter only covers `undefined`; also tolerate an explicit null.
  const source = options ?? {};
  const args = [];
  for (const [key, flag, kind] of CONVERT_OPTION_FLAGS) {
    const value = source[key];
    if (!value) {
      continue;
    }
    if (kind === "switch") {
      args.push(flag);
      continue;
    }
    if (kind === "list" && Array.isArray(value)) {
      // An empty list must not produce a dangling `--flag ""` pair.
      if (value.length > 0) {
        args.push(flag, value.join(","));
      }
      continue;
    }
    args.push(flag, value);
  }
  return args;
}
69
+
70
+ // src/index.ts
6
71
  var __filename = fileURLToPath(import.meta.url);
7
72
  var __dirname = path.dirname(__filename);
8
73
  var JAR_NAME = "opendataloader-pdf-cli.jar";
9
74
  function executeJar(args, executionOptions = {}) {
10
- const { debug = false, streamOutput = false } = executionOptions;
75
+ const { streamOutput = false } = executionOptions;
11
76
  return new Promise((resolve, reject) => {
12
77
  const jarPath = path.join(__dirname, "..", "lib", JAR_NAME);
13
78
  if (!fs.existsSync(jarPath)) {
@@ -60,102 +125,57 @@ ${errorOutput}`
60
125
  });
61
126
  });
62
127
  }
63
- function run(inputPath, options = {}) {
64
- return new Promise((resolve, reject) => {
65
- if (!fs.existsSync(inputPath)) {
66
- return reject(new Error(`Input file or folder not found: ${inputPath}`));
67
- }
68
- const args = [];
69
- if (options.outputFolder) {
70
- args.push("--output-dir", options.outputFolder);
71
- }
72
- if (options.password) {
73
- args.push("--password", options.password);
74
- }
75
- if (options.replaceInvalidChars) {
76
- args.push("--replace-invalid-chars", options.replaceInvalidChars);
77
- }
78
- if (options.generateMarkdown) {
79
- args.push("--markdown");
80
- }
81
- if (options.generateHtml) {
82
- args.push("--html");
83
- }
84
- if (options.generateAnnotatedPdf) {
85
- args.push("--pdf");
86
- }
87
- if (options.keepLineBreaks) {
88
- args.push("--keep-line-breaks");
89
- }
90
- if (options.contentSafetyOff) {
91
- args.push("--content-safety-off", options.contentSafetyOff);
92
- }
93
- if (options.htmlInMarkdown) {
94
- args.push("--markdown-with-html");
95
- }
96
- if (options.addImageToMarkdown) {
97
- args.push("--markdown-with-images");
98
- }
99
- if (options.noJson) {
100
- args.push("--no-json");
101
- }
102
- if (options.useStructTree) {
103
- args.push("--use-struct-tree");
104
- }
105
- args.push(inputPath);
106
- executeJar(args, {
107
- debug: options.debug,
108
- streamOutput: Boolean(options.debug)
109
- }).then(resolve).catch(reject);
110
- });
111
- }
112
128
  function convert(inputPaths, options = {}) {
113
- if (inputPaths.length === 0) {
129
+ const inputList = Array.isArray(inputPaths) ? inputPaths : [inputPaths];
130
+ if (inputList.length === 0) {
114
131
  return Promise.reject(new Error("At least one input path must be provided."));
115
132
  }
116
- for (const input of inputPaths) {
133
+ for (const input of inputList) {
117
134
  if (!fs.existsSync(input)) {
118
135
  return Promise.reject(new Error(`Input file or folder not found: ${input}`));
119
136
  }
120
137
  }
121
- const args = [...inputPaths];
122
- if (options.outputDir) {
123
- args.push("--output-dir", options.outputDir);
124
- }
125
- if (options.password) {
126
- args.push("--password", options.password);
127
- }
128
- if (options.format) {
129
- if (Array.isArray(options.format)) {
130
- args.push("--format", options.format.join(","));
131
- } else {
132
- args.push("--format", options.format);
133
- }
134
- }
135
- if (options.quiet) {
136
- args.push("--quiet");
138
+ const args = [...inputList, ...buildArgs(options)];
139
+ return executeJar(args, {
140
+ streamOutput: !options.quiet
141
+ });
142
+ }
143
/**
 * Legacy entry point kept for backward compatibility: maps RunOptions onto
 * ConvertOptions and delegates to convert().
 *
 * @deprecated Use `convert()` instead. This function will be removed in a future version.
 * @param {string} inputPath - Input file or folder, forwarded to convert().
 * @param {object} [options={}] - RunOptions (outputFolder, password,
 *   replaceInvalidChars, generateMarkdown, generateHtml, generateAnnotatedPdf,
 *   keepLineBreaks, contentSafetyOff, htmlInMarkdown, addImageToMarkdown,
 *   noJson, debug, useStructTree).
 * @returns {Promise<string>} The promise returned by convert().
 */
function run(inputPath, options = {}) {
  console.warn(
    "Warning: run() is deprecated and will be removed in a future version. Use convert() instead."
  );
  const formats = [];
  if (!options.noJson) {
    formats.push("json");
  }
  // The previous run() emitted --markdown-with-html / --markdown-with-images
  // on their own, so either flag must still select a markdown output even
  // when generateMarkdown is not set.
  const wantsMarkdown = Boolean(
    options.generateMarkdown || options.addImageToMarkdown || options.htmlInMarkdown
  );
  if (wantsMarkdown) {
    if (options.addImageToMarkdown) {
      formats.push("markdown-with-images");
    } else if (options.htmlInMarkdown) {
      formats.push("markdown-with-html");
    } else {
      formats.push("markdown");
    }
  }
  if (options.generateHtml) {
    formats.push("html");
  }
  if (options.generateAnnotatedPdf) {
    formats.push("pdf");
  }
  return convert(inputPath, {
    outputDir: options.outputFolder,
    password: options.password,
    replaceInvalidChars: options.replaceInvalidChars,
    keepLineBreaks: options.keepLineBreaks,
    contentSafetyOff: options.contentSafetyOff,
    useStructTree: options.useStructTree,
    // Leave format unset when nothing was requested so the CLI default applies.
    format: formats.length > 0 ? formats : void 0,
    // The legacy `debug` flag is the inverse of convert()'s `quiet`.
    quiet: !options.debug
  });
}
158
177
  export {
178
+ buildArgs,
159
179
  convert,
160
180
  run
161
181
  };