@opendataloader/pdf 2.4.1 → 2.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -102,20 +102,20 @@ opendataloader_pdf.convert(
102
102
 
103
103
  **opendataloader-pdf [hybrid] ranks #1 overall (0.907)** across reading order, table, and heading extraction accuracy.
104
104
 
105
- | Engine | Overall | Reading Order | Table | Heading | Speed (s/page) |
106
- |--------|---------|---------------|-------|---------|----------------|
107
- | **opendataloader [hybrid]** | **0.907** | **0.934** | **0.928** | 0.821 | 0.463 |
108
- | docling | 0.882 | 0.898 | 0.887 | **0.824** | 0.762 |
109
- | nutrient | 0.880 | 0.924 | 0.662 | 0.811 | 0.230 |
110
- | marker | 0.861 | 0.890 | 0.808 | 0.796 | 53.932 |
111
- | unstructured [hi_res] | 0.841 | 0.904 | 0.588 | 0.749 | 3.008 |
112
- | edgeparse | 0.837 | 0.894 | 0.717 | 0.706 | 0.036 |
113
- | opendataloader | 0.831 | 0.902 | 0.489 | 0.739 | **0.015** |
114
- | mineru | 0.831 | 0.857 | 0.873 | 0.743 | 5.962 |
115
- | pymupdf4llm | 0.732 | 0.885 | 0.401 | 0.412 | 0.091 |
116
- | unstructured | 0.686 | 0.882 | 0.000 | 0.388 | 0.077 |
117
- | markitdown | 0.589 | 0.844 | 0.273 | 0.000 | 0.114 |
118
- | liteparse | 0.576 | 0.866 | 0.000 | 0.000 | 1.061 |
105
+ | Engine | Overall | Reading Order | Table | Heading | Speed (s/page) | License |
106
+ |--------|---------|---------------|-------|---------|----------------|---------|
107
+ | **opendataloader [hybrid]** | **0.907** | **0.934** | **0.928** | 0.821 | 0.463 | Apache-2.0 |
108
+ | nutrient | 0.885 | 0.925 | 0.708 | 0.819 | **0.008** | Commercial |
109
+ | docling | 0.882 | 0.898 | 0.887 | **0.824** | 0.762 | MIT |
110
+ | marker | 0.861 | 0.890 | 0.808 | 0.796 | 53.932 | GPL-3.0 |
111
+ | unstructured [hi_res] | 0.841 | 0.904 | 0.588 | 0.749 | 3.008 | Apache-2.0 |
112
+ | edgeparse | 0.837 | 0.894 | 0.717 | 0.706 | 0.036 | Apache-2.0 |
113
+ | opendataloader | 0.831 | 0.902 | 0.489 | 0.739 | 0.015 | Apache-2.0 |
114
+ | mineru | 0.831 | 0.857 | 0.873 | 0.743 | 5.962 | AGPL-3.0 |
115
+ | pymupdf4llm | 0.732 | 0.885 | 0.401 | 0.412 | 0.091 | AGPL-3.0 |
116
+ | unstructured | 0.686 | 0.882 | 0.000 | 0.388 | 0.077 | Apache-2.0 |
117
+ | markitdown | 0.589 | 0.844 | 0.273 | 0.000 | 0.114 | MIT |
118
+ | liteparse | 0.576 | 0.866 | 0.000 | 0.000 | 1.061 | Apache-2.0 |
119
119
 
120
120
  > Scores normalized to [0, 1]. Higher is better for accuracy; lower is better for speed. **Bold** = best. [Full benchmark details](https://github.com/opendataloader-project/opendataloader-bench)
121
121
 
package/dist/cli.cjs CHANGED
@@ -23,7 +23,7 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
23
23
  mod
24
24
  ));
25
25
 
26
- // node_modules/.pnpm/tsup@8.5.1_postcss@8.5.9_tsx@4.20.5_typescript@6.0.2/node_modules/tsup/assets/cjs_shims.js
26
+ // node_modules/.pnpm/tsup@8.5.1_postcss@8.5.12_tsx@4.20.5_typescript@6.0.2/node_modules/tsup/assets/cjs_shims.js
27
27
  var getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${__filename}`).href : document.currentScript && document.currentScript.tagName.toUpperCase() === "SCRIPT" ? document.currentScript.src : new URL("main.js", document.baseURI).href;
28
28
  var importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
29
29
 
@@ -253,7 +253,13 @@ function executeJar(args, executionOptions = {}) {
253
253
  );
254
254
  }
255
255
  const command = "java";
256
- const commandArgs = ["-jar", jarPath, ...args];
256
+ const commandArgs = [
257
+ "-Djava.awt.headless=true",
258
+ "-Dapple.awt.UIElement=true",
259
+ "-jar",
260
+ jarPath,
261
+ ...args
262
+ ];
257
263
  const javaProcess = (0, import_child_process.spawn)(command, commandArgs);
258
264
  let stdout = "";
259
265
  let stderr = "";
@@ -299,6 +305,7 @@ function executeJar(args, executionOptions = {}) {
299
305
 
300
306
  ${errorOutput}`
301
307
  );
308
+ error.isJavaExit = true;
302
309
  reject(error);
303
310
  }
304
311
  });
@@ -408,8 +415,11 @@ async function main() {
408
415
  await _runForCli(inputPaths, convertOptions);
409
416
  return 0;
410
417
  } catch (err) {
411
- const message = err instanceof Error ? err.message : String(err);
412
- console.error(message);
418
+ const isJavaExit = err instanceof Error && err.isJavaExit === true;
419
+ if (!isJavaExit) {
420
+ const message = err instanceof Error ? err.message : String(err);
421
+ console.error(message);
422
+ }
413
423
  return 1;
414
424
  }
415
425
  }
package/dist/cli.cjs.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../node_modules/.pnpm/tsup@8.5.1_postcss@8.5.9_tsx@4.20.5_typescript@6.0.2/node_modules/tsup/assets/cjs_shims.js","../src/cli.ts","../src/index.ts","../src/convert-options.generated.ts","../src/cli-options.generated.ts"],"sourcesContent":["// Shim globals in cjs bundle\n// There's a weird bug that esbuild will always inject importMetaUrl\n// if we export it as `const importMetaUrl = ... __filename ...`\n// But using a function will not cause this issue\n\nconst getImportMetaUrl = () => \n typeof document === \"undefined\" \n ? new URL(`file:${__filename}`).href \n : (document.currentScript && document.currentScript.tagName.toUpperCase() === 'SCRIPT') \n ? document.currentScript.src \n : new URL(\"main.js\", document.baseURI).href;\n\nexport const importMetaUrl = /* @__PURE__ */ getImportMetaUrl()\n","#!/usr/bin/env node\nimport { Command, CommanderError } from 'commander';\nimport { _runForCli } from './index.js';\nimport { CliOptions, buildConvertOptions } from './convert-options.generated.js';\nimport { registerCliOptions } from './cli-options.generated.js';\n\nfunction createProgram(): Command {\n const program = new Command();\n\n program\n .name('opendataloader-pdf')\n .usage('[options] <input...>')\n .description('Convert PDFs using the OpenDataLoader CLI.')\n .showHelpAfterError(\"Use '--help' to see available options.\")\n .showSuggestionAfterError(false)\n .argument('<input...>', 'Input files or directories to convert');\n\n // Register CLI options from auto-generated file\n registerCliOptions(program);\n\n program.configureOutput({\n writeErr: (str) => {\n console.error(str.trimEnd());\n },\n outputError: (str, write) => {\n write(str);\n },\n });\n\n return program;\n}\n\nasync function main(): Promise<number> {\n const program = createProgram();\n\n program.exitOverride();\n\n try {\n program.parse(process.argv);\n } catch (err) {\n if (err instanceof CommanderError) {\n if (err.code === 'commander.helpDisplayed') {\n return 0;\n }\n return err.exitCode ?? 1;\n }\n\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n\n const cliOptions = program.opts<CliOptions>();\n const inputPaths = program.args;\n const convertOptions = buildConvertOptions(cliOptions);\n\n try {\n // _runForCli streams stdout/stderr to the parent process as they arrive;\n // we deliberately do not re-print anything here. (Issue #398.)\n await _runForCli(inputPaths, convertOptions);\n return 0;\n } catch (err) {\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n return 1;\n }\n}\n\nmain().then((code) => {\n if (code !== 0) {\n process.exit(code);\n }\n});\n","import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { StringDecoder } from 'string_decoder';\nimport { fileURLToPath } from 'url';\n\n// Re-export types and utilities from auto-generated file\nexport type { ConvertOptions } from './convert-options.generated.js';\nexport { buildArgs } from './convert-options.generated.js';\nimport type { ConvertOptions } from './convert-options.generated.js';\nimport { buildArgs } from './convert-options.generated.js';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n /**\n * When true, forwards Java's stdout and stderr chunks to the parent\n * process in real time as well as accumulating them. Used by the bundled\n * CLI so long-running conversions show progress as it happens.\n */\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n // StringDecoder buffers incomplete multi-byte UTF-8 sequences across\n // chunk boundaries — Buffer.toString() alone would emit replacement\n // characters when, e.g., a 3-byte Korean codepoint splits across two\n // 'data' events. One decoder per stream so they don't share state.\n const stdoutDecoder = new StringDecoder('utf8');\n const stderrDecoder = new StringDecoder('utf8');\n\n javaProcess.stdout.on('data', (data: Buffer) => {\n const chunk = stdoutDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n // Stream-only: don't also accumulate, or a multi-hour conversion\n // would buffer its entire (potentially gigabyte) stdout in memory\n // for no consumer.\n process.stdout.write(chunk);\n } else {\n stdout += chunk;\n }\n });\n\n javaProcess.stderr.on('data', (data: Buffer) => {\n const chunk = stderrDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n // stderr is always accumulated (progress logs are small and we need\n // them for error messages on non-zero exit).\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n // Flush any trailing bytes the decoder is still holding (always emit\n // them — if we drop them on error paths, error messages with non-ASCII\n // characters lose their tail).\n const stdoutTail = stdoutDecoder.end();\n const stderrTail = stderrDecoder.end();\n if (stdoutTail.length > 0) {\n if (streamOutput) {\n process.stdout.write(stdoutTail);\n } else {\n stdout += stdoutTail;\n }\n }\n if (stderrTail.length > 0) {\n if (streamOutput) process.stderr.write(stderrTail);\n stderr += stderrTail;\n }\n\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nfunction buildJarArgs(\n inputPaths: string | string[],\n options: ConvertOptions,\n): string[] | Error {\n const inputList = Array.isArray(inputPaths) ? inputPaths : [inputPaths];\n if (inputList.length === 0) {\n return new Error('At least one input path must be provided.');\n }\n\n for (const input of inputList) {\n if (!fs.existsSync(input)) {\n return new Error(`Input file or folder not found: ${input}`);\n }\n }\n\n return [...inputList, ...buildArgs(options)];\n}\n\nexport function convert(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<string> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n return Promise.reject(argsOrError);\n }\n // Library API: never streams to the parent process. Returns the full stdout\n // string so callers can do `const out = await convert(...)` without surprise\n // side-effects on process.stdout / process.stderr.\n return executeJar(argsOrError, { streamOutput: false });\n}\n\n/**\n * Internal entry point used by the bundled CLI. Streams Java's stdout and\n * stderr to the parent process in real time (so long-running conversions like\n * hybrid mode show progress as it happens) and resolves without a stdout\n * payload — preventing the caller from re-printing what was already streamed.\n *\n * Not part of the public API: do not import this from application code. Use\n * {@link convert} instead.\n *\n * @internal\n */\nexport async function _runForCli(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<void> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n throw argsOrError;\n }\n await executeJar(argsOrError, { streamOutput: true });\n}\n\n/**\n * @deprecated Use `convert()` and `ConvertOptions` instead. This function will be removed in a future version.\n */\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n useStructTree?: boolean;\n}\n\n/**\n * @deprecated Use `convert()` instead. This function will be removed in a future version.\n */\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n console.warn(\n 'Warning: run() is deprecated and will be removed in a future version. Use convert() instead.',\n );\n\n // Build format array based on legacy boolean options\n const formats: string[] = [];\n if (!options.noJson) {\n formats.push('json');\n }\n if (options.generateMarkdown) {\n if (options.addImageToMarkdown) {\n formats.push('markdown-with-images');\n } else if (options.htmlInMarkdown) {\n formats.push('markdown-with-html');\n } else {\n formats.push('markdown');\n }\n }\n if (options.generateHtml) {\n formats.push('html');\n }\n if (options.generateAnnotatedPdf) {\n formats.push('pdf');\n }\n\n return convert(inputPath, {\n outputDir: options.outputFolder,\n password: options.password,\n replaceInvalidChars: options.replaceInvalidChars,\n keepLineBreaks: options.keepLineBreaks,\n contentSafetyOff: options.contentSafetyOff,\n useStructTree: options.useStructTree,\n format: formats.length > 0 ? formats : undefined,\n quiet: !options.debug,\n });\n}\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\n/**\n * Options for the convert function.\n */\nexport interface ConvertOptions {\n /** Directory where output files are written. Default: input file directory */\n outputDir?: string;\n /** Password for encrypted PDF files */\n password?: string;\n /** Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images, tagged-pdf. Default: json */\n format?: string | string[];\n /** Suppress console logging output */\n quiet?: boolean;\n /** Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg */\n contentSafetyOff?: string | string[];\n /** Enable sensitive data sanitization. Replaces emails, phone numbers, IPs, credit cards, and URLs with placeholders */\n sanitize?: boolean;\n /** Preserve original line breaks in extracted text */\n keepLineBreaks?: boolean;\n /** Replacement character for invalid/unrecognized characters. Default: space */\n replaceInvalidChars?: string;\n /** Use PDF structure tree (tagged PDF) for reading order and semantic structure */\n useStructTree?: boolean;\n /** Table detection method. Values: default (border-based), cluster (border + cluster). Default: default */\n tableMethod?: string;\n /** Reading order algorithm. Values: off, xycut. Default: xycut */\n readingOrder?: string;\n /** Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none */\n markdownPageSeparator?: string;\n /** Separator between pages in text output. Use %page-number% for page numbers. Default: none */\n textPageSeparator?: string;\n /** Separator between pages in HTML output. Use %page-number% for page numbers. Default: none */\n htmlPageSeparator?: string;\n /** Image output mode. Values: off (no images), embedded (Base64 data URIs), external (file references). Default: external */\n imageOutput?: string;\n /** Output format for extracted images. Values: png, jpeg. Default: png */\n imageFormat?: string;\n /** Directory for extracted images */\n imageDir?: string;\n /** Pages to extract (e.g., \"1,3,5-7\"). Default: all pages */\n pages?: string;\n /** Include page headers and footers in output */\n includeHeaderFooter?: boolean;\n /** Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental) */\n detectStrikethrough?: boolean;\n /** Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai */\n hybrid?: string;\n /** Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend) */\n hybridMode?: string;\n /** Hybrid backend server URL (overrides default) */\n hybridUrl?: string;\n /** Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0 */\n hybridTimeout?: string;\n /** Opt in to Java fallback on hybrid backend error (default: disabled) */\n hybridFallback?: boolean;\n /** DLA label 7 (regionlist) handling. Requires --hybrid=hancom-ai. Values: table-first (default; check TSR overlap), list-only (skip TSR, always treat as list) */\n hybridHancomAiRegionlistStrategy?: string;\n /** OCR strategy. Requires --hybrid=hancom-ai. Values: off (stream-only), auto (default; stream first, OCR fallback), force (OCR-only) */\n hybridHancomAiOcrStrategy?: string;\n /** Page image cache backing. Requires --hybrid=hancom-ai. Values: memory (default), disk */\n hybridHancomAiImageCache?: string;\n /** Write output to stdout instead of file (single format only) */\n toStdout?: boolean;\n /** Number of worker threads for per-page processing. Default: 1 (sequential, stable). Values >1 (experimental) run pages in parallel for faster throughput; output may vary slightly on some PDFs. Capped at the number of available CPU cores. Applies to the native Java pipeline only; ignored in --hybrid mode */\n threads?: string;\n}\n\n/**\n * Options as parsed from CLI (all values are strings from commander).\n */\nexport interface CliOptions {\n outputDir?: string;\n password?: string;\n format?: string;\n quiet?: boolean;\n contentSafetyOff?: string;\n sanitize?: boolean;\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n useStructTree?: boolean;\n tableMethod?: string;\n readingOrder?: string;\n markdownPageSeparator?: string;\n textPageSeparator?: string;\n htmlPageSeparator?: string;\n imageOutput?: string;\n imageFormat?: string;\n imageDir?: string;\n pages?: string;\n includeHeaderFooter?: boolean;\n detectStrikethrough?: boolean;\n hybrid?: string;\n hybridMode?: string;\n hybridUrl?: string;\n hybridTimeout?: string;\n hybridFallback?: boolean;\n hybridHancomAiRegionlistStrategy?: string;\n hybridHancomAiOcrStrategy?: string;\n hybridHancomAiImageCache?: string;\n toStdout?: boolean;\n threads?: string;\n}\n\n/**\n * Convert CLI options to ConvertOptions.\n */\nexport function buildConvertOptions(cliOptions: CliOptions): ConvertOptions {\n const convertOptions: ConvertOptions = {};\n\n if (cliOptions.outputDir) {\n convertOptions.outputDir = cliOptions.outputDir;\n }\n if (cliOptions.password) {\n convertOptions.password = cliOptions.password;\n }\n if (cliOptions.format) {\n convertOptions.format = cliOptions.format;\n }\n if (cliOptions.quiet) {\n convertOptions.quiet = true;\n }\n if (cliOptions.contentSafetyOff) {\n convertOptions.contentSafetyOff = cliOptions.contentSafetyOff;\n }\n if (cliOptions.sanitize) {\n convertOptions.sanitize = true;\n }\n if (cliOptions.keepLineBreaks) {\n convertOptions.keepLineBreaks = true;\n }\n if (cliOptions.replaceInvalidChars) {\n convertOptions.replaceInvalidChars = cliOptions.replaceInvalidChars;\n }\n if (cliOptions.useStructTree) {\n convertOptions.useStructTree = true;\n }\n if (cliOptions.tableMethod) {\n convertOptions.tableMethod = cliOptions.tableMethod;\n }\n if (cliOptions.readingOrder) {\n convertOptions.readingOrder = cliOptions.readingOrder;\n }\n if (cliOptions.markdownPageSeparator) {\n convertOptions.markdownPageSeparator = cliOptions.markdownPageSeparator;\n }\n if (cliOptions.textPageSeparator) {\n convertOptions.textPageSeparator = cliOptions.textPageSeparator;\n }\n if (cliOptions.htmlPageSeparator) {\n convertOptions.htmlPageSeparator = cliOptions.htmlPageSeparator;\n }\n if (cliOptions.imageOutput) {\n convertOptions.imageOutput = cliOptions.imageOutput;\n }\n if (cliOptions.imageFormat) {\n convertOptions.imageFormat = cliOptions.imageFormat;\n }\n if (cliOptions.imageDir) {\n convertOptions.imageDir = cliOptions.imageDir;\n }\n if (cliOptions.pages) {\n convertOptions.pages = cliOptions.pages;\n }\n if (cliOptions.includeHeaderFooter) {\n convertOptions.includeHeaderFooter = true;\n }\n if (cliOptions.detectStrikethrough) {\n convertOptions.detectStrikethrough = true;\n }\n if (cliOptions.hybrid) {\n convertOptions.hybrid = cliOptions.hybrid;\n }\n if (cliOptions.hybridMode) {\n convertOptions.hybridMode = cliOptions.hybridMode;\n }\n if (cliOptions.hybridUrl) {\n convertOptions.hybridUrl = cliOptions.hybridUrl;\n }\n if (cliOptions.hybridTimeout) {\n convertOptions.hybridTimeout = cliOptions.hybridTimeout;\n }\n if (cliOptions.hybridFallback) {\n convertOptions.hybridFallback = true;\n }\n if (cliOptions.hybridHancomAiRegionlistStrategy) {\n convertOptions.hybridHancomAiRegionlistStrategy = cliOptions.hybridHancomAiRegionlistStrategy;\n }\n if (cliOptions.hybridHancomAiOcrStrategy) {\n convertOptions.hybridHancomAiOcrStrategy = cliOptions.hybridHancomAiOcrStrategy;\n }\n if (cliOptions.hybridHancomAiImageCache) {\n convertOptions.hybridHancomAiImageCache = cliOptions.hybridHancomAiImageCache;\n }\n if (cliOptions.toStdout) {\n convertOptions.toStdout = true;\n }\n if (cliOptions.threads) {\n convertOptions.threads = cliOptions.threads;\n }\n\n return convertOptions;\n}\n\n/**\n * Build CLI arguments array from ConvertOptions.\n */\nexport function buildArgs(options: ConvertOptions): string[] {\n const args: string[] = [];\n\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format) {\n if (Array.isArray(options.format)) {\n if (options.format.length > 0) {\n args.push('--format', options.format.join(','));\n }\n } else {\n args.push('--format', options.format);\n }\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff) {\n if (Array.isArray(options.contentSafetyOff)) {\n if (options.contentSafetyOff.length > 0) {\n args.push('--content-safety-off', options.contentSafetyOff.join(','));\n }\n } else {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n }\n if (options.sanitize) {\n args.push('--sanitize');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.useStructTree) {\n args.push('--use-struct-tree');\n }\n if (options.tableMethod) {\n args.push('--table-method', options.tableMethod);\n }\n if (options.readingOrder) {\n args.push('--reading-order', options.readingOrder);\n }\n if (options.markdownPageSeparator) {\n args.push('--markdown-page-separator', options.markdownPageSeparator);\n }\n if (options.textPageSeparator) {\n args.push('--text-page-separator', options.textPageSeparator);\n }\n if (options.htmlPageSeparator) {\n args.push('--html-page-separator', options.htmlPageSeparator);\n }\n if (options.imageOutput) {\n args.push('--image-output', options.imageOutput);\n }\n if (options.imageFormat) {\n args.push('--image-format', options.imageFormat);\n }\n if (options.imageDir) {\n args.push('--image-dir', options.imageDir);\n }\n if (options.pages) {\n args.push('--pages', options.pages);\n }\n if (options.includeHeaderFooter) {\n args.push('--include-header-footer');\n }\n if (options.detectStrikethrough) {\n args.push('--detect-strikethrough');\n }\n if (options.hybrid) {\n args.push('--hybrid', options.hybrid);\n }\n if (options.hybridMode) {\n args.push('--hybrid-mode', options.hybridMode);\n }\n if (options.hybridUrl) {\n args.push('--hybrid-url', options.hybridUrl);\n }\n if (options.hybridTimeout) {\n args.push('--hybrid-timeout', options.hybridTimeout);\n }\n if (options.hybridFallback) {\n args.push('--hybrid-fallback');\n }\n if (options.hybridHancomAiRegionlistStrategy) {\n args.push('--hybrid-hancom-ai-regionlist-strategy', options.hybridHancomAiRegionlistStrategy);\n }\n if (options.hybridHancomAiOcrStrategy) {\n args.push('--hybrid-hancom-ai-ocr-strategy', options.hybridHancomAiOcrStrategy);\n }\n if (options.hybridHancomAiImageCache) {\n args.push('--hybrid-hancom-ai-image-cache', options.hybridHancomAiImageCache);\n }\n if (options.toStdout) {\n args.push('--to-stdout');\n }\n if (options.threads) {\n args.push('--threads', options.threads);\n }\n\n return args;\n}\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\nimport { Command } from 'commander';\n\n/**\n * Register all CLI options on the given Commander program.\n */\nexport function registerCliOptions(program: Command): void {\n program.option('-o, --output-dir <value>', 'Directory where output files are written. Default: input file directory');\n program.option('-p, --password <value>', 'Password for encrypted PDF files');\n program.option('-f, --format <value>', 'Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images, tagged-pdf. Default: json');\n program.option('-q, --quiet', 'Suppress console logging output');\n program.option('--content-safety-off <value>', 'Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg');\n program.option('--sanitize', 'Enable sensitive data sanitization. Replaces emails, phone numbers, IPs, credit cards, and URLs with placeholders');\n program.option('--keep-line-breaks', 'Preserve original line breaks in extracted text');\n program.option('--replace-invalid-chars <value>', 'Replacement character for invalid/unrecognized characters. Default: space');\n program.option('--use-struct-tree', 'Use PDF structure tree (tagged PDF) for reading order and semantic structure');\n program.option('--table-method <value>', 'Table detection method. Values: default (border-based), cluster (border + cluster). Default: default');\n program.option('--reading-order <value>', 'Reading order algorithm. Values: off, xycut. Default: xycut');\n program.option('--markdown-page-separator <value>', 'Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none');\n program.option('--text-page-separator <value>', 'Separator between pages in text output. Use %page-number% for page numbers. Default: none');\n program.option('--html-page-separator <value>', 'Separator between pages in HTML output. Use %page-number% for page numbers. Default: none');\n program.option('--image-output <value>', 'Image output mode. Values: off (no images), embedded (Base64 data URIs), external (file references). Default: external');\n program.option('--image-format <value>', 'Output format for extracted images. Values: png, jpeg. Default: png');\n program.option('--image-dir <value>', 'Directory for extracted images');\n program.option('--pages <value>', 'Pages to extract (e.g., \"1,3,5-7\"). Default: all pages');\n program.option('--include-header-footer', 'Include page headers and footers in output');\n program.option('--detect-strikethrough', 'Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental)');\n program.option('--hybrid <value>', 'Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai');\n program.option('--hybrid-mode <value>', 'Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend)');\n program.option('--hybrid-url <value>', 'Hybrid backend server URL (overrides default)');\n program.option('--hybrid-timeout <value>', 'Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0');\n program.option('--hybrid-fallback', 'Opt in to Java fallback on hybrid backend error (default: disabled)');\n program.option('--hybrid-hancom-ai-regionlist-strategy <value>', 'DLA label 7 (regionlist) handling. Requires --hybrid=hancom-ai. Values: table-first (default; check TSR overlap), list-only (skip TSR, always treat as list)');\n program.option('--hybrid-hancom-ai-ocr-strategy <value>', 'OCR strategy. Requires --hybrid=hancom-ai. Values: off (stream-only), auto (default; stream first, OCR fallback), force (OCR-only)');\n program.option('--hybrid-hancom-ai-image-cache <value>', 'Page image cache backing. Requires --hybrid=hancom-ai. Values: memory (default), disk');\n program.option('--to-stdout', 'Write output to stdout instead of file (single format only)');\n program.option('--threads <value>', 'Number of worker threads for per-page processing. Default: 1 (sequential, stable). Values >1 (experimental) run pages in parallel for faster throughput; output may vary slightly on some PDFs. Capped at the number of available CPU cores. Applies to the native Java pipeline only; ignored in --hybrid mode');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAKA,IAAM,mBAAmB,MACvB,OAAO,aAAa,cAChB,IAAI,IAAI,QAAQ,UAAU,EAAE,EAAE,OAC7B,SAAS,iBAAiB,SAAS,cAAc,QAAQ,YAAY,MAAM,WAC1E,SAAS,cAAc,MACvB,IAAI,IAAI,WAAW,SAAS,OAAO,EAAE;AAEtC,IAAM,gBAAgC,iCAAiB;;;ACX9D,uBAAwC;;;ACDxC,2BAAsB;AACtB,WAAsB;AACtB,SAAoB;AACpB,4BAA8B;AAC9B,iBAA8B;;;ACwGvB,SAAS,oBAAoB,YAAwC;AAC1E,QAAM,iBAAiC,CAAC;AAExC,MAAI,WAAW,WAAW;AACxB,mBAAe,YAAY,WAAW;AAAA,EACxC;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW,WAAW;AAAA,EACvC;AACA,MAAI,WAAW,QAAQ;AACrB,mBAAe,SAAS,WAAW;AAAA,EACrC;AACA,MAAI,WAAW,OAAO;AACpB,mBAAe,QAAQ;AAAA,EACzB;AACA,MAAI,WAAW,kBAAkB;AAC/B,mBAAe,mBAAmB,WAAW;AAAA,EAC/C;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW;AAAA,EAC5B;AACA,MAAI,WAAW,gBAAgB;AAC7B,mBAAe,iBAAiB;AAAA,EAClC;AACA,MAAI,WAAW,qBAAqB;AAClC,mBAAe,sBAAsB,WAAW;AAAA,EAClD;AACA,MAAI,WAAW,eAAe;AAC5B,mBAAe,gBAAgB;AAAA,EACjC;AACA,MAAI,WAAW,aAAa;AAC1B,mBAAe,cAAc,WAAW;AAAA,EAC1C;AACA,MAAI,WAAW,cAAc;AAC3B,mBAAe,eAAe,WAAW;AAAA,EAC3C;AACA,MAAI,WAAW,uBAAuB;AACpC,mBAAe,wBAAwB,WAAW;AAAA,EACpD;AACA,MAAI,WAAW,mBAAmB;AAChC,mBAAe,oBAAoB,WAAW;AAAA,EAChD;AACA,MAAI,WAAW,mBAAmB;AAChC,mBAAe,oBAAoB,WAAW;AAAA,EAChD;AACA,MAAI,WAAW,aAAa;AAC1B,mBAAe,cAAc,WAAW;AAAA,EAC1C;AACA,MAAI,WAAW,aAAa;AAC1B,mBAAe,cAAc,WAAW;AAAA,EAC1C;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW,WAAW;AAAA,EACvC;AACA,MAAI,WAAW,OAAO;AACpB,mBAAe,QAAQ,WAAW;AAAA,EACpC;AACA,MAAI,WAAW,qBAAqB;AAClC,mBAAe,sBAAsB;AAAA,EACvC;AACA,MAAI,WAAW,qBAAqB;AAClC,mBAAe,sBAAsB;AAAA,EACvC;AACA,MAAI,WAAW,QAAQ;AACrB,mBAAe,SAAS,WAAW;AAAA,EACrC;AACA,MAAI,WAAW,YAAY;AACzB,mBAAe,aAAa,WAAW;AAAA,EACzC;AACA,MAAI,WAAW,WAAW;AACxB,mBAAe,YAAY,WAAW;AAAA,EACxC;AACA,MAAI,WAAW,eAAe;AAC5B,mBAAe,gBAAgB,WAAW;AAAA,EAC5C;AACA,MAAI,WAAW,gBAAgB;AAC7B,mBAAe,iBAAiB;AAAA,EAClC;AACA,MAAI,WAAW,kCAAkC;AAC/C,mBAAe,mCAAmC,WAAW;AAAA,EAC/D;AACA,MAAI,WAAW,2BAA2B;AACxC,mBAAe,4BAA4B,WAAW;AAAA,EACxD;AACA,MAAI,WAAW,0BAA0B;AACvC,mBAAe,2BAA2B,WAAW;AAAA,EACvD;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW;AAAA,EAC5B;AACA,MAAI,WAAW,SAAS;AACtB,mBAAe,UAAU,WAAW;AAAA,EACtC;AAEA,SAAO;AACT;AAKO,SAAS,UAAU,SAAmC;AAC3D,QAAM,OAAiB,CAAC;AAExB,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,QAAQ;AAClB,QAAI,MAAM,QAAQ,QAAQ,MAAM,GAAG;AACjC,UAAI,QAAQ,OAAO,SAAS,GAAG;AAC7B,aAAK,KAAK,YAAY,QAAQ,OAAO,KAAK,GAAG,CAAC;AAAA,MAChD;AAAA,IACF,OAAO;AACL,WAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,IACtC;AAAA,EACF;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,MAAM,QAAQ,QAAQ,gBAAgB,GAAG;AAC3C,UAAI,QAAQ,iBAAiB,SAAS,GAAG;AACvC,aAAK,KAAK,wBAAwB,QAAQ,iBAAiB,KAAK,GAAG,CAAC;AAAA,MACtE;AAAA,IACF,OAAO;AACL,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AAAA,EACF;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,YAAY;AAAA,EACxB;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBAAoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,cAAc;AACxB,SAAK,KAAK,mBAAmB,QAAQ,YAAY;AAAA,EACnD;AACA,MAAI,QAAQ,uBAAuB;AACjC,SAAK,KAAK,6BAA6B,QAAQ,qBAAqB;AAAA,EACtE;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,eAAe,QAAQ,QAAQ;AAAA,EAC3C;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,WAAW,QAAQ,KAAK;AAAA,EACpC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,yBAAyB;AAAA,EACrC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,wBAAwB;AAAA,EACpC;AACA,MAAI,QAAQ,QAAQ;AAClB,SAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,EACtC;AACA,MAAI,QAAQ,YAAY;AACtB,SAAK,KAAK,iBAAiB,QAAQ,UAAU;AAAA,EAC/C;AACA,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,oBAAoB,QAAQ,aAAa;AAAA,EACrD;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,kCAAkC;AAC5C,SAAK,KAAK,0CAA0C,QAAQ,gCAAgC;AAAA,EAC9F;AACA,MAAI,QAAQ,2BAA2B;AACrC,SAAK,KAAK,mCAAmC,QAAQ,yBAAyB;AAAA,EAChF;AACA,MAAI,QAAQ,0BAA0B;AACpC,SAAK,KAAK,kCAAkC,QAAQ,wBAAwB;AAAA,EAC9E;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,aAAa;AAAA,EACzB;AACA,MAAI,QAAQ,SAAS;AACnB,SAAK,KAAK,aAAa,QAAQ,OAAO;AAAA,EACxC;AAEA,SAAO;AACT;;;AD/SA,IAAMA,kBAAa,0BAAc,aAAe;AAChD,IAAM,YAAiB,aAAQA,WAAU;AAEzC,IAAM,WAAW;AAWjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,eAAe,MAAM,IAAI;AAEjC,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,UAAM,kBAAc,4BAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAKb,UAAM,gBAAgB,IAAI,oCAAc,MAAM;AAC9C,UAAM,gBAAgB,IAAI,oCAAc,MAAM;AAE9C,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAIhB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B,OAAO;AACL,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AAGA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAIhC,YAAM,aAAa,cAAc,IAAI;AACrC,YAAM,aAAa,cAAc,IAAI;AACrC,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,cAAc;AAChB,kBAAQ,OAAO,MAAM,UAAU;AAAA,QACjC,OAAO;AACL,oBAAU;AAAA,QACZ;AAAA,MACF;AACA,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,aAAc,SAAQ,OAAO,MAAM,UAAU;AACjD,kBAAU;AAAA,MACZ;AAEA,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AAEA,SAAS,aACP,YACA,SACkB;AAClB,QAAM,YAAY,MAAM,QAAQ,UAAU,IAAI,aAAa,CAAC,UAAU;AACtE,MAAI,UAAU,WAAW,GAAG;AAC1B,WAAO,IAAI,MAAM,2CAA2C;AAAA,EAC9D;AAEA,aAAW,SAAS,WAAW;AAC7B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE;AAAA,IAC7D;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,WAAW,GAAG,UAAU,OAAO,CAAC;AAC7C;AA2BA,eAAsB,WACpB,YACA,UAA0B,CAAC,GACZ;AACf,QAAM,cAAc,aAAa,YAAY,OAAO;AACpD,MAAI,uBAAuB,OAAO;AAChC,UAAM;AAAA,EACR;AACA,QAAM,WAAW,aAAa,EAAE,cAAc,KAAK,CAAC;AACtD;;;AEnKO,SAAS,mBAAmB,SAAwB;AACzD,UAAQ,OAAO,4BAA4B,yEAAyE;AACpH,UAAQ,OAAO,0BAA0B,kCAAkC;AAC3E,UAAQ,OAAO,wBAAwB,gJAAgJ;AACvL,UAAQ,OAAO,eAAe,iCAAiC;AAC/D,UAAQ,OAAO,gCAAgC,sFAAsF;AACrI,UAAQ,OAAO,cAAc,mHAAmH;AAChJ,UAAQ,OAAO,sBAAsB,iDAAiD;AACtF,UAAQ,OAAO,mCAAmC,2EAA2E;AAC7H,UAAQ,OAAO,qBAAqB,8EAA8E;AAClH,UAAQ,OAAO,0BAA0B,sGAAsG;AAC/I,UAAQ,OAAO,2BAA2B,6DAA6D;AACvG,UAAQ,OAAO,qCAAqC,+FAA+F;AACnJ,UAAQ,OAAO,iCAAiC,2FAA2F;AAC3I,UAAQ,OAAO,iCAAiC,2FAA2F;AAC3I,UAAQ,OAAO,0BAA0B,wHAAwH;AACjK,UAAQ,OAAO,0BAA0B,qEAAqE;AAC9G,UAAQ,OAAO,uBAAuB,gCAAgC;AACtE,UAAQ,OAAO,mBAAmB,wDAAwD;AAC1F,UAAQ,OAAO,2BAA2B,4CAA4C;AACtF,UAAQ,OAAO,0BAA0B,gHAAgH;AACzJ,UAAQ,OAAO,oBAAoB,iOAAiO;AACpQ,UAAQ,OAAO,yBAAyB,sGAAsG;AAC9I,UAAQ,OAAO,wBAAwB,+CAA+C;AACtF,UAAQ,OAAO,4BAA4B,6EAA6E;AACxH,UAAQ,OAAO,qBAAqB,qEAAqE;AACzG,UAAQ,OAAO,kDAAkD,8JAA8J;AAC/N,UAAQ,OAAO,2CAA2C,oIAAoI;AAC9L,UAAQ,OAAO,0CAA0C,uFAAuF;AAChJ,UAAQ,OAAO,eAAe,6DAA6D;AAC3F,UAAQ,OAAO,qBAAqB,iTAAiT;AACvV;;;AHjCA,SAAS,gBAAyB;AAChC,QAAM,UAAU,IAAI,yBAAQ;AAE5B,UACG,KAAK,oBAAoB,EACzB,MAAM,sBAAsB,EAC5B,YAAY,4CAA4C,EACxD,mBAAmB,wCAAwC,EAC3D,yBAAyB,KAAK,EAC9B,SAAS,cAAc,uCAAuC;AAGjE,qBAAmB,OAAO;AAE1B,UAAQ,gBAAgB;AAAA,IACtB,UAAU,CAAC,QAAQ;AACjB,cAAQ,MAAM,IAAI,QAAQ,CAAC;AAAA,IAC7B;AAAA,IACA,aAAa,CAAC,KAAK,UAAU;AAC3B,YAAM,GAAG;AAAA,IACX;AAAA,EACF,CAAC;AAED,SAAO;AACT;AAEA,eAAe,OAAwB;AACrC,QAAM,UAAU,cAAc;AAE9B,UAAQ,aAAa;AAErB,MAAI;AACF,YAAQ,MAAM,QAAQ,IAAI;AAAA,EAC5B,SAAS,KAAK;AACZ,QAAI,eAAe,iCAAgB;AACjC,UAAI,IAAI,SAAS,2BAA2B;AAC1C,eAAO;AAAA,MACT;AACA,aAAO,IAAI,YAAY;AAAA,IACzB;AAEA,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,QAAQ,KAAiB;AAC5C,QAAM,aAAa,QAAQ;AAC3B,QAAM,iBAAiB,oBAAoB,UAAU;AAErD,MAAI;AAGF,UAAM,WAAW,YAAY,cAAc;AAC3C,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,KAAK,EAAE,KAAK,CAAC,SAAS;AACpB,MAAI,SAAS,GAAG;AACd,YAAQ,KAAK,IAAI;AAAA,EACnB;AACF,CAAC;","names":["__filename"]}
1
+ {"version":3,"sources":["../node_modules/.pnpm/tsup@8.5.1_postcss@8.5.12_tsx@4.20.5_typescript@6.0.2/node_modules/tsup/assets/cjs_shims.js","../src/cli.ts","../src/index.ts","../src/convert-options.generated.ts","../src/cli-options.generated.ts"],"sourcesContent":["// Shim globals in cjs bundle\n// There's a weird bug that esbuild will always inject importMetaUrl\n// if we export it as `const importMetaUrl = ... __filename ...`\n// But using a function will not cause this issue\n\nconst getImportMetaUrl = () => \n typeof document === \"undefined\" \n ? new URL(`file:${__filename}`).href \n : (document.currentScript && document.currentScript.tagName.toUpperCase() === 'SCRIPT') \n ? document.currentScript.src \n : new URL(\"main.js\", document.baseURI).href;\n\nexport const importMetaUrl = /* @__PURE__ */ getImportMetaUrl()\n","#!/usr/bin/env node\nimport { Command, CommanderError } from 'commander';\nimport { _runForCli } from './index.js';\nimport { CliOptions, buildConvertOptions } from './convert-options.generated.js';\nimport { registerCliOptions } from './cli-options.generated.js';\n\nfunction createProgram(): Command {\n const program = new Command();\n\n program\n .name('opendataloader-pdf')\n .usage('[options] <input...>')\n .description('Convert PDFs using the OpenDataLoader CLI.')\n .showHelpAfterError(\"Use '--help' to see available options.\")\n .showSuggestionAfterError(false)\n .argument('<input...>', 'Input files or directories to convert');\n\n // Register CLI options from auto-generated file\n registerCliOptions(program);\n\n program.configureOutput({\n writeErr: (str) => {\n console.error(str.trimEnd());\n },\n outputError: (str, write) => {\n write(str);\n },\n });\n\n return program;\n}\n\nasync function main(): Promise<number> {\n const program = createProgram();\n\n program.exitOverride();\n\n try {\n program.parse(process.argv);\n } catch (err) {\n if (err instanceof CommanderError) {\n if (err.code === 'commander.helpDisplayed') {\n return 0;\n }\n return err.exitCode ?? 1;\n }\n\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n\n const cliOptions = program.opts<CliOptions>();\n const inputPaths = program.args;\n const convertOptions = buildConvertOptions(cliOptions);\n\n try {\n // _runForCli streams stdout/stderr to the parent process as they arrive;\n // we deliberately do not re-print anything here. (Issue #398.)\n await _runForCli(inputPaths, convertOptions);\n return 0;\n } catch (err) {\n // Subprocess-exit errors are already on the user's terminal via the live\n // stderr stream — re-printing would duplicate output and risk leaking\n // anything sensitive Java logged (e.g. a --password value echoed by an\n // underlying library). Wrapper-side failures (JAR not found, java not in\n // PATH, bad input path) still need to be surfaced.\n const isJavaExit =\n err instanceof Error && (err as Error & { isJavaExit?: boolean }).isJavaExit === true;\n if (!isJavaExit) {\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n }\n return 1;\n }\n}\n\nmain().then((code) => {\n if (code !== 0) {\n process.exit(code);\n }\n});\n","import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { StringDecoder } from 'string_decoder';\nimport { fileURLToPath } from 'url';\n\n// Re-export types and utilities from auto-generated file\nexport type { ConvertOptions } from './convert-options.generated.js';\nexport { buildArgs } from './convert-options.generated.js';\nimport type { ConvertOptions } from './convert-options.generated.js';\nimport { buildArgs } from './convert-options.generated.js';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n /**\n * When true, forwards Java's stdout and stderr chunks to the parent\n * process in real time as well as accumulating them. Used by the bundled\n * CLI so long-running conversions show progress as it happens.\n */\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n // Force headless AWT so macOS doesn't surface a Dock icon (and steal focus)\n // every time the JVM touches ImageIO/PDFBox rendering. Safe on all OSes —\n // the CLI never opens a UI window, only manipulates BufferedImages.\n const commandArgs = [\n '-Djava.awt.headless=true',\n '-Dapple.awt.UIElement=true',\n '-jar',\n jarPath,\n ...args,\n ];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n // StringDecoder buffers incomplete multi-byte UTF-8 sequences across\n // chunk boundaries — Buffer.toString() alone would emit replacement\n // characters when, e.g., a 3-byte Korean codepoint splits across two\n // 'data' events. One decoder per stream so they don't share state.\n const stdoutDecoder = new StringDecoder('utf8');\n const stderrDecoder = new StringDecoder('utf8');\n\n javaProcess.stdout.on('data', (data: Buffer) => {\n const chunk = stdoutDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n // Stream-only: don't also accumulate, or a multi-hour conversion\n // would buffer its entire (potentially gigabyte) stdout in memory\n // for no consumer.\n process.stdout.write(chunk);\n } else {\n stdout += chunk;\n }\n });\n\n javaProcess.stderr.on('data', (data: Buffer) => {\n const chunk = stderrDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n // stderr is always accumulated (progress logs are small and we need\n // them for error messages on non-zero exit).\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n // Flush any trailing bytes the decoder is still holding (always emit\n // them — if we drop them on error paths, error messages with non-ASCII\n // characters lose their tail).\n const stdoutTail = stdoutDecoder.end();\n const stderrTail = stderrDecoder.end();\n if (stdoutTail.length > 0) {\n if (streamOutput) {\n process.stdout.write(stdoutTail);\n } else {\n stdout += stdoutTail;\n }\n }\n if (stderrTail.length > 0) {\n if (streamOutput) process.stderr.write(stderrTail);\n stderr += stderrTail;\n }\n\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n // Tag so the CLI can suppress re-printing this message — Java's\n // stderr was already streamed live to the parent in CLI mode, and\n // re-printing risks leaking anything sensitive Java logged\n // (e.g. a --password value echoed by an underlying library).\n (error as Error & { isJavaExit?: boolean }).isJavaExit = true;\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nfunction buildJarArgs(\n inputPaths: string | string[],\n options: ConvertOptions,\n): string[] | Error {\n const inputList = Array.isArray(inputPaths) ? inputPaths : [inputPaths];\n if (inputList.length === 0) {\n return new Error('At least one input path must be provided.');\n }\n\n for (const input of inputList) {\n if (!fs.existsSync(input)) {\n return new Error(`Input file or folder not found: ${input}`);\n }\n }\n\n return [...inputList, ...buildArgs(options)];\n}\n\nexport function convert(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<string> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n return Promise.reject(argsOrError);\n }\n // Library API: never streams to the parent process. Returns the full stdout\n // string so callers can do `const out = await convert(...)` without surprise\n // side-effects on process.stdout / process.stderr.\n return executeJar(argsOrError, { streamOutput: false });\n}\n\n/**\n * Internal entry point used by the bundled CLI. Streams Java's stdout and\n * stderr to the parent process in real time (so long-running conversions like\n * hybrid mode show progress as it happens) and resolves without a stdout\n * payload — preventing the caller from re-printing what was already streamed.\n *\n * Not part of the public API: do not import this from application code. Use\n * {@link convert} instead.\n *\n * @internal\n */\nexport async function _runForCli(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<void> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n throw argsOrError;\n }\n await executeJar(argsOrError, { streamOutput: true });\n}\n\n/**\n * @deprecated Use `convert()` and `ConvertOptions` instead. This function will be removed in a future version.\n */\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n useStructTree?: boolean;\n}\n\n/**\n * @deprecated Use `convert()` instead. This function will be removed in a future version.\n */\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n console.warn(\n 'Warning: run() is deprecated and will be removed in a future version. Use convert() instead.',\n );\n\n // Build format array based on legacy boolean options\n const formats: string[] = [];\n if (!options.noJson) {\n formats.push('json');\n }\n if (options.generateMarkdown) {\n if (options.addImageToMarkdown) {\n formats.push('markdown-with-images');\n } else if (options.htmlInMarkdown) {\n formats.push('markdown-with-html');\n } else {\n formats.push('markdown');\n }\n }\n if (options.generateHtml) {\n formats.push('html');\n }\n if (options.generateAnnotatedPdf) {\n formats.push('pdf');\n }\n\n return convert(inputPath, {\n outputDir: options.outputFolder,\n password: options.password,\n replaceInvalidChars: options.replaceInvalidChars,\n keepLineBreaks: options.keepLineBreaks,\n contentSafetyOff: options.contentSafetyOff,\n useStructTree: options.useStructTree,\n format: formats.length > 0 ? formats : undefined,\n quiet: !options.debug,\n });\n}\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\n/**\n * Options for the convert function.\n */\nexport interface ConvertOptions {\n /** Directory where output files are written. Default: input file directory */\n outputDir?: string;\n /** Password for encrypted PDF files */\n password?: string;\n /** Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images, tagged-pdf. Default: json */\n format?: string | string[];\n /** Suppress console logging output */\n quiet?: boolean;\n /** Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg */\n contentSafetyOff?: string | string[];\n /** Enable sensitive data sanitization. Replaces emails, phone numbers, IPs, credit cards, and URLs with placeholders */\n sanitize?: boolean;\n /** Preserve original line breaks in extracted text */\n keepLineBreaks?: boolean;\n /** Replacement character for invalid/unrecognized characters. Default: space */\n replaceInvalidChars?: string;\n /** Use PDF structure tree (tagged PDF) for reading order and semantic structure */\n useStructTree?: boolean;\n /** Table detection method. Values: default (border-based), cluster (border + cluster). Default: default */\n tableMethod?: string;\n /** Reading order algorithm. Values: off, xycut. Default: xycut */\n readingOrder?: string;\n /** Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none */\n markdownPageSeparator?: string;\n /** Separator between pages in text output. Use %page-number% for page numbers. Default: none */\n textPageSeparator?: string;\n /** Separator between pages in HTML output. Use %page-number% for page numbers. Default: none */\n htmlPageSeparator?: string;\n /** Image output mode. Values: off (no images), embedded (Base64 data URIs), external (file references). Default: external */\n imageOutput?: string;\n /** Output format for extracted images. Values: png, jpeg. Default: png */\n imageFormat?: string;\n /** Directory for extracted images */\n imageDir?: string;\n /** Pages to extract (e.g., \"1,3,5-7\"). Default: all pages */\n pages?: string;\n /** Include page headers and footers in output */\n includeHeaderFooter?: boolean;\n /** Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental) */\n detectStrikethrough?: boolean;\n /** Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai */\n hybrid?: string;\n /** Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend) */\n hybridMode?: string;\n /** Hybrid backend server URL (overrides default) */\n hybridUrl?: string;\n /** Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0 */\n hybridTimeout?: string;\n /** Opt in to Java fallback on hybrid backend error (default: disabled) */\n hybridFallback?: boolean;\n /** DLA label 7 (regionlist) handling. Requires --hybrid=hancom-ai. Values: table-first (default; check TSR overlap), list-only (skip TSR, always treat as list) */\n hybridHancomAiRegionlistStrategy?: string;\n /** OCR strategy. Requires --hybrid=hancom-ai. Values: off (stream-only), auto (default; stream first, OCR fallback), force (OCR-only) */\n hybridHancomAiOcrStrategy?: string;\n /** Page image cache backing. Requires --hybrid=hancom-ai. Values: memory (default), disk */\n hybridHancomAiImageCache?: string;\n /** Write output to stdout instead of file (single format only) */\n toStdout?: boolean;\n /** Number of worker threads for per-page processing. Default: 1 (sequential, stable). Values >1 (experimental) run pages in parallel for faster throughput; output may vary slightly on some PDFs. Capped at the number of available CPU cores. Applies to the native Java pipeline only; ignored in --hybrid mode */\n threads?: string;\n}\n\n/**\n * Options as parsed from CLI (all values are strings from commander).\n */\nexport interface CliOptions {\n outputDir?: string;\n password?: string;\n format?: string;\n quiet?: boolean;\n contentSafetyOff?: string;\n sanitize?: boolean;\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n useStructTree?: boolean;\n tableMethod?: string;\n readingOrder?: string;\n markdownPageSeparator?: string;\n textPageSeparator?: string;\n htmlPageSeparator?: string;\n imageOutput?: string;\n imageFormat?: string;\n imageDir?: string;\n pages?: string;\n includeHeaderFooter?: boolean;\n detectStrikethrough?: boolean;\n hybrid?: string;\n hybridMode?: string;\n hybridUrl?: string;\n hybridTimeout?: string;\n hybridFallback?: boolean;\n hybridHancomAiRegionlistStrategy?: string;\n hybridHancomAiOcrStrategy?: string;\n hybridHancomAiImageCache?: string;\n toStdout?: boolean;\n threads?: string;\n}\n\n/**\n * Convert CLI options to ConvertOptions.\n */\nexport function buildConvertOptions(cliOptions: CliOptions): ConvertOptions {\n const convertOptions: ConvertOptions = {};\n\n if (cliOptions.outputDir) {\n convertOptions.outputDir = cliOptions.outputDir;\n }\n if (cliOptions.password) {\n convertOptions.password = cliOptions.password;\n }\n if (cliOptions.format) {\n convertOptions.format = cliOptions.format;\n }\n if (cliOptions.quiet) {\n convertOptions.quiet = true;\n }\n if (cliOptions.contentSafetyOff) {\n convertOptions.contentSafetyOff = cliOptions.contentSafetyOff;\n }\n if (cliOptions.sanitize) {\n convertOptions.sanitize = true;\n }\n if (cliOptions.keepLineBreaks) {\n convertOptions.keepLineBreaks = true;\n }\n if (cliOptions.replaceInvalidChars) {\n convertOptions.replaceInvalidChars = cliOptions.replaceInvalidChars;\n }\n if (cliOptions.useStructTree) {\n convertOptions.useStructTree = true;\n }\n if (cliOptions.tableMethod) {\n convertOptions.tableMethod = cliOptions.tableMethod;\n }\n if (cliOptions.readingOrder) {\n convertOptions.readingOrder = cliOptions.readingOrder;\n }\n if (cliOptions.markdownPageSeparator) {\n convertOptions.markdownPageSeparator = cliOptions.markdownPageSeparator;\n }\n if (cliOptions.textPageSeparator) {\n convertOptions.textPageSeparator = cliOptions.textPageSeparator;\n }\n if (cliOptions.htmlPageSeparator) {\n convertOptions.htmlPageSeparator = cliOptions.htmlPageSeparator;\n }\n if (cliOptions.imageOutput) {\n convertOptions.imageOutput = cliOptions.imageOutput;\n }\n if (cliOptions.imageFormat) {\n convertOptions.imageFormat = cliOptions.imageFormat;\n }\n if (cliOptions.imageDir) {\n convertOptions.imageDir = cliOptions.imageDir;\n }\n if (cliOptions.pages) {\n convertOptions.pages = cliOptions.pages;\n }\n if (cliOptions.includeHeaderFooter) {\n convertOptions.includeHeaderFooter = true;\n }\n if (cliOptions.detectStrikethrough) {\n convertOptions.detectStrikethrough = true;\n }\n if (cliOptions.hybrid) {\n convertOptions.hybrid = cliOptions.hybrid;\n }\n if (cliOptions.hybridMode) {\n convertOptions.hybridMode = cliOptions.hybridMode;\n }\n if (cliOptions.hybridUrl) {\n convertOptions.hybridUrl = cliOptions.hybridUrl;\n }\n if (cliOptions.hybridTimeout) {\n convertOptions.hybridTimeout = cliOptions.hybridTimeout;\n }\n if (cliOptions.hybridFallback) {\n convertOptions.hybridFallback = true;\n }\n if (cliOptions.hybridHancomAiRegionlistStrategy) {\n convertOptions.hybridHancomAiRegionlistStrategy = cliOptions.hybridHancomAiRegionlistStrategy;\n }\n if (cliOptions.hybridHancomAiOcrStrategy) {\n convertOptions.hybridHancomAiOcrStrategy = cliOptions.hybridHancomAiOcrStrategy;\n }\n if (cliOptions.hybridHancomAiImageCache) {\n convertOptions.hybridHancomAiImageCache = cliOptions.hybridHancomAiImageCache;\n }\n if (cliOptions.toStdout) {\n convertOptions.toStdout = true;\n }\n if (cliOptions.threads) {\n convertOptions.threads = cliOptions.threads;\n }\n\n return convertOptions;\n}\n\n/**\n * Build CLI arguments array from ConvertOptions.\n */\nexport function buildArgs(options: ConvertOptions): string[] {\n const args: string[] = [];\n\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format) {\n if (Array.isArray(options.format)) {\n if (options.format.length > 0) {\n args.push('--format', options.format.join(','));\n }\n } else {\n args.push('--format', options.format);\n }\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff) {\n if (Array.isArray(options.contentSafetyOff)) {\n if (options.contentSafetyOff.length > 0) {\n args.push('--content-safety-off', options.contentSafetyOff.join(','));\n }\n } else {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n }\n if (options.sanitize) {\n args.push('--sanitize');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.useStructTree) {\n args.push('--use-struct-tree');\n }\n if (options.tableMethod) {\n args.push('--table-method', options.tableMethod);\n }\n if (options.readingOrder) {\n args.push('--reading-order', options.readingOrder);\n }\n if (options.markdownPageSeparator) {\n args.push('--markdown-page-separator', options.markdownPageSeparator);\n }\n if (options.textPageSeparator) {\n args.push('--text-page-separator', options.textPageSeparator);\n }\n if (options.htmlPageSeparator) {\n args.push('--html-page-separator', options.htmlPageSeparator);\n }\n if (options.imageOutput) {\n args.push('--image-output', options.imageOutput);\n }\n if (options.imageFormat) {\n args.push('--image-format', options.imageFormat);\n }\n if (options.imageDir) {\n args.push('--image-dir', options.imageDir);\n }\n if (options.pages) {\n args.push('--pages', options.pages);\n }\n if (options.includeHeaderFooter) {\n args.push('--include-header-footer');\n }\n if (options.detectStrikethrough) {\n args.push('--detect-strikethrough');\n }\n if (options.hybrid) {\n args.push('--hybrid', options.hybrid);\n }\n if (options.hybridMode) {\n args.push('--hybrid-mode', options.hybridMode);\n }\n if (options.hybridUrl) {\n args.push('--hybrid-url', options.hybridUrl);\n }\n if (options.hybridTimeout) {\n args.push('--hybrid-timeout', options.hybridTimeout);\n }\n if (options.hybridFallback) {\n args.push('--hybrid-fallback');\n }\n if (options.hybridHancomAiRegionlistStrategy) {\n args.push('--hybrid-hancom-ai-regionlist-strategy', options.hybridHancomAiRegionlistStrategy);\n }\n if (options.hybridHancomAiOcrStrategy) {\n args.push('--hybrid-hancom-ai-ocr-strategy', options.hybridHancomAiOcrStrategy);\n }\n if (options.hybridHancomAiImageCache) {\n args.push('--hybrid-hancom-ai-image-cache', options.hybridHancomAiImageCache);\n }\n if (options.toStdout) {\n args.push('--to-stdout');\n }\n if (options.threads) {\n args.push('--threads', options.threads);\n }\n\n return args;\n}\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\nimport { Command } from 'commander';\n\n/**\n * Register all CLI options on the given Commander program.\n */\nexport function registerCliOptions(program: Command): void {\n program.option('-o, --output-dir <value>', 'Directory where output files are written. Default: input file directory');\n program.option('-p, --password <value>', 'Password for encrypted PDF files');\n program.option('-f, --format <value>', 'Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images, tagged-pdf. Default: json');\n program.option('-q, --quiet', 'Suppress console logging output');\n program.option('--content-safety-off <value>', 'Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg');\n program.option('--sanitize', 'Enable sensitive data sanitization. Replaces emails, phone numbers, IPs, credit cards, and URLs with placeholders');\n program.option('--keep-line-breaks', 'Preserve original line breaks in extracted text');\n program.option('--replace-invalid-chars <value>', 'Replacement character for invalid/unrecognized characters. Default: space');\n program.option('--use-struct-tree', 'Use PDF structure tree (tagged PDF) for reading order and semantic structure');\n program.option('--table-method <value>', 'Table detection method. Values: default (border-based), cluster (border + cluster). Default: default');\n program.option('--reading-order <value>', 'Reading order algorithm. Values: off, xycut. Default: xycut');\n program.option('--markdown-page-separator <value>', 'Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none');\n program.option('--text-page-separator <value>', 'Separator between pages in text output. Use %page-number% for page numbers. Default: none');\n program.option('--html-page-separator <value>', 'Separator between pages in HTML output. Use %page-number% for page numbers. Default: none');\n program.option('--image-output <value>', 'Image output mode. Values: off (no images), embedded (Base64 data URIs), external (file references). Default: external');\n program.option('--image-format <value>', 'Output format for extracted images. Values: png, jpeg. Default: png');\n program.option('--image-dir <value>', 'Directory for extracted images');\n program.option('--pages <value>', 'Pages to extract (e.g., \"1,3,5-7\"). Default: all pages');\n program.option('--include-header-footer', 'Include page headers and footers in output');\n program.option('--detect-strikethrough', 'Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental)');\n program.option('--hybrid <value>', 'Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai');\n program.option('--hybrid-mode <value>', 'Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend)');\n program.option('--hybrid-url <value>', 'Hybrid backend server URL (overrides default)');\n program.option('--hybrid-timeout <value>', 'Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0');\n program.option('--hybrid-fallback', 'Opt in to Java fallback on hybrid backend error (default: disabled)');\n program.option('--hybrid-hancom-ai-regionlist-strategy <value>', 'DLA label 7 (regionlist) handling. Requires --hybrid=hancom-ai. Values: table-first (default; check TSR overlap), list-only (skip TSR, always treat as list)');\n program.option('--hybrid-hancom-ai-ocr-strategy <value>', 'OCR strategy. Requires --hybrid=hancom-ai. Values: off (stream-only), auto (default; stream first, OCR fallback), force (OCR-only)');\n program.option('--hybrid-hancom-ai-image-cache <value>', 'Page image cache backing. Requires --hybrid=hancom-ai. Values: memory (default), disk');\n program.option('--to-stdout', 'Write output to stdout instead of file (single format only)');\n program.option('--threads <value>', 'Number of worker threads for per-page processing. Default: 1 (sequential, stable). Values >1 (experimental) run pages in parallel for faster throughput; output may vary slightly on some PDFs. Capped at the number of available CPU cores. Applies to the native Java pipeline only; ignored in --hybrid mode');\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;AAKA,IAAM,mBAAmB,MACvB,OAAO,aAAa,cAChB,IAAI,IAAI,QAAQ,UAAU,EAAE,EAAE,OAC7B,SAAS,iBAAiB,SAAS,cAAc,QAAQ,YAAY,MAAM,WAC1E,SAAS,cAAc,MACvB,IAAI,IAAI,WAAW,SAAS,OAAO,EAAE;AAEtC,IAAM,gBAAgC,iCAAiB;;;ACX9D,uBAAwC;;;ACDxC,2BAAsB;AACtB,WAAsB;AACtB,SAAoB;AACpB,4BAA8B;AAC9B,iBAA8B;;;ACwGvB,SAAS,oBAAoB,YAAwC;AAC1E,QAAM,iBAAiC,CAAC;AAExC,MAAI,WAAW,WAAW;AACxB,mBAAe,YAAY,WAAW;AAAA,EACxC;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW,WAAW;AAAA,EACvC;AACA,MAAI,WAAW,QAAQ;AACrB,mBAAe,SAAS,WAAW;AAAA,EACrC;AACA,MAAI,WAAW,OAAO;AACpB,mBAAe,QAAQ;AAAA,EACzB;AACA,MAAI,WAAW,kBAAkB;AAC/B,mBAAe,mBAAmB,WAAW;AAAA,EAC/C;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW;AAAA,EAC5B;AACA,MAAI,WAAW,gBAAgB;AAC7B,mBAAe,iBAAiB;AAAA,EAClC;AACA,MAAI,WAAW,qBAAqB;AAClC,mBAAe,sBAAsB,WAAW;AAAA,EAClD;AACA,MAAI,WAAW,eAAe;AAC5B,mBAAe,gBAAgB;AAAA,EACjC;AACA,MAAI,WAAW,aAAa;AAC1B,mBAAe,cAAc,WAAW;AAAA,EAC1C;AACA,MAAI,WAAW,cAAc;AAC3B,mBAAe,eAAe,WAAW;AAAA,EAC3C;AACA,MAAI,WAAW,uBAAuB;AACpC,mBAAe,wBAAwB,WAAW;AAAA,EACpD;AACA,MAAI,WAAW,mBAAmB;AAChC,mBAAe,oBAAoB,WAAW;AAAA,EAChD;AACA,MAAI,WAAW,mBAAmB;AAChC,mBAAe,oBAAoB,WAAW;AAAA,EAChD;AACA,MAAI,WAAW,aAAa;AAC1B,mBAAe,cAAc,WAAW;AAAA,EAC1C;AACA,MAAI,WAAW,aAAa;AAC1B,mBAAe,cAAc,WAAW;AAAA,EAC1C;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW,WAAW;AAAA,EACvC;AACA,MAAI,WAAW,OAAO;AACpB,mBAAe,QAAQ,WAAW;AAAA,EACpC;AACA,MAAI,WAAW,qBAAqB;AAClC,mBAAe,sBAAsB;AAAA,EACvC;AACA,MAAI,WAAW,qBAAqB;AAClC,mBAAe,sBAAsB;AAAA,EACvC;AACA,MAAI,WAAW,QAAQ;AACrB,mBAAe,SAAS,WAAW;AAAA,EACrC;AACA,MAAI,WAAW,YAAY;AACzB,mBAAe,aAAa,WAAW;AAAA,EACzC;AACA,MAAI,WAAW,WAAW;AACxB,mBAAe,YAAY,WAAW;AAAA,EACxC;AACA,MAAI,WAAW,eAAe;AAC5B,mBAAe,gBAAgB,WAAW;AAAA,EAC5C;AACA,MAAI,WAAW,gBAAgB;AAC7B,mBAAe,iBAAiB;AAAA,EAClC;AACA,MAAI,WAAW,kCAAkC;AAC/C,mBAAe,mCAAmC,WAAW;AAAA,EAC/D;AACA,MAAI,WAAW,2BAA2B;AACxC,mBAAe,4BAA4B,WAAW;AAAA,EACxD;AACA,MAAI,WAAW,0BAA0B;AACvC,mBAAe,2BAA2B,WAAW;AAAA,EACvD;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW;AAAA,EAC5B;AACA,MAAI,WAAW,SAAS;AACtB,mBAAe,UAAU,WAAW;AAAA,EACtC;AAEA,SAAO;AACT;AAKO,SAAS,UAAU,SAAmC;AAC3D,QAAM,OAAiB,CAAC;AAExB,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,QAAQ;AAClB,QAAI,MAAM,QAAQ,QAAQ,MAAM,GAAG;AACjC,UAAI,QAAQ,OAAO,SAAS,GAAG;AAC7B,aAAK,KAAK,YAAY,QAAQ,OAAO,KAAK,GAAG,CAAC;AAAA,MAChD;AAAA,IACF,OAAO;AACL,WAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,IACtC;AAAA,EACF;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,MAAM,QAAQ,QAAQ,gBAAgB,GAAG;AAC3C,UAAI,QAAQ,iBAAiB,SAAS,GAAG;AACvC,aAAK,KAAK,wBAAwB,QAAQ,iBAAiB,KAAK,GAAG,CAAC;AAAA,MACtE;AAAA,IACF,OAAO;AACL,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AAAA,EACF;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,YAAY;AAAA,EACxB;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBAAoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,cAAc;AACxB,SAAK,KAAK,mBAAmB,QAAQ,YAAY;AAAA,EACnD;AACA,MAAI,QAAQ,uBAAuB;AACjC,SAAK,KAAK,6BAA6B,QAAQ,qBAAqB;AAAA,EACtE;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,eAAe,QAAQ,QAAQ;AAAA,EAC3C;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,WAAW,QAAQ,KAAK;AAAA,EACpC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,yBAAyB;AAAA,EACrC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,wBAAwB;AAAA,EACpC;AACA,MAAI,QAAQ,QAAQ;AAClB,SAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,EACtC;AACA,MAAI,QAAQ,YAAY;AACtB,SAAK,KAAK,iBAAiB,QAAQ,UAAU;AAAA,EAC/C;AACA,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,oBAAoB,QAAQ,aAAa;AAAA,EACrD;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,kCAAkC;AAC5C,SAAK,KAAK,0CAA0C,QAAQ,gCAAgC;AAAA,EAC9F;AACA,MAAI,QAAQ,2BAA2B;AACrC,SAAK,KAAK,mCAAmC,QAAQ,yBAAyB;AAAA,EAChF;AACA,MAAI,QAAQ,0BAA0B;AACpC,SAAK,KAAK,kCAAkC,QAAQ,wBAAwB;AAAA,EAC9E;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,aAAa;AAAA,EACzB;AACA,MAAI,QAAQ,SAAS;AACnB,SAAK,KAAK,aAAa,QAAQ,OAAO;AAAA,EACxC;AAEA,SAAO;AACT;;;AD/SA,IAAMA,kBAAa,0BAAc,aAAe;AAChD,IAAM,YAAiB,aAAQA,WAAU;AAEzC,IAAM,WAAW;AAWjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,eAAe,MAAM,IAAI;AAEjC,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAIhB,UAAM,cAAc;AAAA,MAClB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG;AAAA,IACL;AAEA,UAAM,kBAAc,4BAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAKb,UAAM,gBAAgB,IAAI,oCAAc,MAAM;AAC9C,UAAM,gBAAgB,IAAI,oCAAc,MAAM;AAE9C,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAIhB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B,OAAO;AACL,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AAGA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAIhC,YAAM,aAAa,cAAc,IAAI;AACrC,YAAM,aAAa,cAAc,IAAI;AACrC,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,cAAc;AAChB,kBAAQ,OAAO,MAAM,UAAU;AAAA,QACjC,OAAO;AACL,oBAAU;AAAA,QACZ;AAAA,MACF;AACA,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,aAAc,SAAQ,OAAO,MAAM,UAAU;AACjD,kBAAU;AAAA,MACZ;AAEA,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AAKA,QAAC,MAA2C,aAAa;AACzD,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AAEA,SAAS,aACP,YACA,SACkB;AAClB,QAAM,YAAY,MAAM,QAAQ,UAAU,IAAI,aAAa,CAAC,UAAU;AACtE,MAAI,UAAU,WAAW,GAAG;AAC1B,WAAO,IAAI,MAAM,2CAA2C;AAAA,EAC9D;AAEA,aAAW,SAAS,WAAW;AAC7B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE;AAAA,IAC7D;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,WAAW,GAAG,UAAU,OAAO,CAAC;AAC7C;AA2BA,eAAsB,WACpB,YACA,UAA0B,CAAC,GACZ;AACf,QAAM,cAAc,aAAa,YAAY,OAAO;AACpD,MAAI,uBAAuB,OAAO;AAChC,UAAM;AAAA,EACR;AACA,QAAM,WAAW,aAAa,EAAE,cAAc,KAAK,CAAC;AACtD;;;AEjLO,SAAS,mBAAmB,SAAwB;AACzD,UAAQ,OAAO,4BAA4B,yEAAyE;AACpH,UAAQ,OAAO,0BAA0B,kCAAkC;AAC3E,UAAQ,OAAO,wBAAwB,gJAAgJ;AACvL,UAAQ,OAAO,eAAe,iCAAiC;AAC/D,UAAQ,OAAO,gCAAgC,sFAAsF;AACrI,UAAQ,OAAO,cAAc,mHAAmH;AAChJ,UAAQ,OAAO,sBAAsB,iDAAiD;AACtF,UAAQ,OAAO,mCAAmC,2EAA2E;AAC7H,UAAQ,OAAO,qBAAqB,8EAA8E;AAClH,UAAQ,OAAO,0BAA0B,sGAAsG;AAC/I,UAAQ,OAAO,2BAA2B,6DAA6D;AACvG,UAAQ,OAAO,qCAAqC,+FAA+F;AACnJ,UAAQ,OAAO,iCAAiC,2FAA2F;AAC3I,UAAQ,OAAO,iCAAiC,2FAA2F;AAC3I,UAAQ,OAAO,0BAA0B,wHAAwH;AACjK,UAAQ,OAAO,0BAA0B,qEAAqE;AAC9G,UAAQ,OAAO,uBAAuB,gCAAgC;AACtE,UAAQ,OAAO,mBAAmB,wDAAwD;AAC1F,UAAQ,OAAO,2BAA2B,4CAA4C;AACtF,UAAQ,OAAO,0BAA0B,gHAAgH;AACzJ,UAAQ,OAAO,oBAAoB,iOAAiO;AACpQ,UAAQ,OAAO,yBAAyB,sGAAsG;AAC9I,UAAQ,OAAO,wBAAwB,+CAA+C;AACtF,UAAQ,OAAO,4BAA4B,6EAA6E;AACxH,UAAQ,OAAO,qBAAqB,qEAAqE;AACzG,UAAQ,OAAO,kDAAkD,8JAA8J;AAC/N,UAAQ,OAAO,2CAA2C,oIAAoI;AAC9L,UAAQ,OAAO,0CAA0C,uFAAuF;AAChJ,UAAQ,OAAO,eAAe,6DAA6D;AAC3F,UAAQ,OAAO,qBAAqB,iTAAiT;AACvV;;;AHjCA,SAAS,gBAAyB;AAChC,QAAM,UAAU,IAAI,yBAAQ;AAE5B,UACG,KAAK,oBAAoB,EACzB,MAAM,sBAAsB,EAC5B,YAAY,4CAA4C,EACxD,mBAAmB,wCAAwC,EAC3D,yBAAyB,KAAK,EAC9B,SAAS,cAAc,uCAAuC;AAGjE,qBAAmB,OAAO;AAE1B,UAAQ,gBAAgB;AAAA,IACtB,UAAU,CAAC,QAAQ;AACjB,cAAQ,MAAM,IAAI,QAAQ,CAAC;AAAA,IAC7B;AAAA,IACA,aAAa,CAAC,KAAK,UAAU;AAC3B,YAAM,GAAG;AAAA,IACX;AAAA,EACF,CAAC;AAED,SAAO;AACT;AAEA,eAAe,OAAwB;AACrC,QAAM,UAAU,cAAc;AAE9B,UAAQ,aAAa;AAErB,MAAI;AACF,YAAQ,MAAM,QAAQ,IAAI;AAAA,EAC5B,SAAS,KAAK;AACZ,QAAI,eAAe,iCAAgB;AACjC,UAAI,IAAI,SAAS,2BAA2B;AAC1C,eAAO;AAAA,MACT;AACA,aAAO,IAAI,YAAY;AAAA,IACzB;AAEA,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,QAAQ,KAAiB;AAC5C,QAAM,aAAa,QAAQ;AAC3B,QAAM,iBAAiB,oBAAoB,UAAU;AAErD,MAAI;AAGF,UAAM,WAAW,YAAY,cAAc;AAC3C,WAAO;AAAA,EACT,SAAS,KAAK;AAMZ,UAAM,aACJ,eAAe,SAAU,IAAyC,eAAe;AACnF,QAAI,CAAC,YAAY;AACf,YAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,cAAQ,MAAM,OAAO;AAAA,IACvB;AACA,WAAO;AAAA,EACT;AACF;AAEA,KAAK,EAAE,KAAK,CAAC,SAAS;AACpB,MAAI,SAAS,GAAG;AACd,YAAQ,KAAK,IAAI;AAAA,EACnB;AACF,CAAC;","names":["__filename"]}
package/dist/cli.js CHANGED
@@ -226,7 +226,13 @@ function executeJar(args, executionOptions = {}) {
226
226
  );
227
227
  }
228
228
  const command = "java";
229
- const commandArgs = ["-jar", jarPath, ...args];
229
+ const commandArgs = [
230
+ "-Djava.awt.headless=true",
231
+ "-Dapple.awt.UIElement=true",
232
+ "-jar",
233
+ jarPath,
234
+ ...args
235
+ ];
230
236
  const javaProcess = spawn(command, commandArgs);
231
237
  let stdout = "";
232
238
  let stderr = "";
@@ -272,6 +278,7 @@ function executeJar(args, executionOptions = {}) {
272
278
 
273
279
  ${errorOutput}`
274
280
  );
281
+ error.isJavaExit = true;
275
282
  reject(error);
276
283
  }
277
284
  });
@@ -381,8 +388,11 @@ async function main() {
381
388
  await _runForCli(inputPaths, convertOptions);
382
389
  return 0;
383
390
  } catch (err) {
384
- const message = err instanceof Error ? err.message : String(err);
385
- console.error(message);
391
+ const isJavaExit = err instanceof Error && err.isJavaExit === true;
392
+ if (!isJavaExit) {
393
+ const message = err instanceof Error ? err.message : String(err);
394
+ console.error(message);
395
+ }
386
396
  return 1;
387
397
  }
388
398
  }
package/dist/cli.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/cli.ts","../src/index.ts","../src/convert-options.generated.ts","../src/cli-options.generated.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { Command, CommanderError } from 'commander';\nimport { _runForCli } from './index.js';\nimport { CliOptions, buildConvertOptions } from './convert-options.generated.js';\nimport { registerCliOptions } from './cli-options.generated.js';\n\nfunction createProgram(): Command {\n const program = new Command();\n\n program\n .name('opendataloader-pdf')\n .usage('[options] <input...>')\n .description('Convert PDFs using the OpenDataLoader CLI.')\n .showHelpAfterError(\"Use '--help' to see available options.\")\n .showSuggestionAfterError(false)\n .argument('<input...>', 'Input files or directories to convert');\n\n // Register CLI options from auto-generated file\n registerCliOptions(program);\n\n program.configureOutput({\n writeErr: (str) => {\n console.error(str.trimEnd());\n },\n outputError: (str, write) => {\n write(str);\n },\n });\n\n return program;\n}\n\nasync function main(): Promise<number> {\n const program = createProgram();\n\n program.exitOverride();\n\n try {\n program.parse(process.argv);\n } catch (err) {\n if (err instanceof CommanderError) {\n if (err.code === 'commander.helpDisplayed') {\n return 0;\n }\n return err.exitCode ?? 1;\n }\n\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n\n const cliOptions = program.opts<CliOptions>();\n const inputPaths = program.args;\n const convertOptions = buildConvertOptions(cliOptions);\n\n try {\n // _runForCli streams stdout/stderr to the parent process as they arrive;\n // we deliberately do not re-print anything here. (Issue #398.)\n await _runForCli(inputPaths, convertOptions);\n return 0;\n } catch (err) {\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n return 1;\n }\n}\n\nmain().then((code) => {\n if (code !== 0) {\n process.exit(code);\n }\n});\n","import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { StringDecoder } from 'string_decoder';\nimport { fileURLToPath } from 'url';\n\n// Re-export types and utilities from auto-generated file\nexport type { ConvertOptions } from './convert-options.generated.js';\nexport { buildArgs } from './convert-options.generated.js';\nimport type { ConvertOptions } from './convert-options.generated.js';\nimport { buildArgs } from './convert-options.generated.js';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n /**\n * When true, forwards Java's stdout and stderr chunks to the parent\n * process in real time as well as accumulating them. Used by the bundled\n * CLI so long-running conversions show progress as it happens.\n */\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n // StringDecoder buffers incomplete multi-byte UTF-8 sequences across\n // chunk boundaries — Buffer.toString() alone would emit replacement\n // characters when, e.g., a 3-byte Korean codepoint splits across two\n // 'data' events. One decoder per stream so they don't share state.\n const stdoutDecoder = new StringDecoder('utf8');\n const stderrDecoder = new StringDecoder('utf8');\n\n javaProcess.stdout.on('data', (data: Buffer) => {\n const chunk = stdoutDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n // Stream-only: don't also accumulate, or a multi-hour conversion\n // would buffer its entire (potentially gigabyte) stdout in memory\n // for no consumer.\n process.stdout.write(chunk);\n } else {\n stdout += chunk;\n }\n });\n\n javaProcess.stderr.on('data', (data: Buffer) => {\n const chunk = stderrDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n // stderr is always accumulated (progress logs are small and we need\n // them for error messages on non-zero exit).\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n // Flush any trailing bytes the decoder is still holding (always emit\n // them — if we drop them on error paths, error messages with non-ASCII\n // characters lose their tail).\n const stdoutTail = stdoutDecoder.end();\n const stderrTail = stderrDecoder.end();\n if (stdoutTail.length > 0) {\n if (streamOutput) {\n process.stdout.write(stdoutTail);\n } else {\n stdout += stdoutTail;\n }\n }\n if (stderrTail.length > 0) {\n if (streamOutput) process.stderr.write(stderrTail);\n stderr += stderrTail;\n }\n\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nfunction buildJarArgs(\n inputPaths: string | string[],\n options: ConvertOptions,\n): string[] | Error {\n const inputList = Array.isArray(inputPaths) ? inputPaths : [inputPaths];\n if (inputList.length === 0) {\n return new Error('At least one input path must be provided.');\n }\n\n for (const input of inputList) {\n if (!fs.existsSync(input)) {\n return new Error(`Input file or folder not found: ${input}`);\n }\n }\n\n return [...inputList, ...buildArgs(options)];\n}\n\nexport function convert(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<string> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n return Promise.reject(argsOrError);\n }\n // Library API: never streams to the parent process. Returns the full stdout\n // string so callers can do `const out = await convert(...)` without surprise\n // side-effects on process.stdout / process.stderr.\n return executeJar(argsOrError, { streamOutput: false });\n}\n\n/**\n * Internal entry point used by the bundled CLI. Streams Java's stdout and\n * stderr to the parent process in real time (so long-running conversions like\n * hybrid mode show progress as it happens) and resolves without a stdout\n * payload — preventing the caller from re-printing what was already streamed.\n *\n * Not part of the public API: do not import this from application code. Use\n * {@link convert} instead.\n *\n * @internal\n */\nexport async function _runForCli(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<void> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n throw argsOrError;\n }\n await executeJar(argsOrError, { streamOutput: true });\n}\n\n/**\n * @deprecated Use `convert()` and `ConvertOptions` instead. This function will be removed in a future version.\n */\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n useStructTree?: boolean;\n}\n\n/**\n * @deprecated Use `convert()` instead. This function will be removed in a future version.\n */\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n console.warn(\n 'Warning: run() is deprecated and will be removed in a future version. Use convert() instead.',\n );\n\n // Build format array based on legacy boolean options\n const formats: string[] = [];\n if (!options.noJson) {\n formats.push('json');\n }\n if (options.generateMarkdown) {\n if (options.addImageToMarkdown) {\n formats.push('markdown-with-images');\n } else if (options.htmlInMarkdown) {\n formats.push('markdown-with-html');\n } else {\n formats.push('markdown');\n }\n }\n if (options.generateHtml) {\n formats.push('html');\n }\n if (options.generateAnnotatedPdf) {\n formats.push('pdf');\n }\n\n return convert(inputPath, {\n outputDir: options.outputFolder,\n password: options.password,\n replaceInvalidChars: options.replaceInvalidChars,\n keepLineBreaks: options.keepLineBreaks,\n contentSafetyOff: options.contentSafetyOff,\n useStructTree: options.useStructTree,\n format: formats.length > 0 ? formats : undefined,\n quiet: !options.debug,\n });\n}\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\n/**\n * Options for the convert function.\n */\nexport interface ConvertOptions {\n /** Directory where output files are written. Default: input file directory */\n outputDir?: string;\n /** Password for encrypted PDF files */\n password?: string;\n /** Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images, tagged-pdf. Default: json */\n format?: string | string[];\n /** Suppress console logging output */\n quiet?: boolean;\n /** Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg */\n contentSafetyOff?: string | string[];\n /** Enable sensitive data sanitization. Replaces emails, phone numbers, IPs, credit cards, and URLs with placeholders */\n sanitize?: boolean;\n /** Preserve original line breaks in extracted text */\n keepLineBreaks?: boolean;\n /** Replacement character for invalid/unrecognized characters. Default: space */\n replaceInvalidChars?: string;\n /** Use PDF structure tree (tagged PDF) for reading order and semantic structure */\n useStructTree?: boolean;\n /** Table detection method. Values: default (border-based), cluster (border + cluster). Default: default */\n tableMethod?: string;\n /** Reading order algorithm. Values: off, xycut. Default: xycut */\n readingOrder?: string;\n /** Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none */\n markdownPageSeparator?: string;\n /** Separator between pages in text output. Use %page-number% for page numbers. Default: none */\n textPageSeparator?: string;\n /** Separator between pages in HTML output. Use %page-number% for page numbers. Default: none */\n htmlPageSeparator?: string;\n /** Image output mode. Values: off (no images), embedded (Base64 data URIs), external (file references). Default: external */\n imageOutput?: string;\n /** Output format for extracted images. Values: png, jpeg. Default: png */\n imageFormat?: string;\n /** Directory for extracted images */\n imageDir?: string;\n /** Pages to extract (e.g., \"1,3,5-7\"). Default: all pages */\n pages?: string;\n /** Include page headers and footers in output */\n includeHeaderFooter?: boolean;\n /** Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental) */\n detectStrikethrough?: boolean;\n /** Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai */\n hybrid?: string;\n /** Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend) */\n hybridMode?: string;\n /** Hybrid backend server URL (overrides default) */\n hybridUrl?: string;\n /** Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0 */\n hybridTimeout?: string;\n /** Opt in to Java fallback on hybrid backend error (default: disabled) */\n hybridFallback?: boolean;\n /** DLA label 7 (regionlist) handling. Requires --hybrid=hancom-ai. Values: table-first (default; check TSR overlap), list-only (skip TSR, always treat as list) */\n hybridHancomAiRegionlistStrategy?: string;\n /** OCR strategy. Requires --hybrid=hancom-ai. Values: off (stream-only), auto (default; stream first, OCR fallback), force (OCR-only) */\n hybridHancomAiOcrStrategy?: string;\n /** Page image cache backing. Requires --hybrid=hancom-ai. Values: memory (default), disk */\n hybridHancomAiImageCache?: string;\n /** Write output to stdout instead of file (single format only) */\n toStdout?: boolean;\n /** Number of worker threads for per-page processing. Default: 1 (sequential, stable). Values >1 (experimental) run pages in parallel for faster throughput; output may vary slightly on some PDFs. Capped at the number of available CPU cores. Applies to the native Java pipeline only; ignored in --hybrid mode */\n threads?: string;\n}\n\n/**\n * Options as parsed from CLI (all values are strings from commander).\n */\nexport interface CliOptions {\n outputDir?: string;\n password?: string;\n format?: string;\n quiet?: boolean;\n contentSafetyOff?: string;\n sanitize?: boolean;\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n useStructTree?: boolean;\n tableMethod?: string;\n readingOrder?: string;\n markdownPageSeparator?: string;\n textPageSeparator?: string;\n htmlPageSeparator?: string;\n imageOutput?: string;\n imageFormat?: string;\n imageDir?: string;\n pages?: string;\n includeHeaderFooter?: boolean;\n detectStrikethrough?: boolean;\n hybrid?: string;\n hybridMode?: string;\n hybridUrl?: string;\n hybridTimeout?: string;\n hybridFallback?: boolean;\n hybridHancomAiRegionlistStrategy?: string;\n hybridHancomAiOcrStrategy?: string;\n hybridHancomAiImageCache?: string;\n toStdout?: boolean;\n threads?: string;\n}\n\n/**\n * Convert CLI options to ConvertOptions.\n */\nexport function buildConvertOptions(cliOptions: CliOptions): ConvertOptions {\n const convertOptions: ConvertOptions = {};\n\n if (cliOptions.outputDir) {\n convertOptions.outputDir = cliOptions.outputDir;\n }\n if (cliOptions.password) {\n convertOptions.password = cliOptions.password;\n }\n if (cliOptions.format) {\n convertOptions.format = cliOptions.format;\n }\n if (cliOptions.quiet) {\n convertOptions.quiet = true;\n }\n if (cliOptions.contentSafetyOff) {\n convertOptions.contentSafetyOff = cliOptions.contentSafetyOff;\n }\n if (cliOptions.sanitize) {\n convertOptions.sanitize = true;\n }\n if (cliOptions.keepLineBreaks) {\n convertOptions.keepLineBreaks = true;\n }\n if (cliOptions.replaceInvalidChars) {\n convertOptions.replaceInvalidChars = cliOptions.replaceInvalidChars;\n }\n if (cliOptions.useStructTree) {\n convertOptions.useStructTree = true;\n }\n if (cliOptions.tableMethod) {\n convertOptions.tableMethod = cliOptions.tableMethod;\n }\n if (cliOptions.readingOrder) {\n convertOptions.readingOrder = cliOptions.readingOrder;\n }\n if (cliOptions.markdownPageSeparator) {\n convertOptions.markdownPageSeparator = cliOptions.markdownPageSeparator;\n }\n if (cliOptions.textPageSeparator) {\n convertOptions.textPageSeparator = cliOptions.textPageSeparator;\n }\n if (cliOptions.htmlPageSeparator) {\n convertOptions.htmlPageSeparator = cliOptions.htmlPageSeparator;\n }\n if (cliOptions.imageOutput) {\n convertOptions.imageOutput = cliOptions.imageOutput;\n }\n if (cliOptions.imageFormat) {\n convertOptions.imageFormat = cliOptions.imageFormat;\n }\n if (cliOptions.imageDir) {\n convertOptions.imageDir = cliOptions.imageDir;\n }\n if (cliOptions.pages) {\n convertOptions.pages = cliOptions.pages;\n }\n if (cliOptions.includeHeaderFooter) {\n convertOptions.includeHeaderFooter = true;\n }\n if (cliOptions.detectStrikethrough) {\n convertOptions.detectStrikethrough = true;\n }\n if (cliOptions.hybrid) {\n convertOptions.hybrid = cliOptions.hybrid;\n }\n if (cliOptions.hybridMode) {\n convertOptions.hybridMode = cliOptions.hybridMode;\n }\n if (cliOptions.hybridUrl) {\n convertOptions.hybridUrl = cliOptions.hybridUrl;\n }\n if (cliOptions.hybridTimeout) {\n convertOptions.hybridTimeout = cliOptions.hybridTimeout;\n }\n if (cliOptions.hybridFallback) {\n convertOptions.hybridFallback = true;\n }\n if (cliOptions.hybridHancomAiRegionlistStrategy) {\n convertOptions.hybridHancomAiRegionlistStrategy = cliOptions.hybridHancomAiRegionlistStrategy;\n }\n if (cliOptions.hybridHancomAiOcrStrategy) {\n convertOptions.hybridHancomAiOcrStrategy = cliOptions.hybridHancomAiOcrStrategy;\n }\n if (cliOptions.hybridHancomAiImageCache) {\n convertOptions.hybridHancomAiImageCache = cliOptions.hybridHancomAiImageCache;\n }\n if (cliOptions.toStdout) {\n convertOptions.toStdout = true;\n }\n if (cliOptions.threads) {\n convertOptions.threads = cliOptions.threads;\n }\n\n return convertOptions;\n}\n\n/**\n * Build CLI arguments array from ConvertOptions.\n */\nexport function buildArgs(options: ConvertOptions): string[] {\n const args: string[] = [];\n\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format) {\n if (Array.isArray(options.format)) {\n if (options.format.length > 0) {\n args.push('--format', options.format.join(','));\n }\n } else {\n args.push('--format', options.format);\n }\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff) {\n if (Array.isArray(options.contentSafetyOff)) {\n if (options.contentSafetyOff.length > 0) {\n args.push('--content-safety-off', options.contentSafetyOff.join(','));\n }\n } else {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n }\n if (options.sanitize) {\n args.push('--sanitize');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.useStructTree) {\n args.push('--use-struct-tree');\n }\n if (options.tableMethod) {\n args.push('--table-method', options.tableMethod);\n }\n if (options.readingOrder) {\n args.push('--reading-order', options.readingOrder);\n }\n if (options.markdownPageSeparator) {\n args.push('--markdown-page-separator', options.markdownPageSeparator);\n }\n if (options.textPageSeparator) {\n args.push('--text-page-separator', options.textPageSeparator);\n }\n if (options.htmlPageSeparator) {\n args.push('--html-page-separator', options.htmlPageSeparator);\n }\n if (options.imageOutput) {\n args.push('--image-output', options.imageOutput);\n }\n if (options.imageFormat) {\n args.push('--image-format', options.imageFormat);\n }\n if (options.imageDir) {\n args.push('--image-dir', options.imageDir);\n }\n if (options.pages) {\n args.push('--pages', options.pages);\n }\n if (options.includeHeaderFooter) {\n args.push('--include-header-footer');\n }\n if (options.detectStrikethrough) {\n args.push('--detect-strikethrough');\n }\n if (options.hybrid) {\n args.push('--hybrid', options.hybrid);\n }\n if (options.hybridMode) {\n args.push('--hybrid-mode', options.hybridMode);\n }\n if (options.hybridUrl) {\n args.push('--hybrid-url', options.hybridUrl);\n }\n if (options.hybridTimeout) {\n args.push('--hybrid-timeout', options.hybridTimeout);\n }\n if (options.hybridFallback) {\n args.push('--hybrid-fallback');\n }\n if (options.hybridHancomAiRegionlistStrategy) {\n args.push('--hybrid-hancom-ai-regionlist-strategy', options.hybridHancomAiRegionlistStrategy);\n }\n if (options.hybridHancomAiOcrStrategy) {\n args.push('--hybrid-hancom-ai-ocr-strategy', options.hybridHancomAiOcrStrategy);\n }\n if (options.hybridHancomAiImageCache) {\n args.push('--hybrid-hancom-ai-image-cache', options.hybridHancomAiImageCache);\n }\n if (options.toStdout) {\n args.push('--to-stdout');\n }\n if (options.threads) {\n args.push('--threads', options.threads);\n }\n\n return args;\n}\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\nimport { Command } from 'commander';\n\n/**\n * Register all CLI options on the given Commander program.\n */\nexport function registerCliOptions(program: Command): void {\n program.option('-o, --output-dir <value>', 'Directory where output files are written. Default: input file directory');\n program.option('-p, --password <value>', 'Password for encrypted PDF files');\n program.option('-f, --format <value>', 'Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images, tagged-pdf. Default: json');\n program.option('-q, --quiet', 'Suppress console logging output');\n program.option('--content-safety-off <value>', 'Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg');\n program.option('--sanitize', 'Enable sensitive data sanitization. Replaces emails, phone numbers, IPs, credit cards, and URLs with placeholders');\n program.option('--keep-line-breaks', 'Preserve original line breaks in extracted text');\n program.option('--replace-invalid-chars <value>', 'Replacement character for invalid/unrecognized characters. Default: space');\n program.option('--use-struct-tree', 'Use PDF structure tree (tagged PDF) for reading order and semantic structure');\n program.option('--table-method <value>', 'Table detection method. Values: default (border-based), cluster (border + cluster). Default: default');\n program.option('--reading-order <value>', 'Reading order algorithm. Values: off, xycut. Default: xycut');\n program.option('--markdown-page-separator <value>', 'Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none');\n program.option('--text-page-separator <value>', 'Separator between pages in text output. Use %page-number% for page numbers. Default: none');\n program.option('--html-page-separator <value>', 'Separator between pages in HTML output. Use %page-number% for page numbers. Default: none');\n program.option('--image-output <value>', 'Image output mode. Values: off (no images), embedded (Base64 data URIs), external (file references). Default: external');\n program.option('--image-format <value>', 'Output format for extracted images. Values: png, jpeg. Default: png');\n program.option('--image-dir <value>', 'Directory for extracted images');\n program.option('--pages <value>', 'Pages to extract (e.g., \"1,3,5-7\"). Default: all pages');\n program.option('--include-header-footer', 'Include page headers and footers in output');\n program.option('--detect-strikethrough', 'Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental)');\n program.option('--hybrid <value>', 'Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai');\n program.option('--hybrid-mode <value>', 'Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend)');\n program.option('--hybrid-url <value>', 'Hybrid backend server URL (overrides default)');\n program.option('--hybrid-timeout <value>', 'Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0');\n program.option('--hybrid-fallback', 'Opt in to Java fallback on hybrid backend error (default: disabled)');\n program.option('--hybrid-hancom-ai-regionlist-strategy <value>', 'DLA label 7 (regionlist) handling. Requires --hybrid=hancom-ai. Values: table-first (default; check TSR overlap), list-only (skip TSR, always treat as list)');\n program.option('--hybrid-hancom-ai-ocr-strategy <value>', 'OCR strategy. Requires --hybrid=hancom-ai. Values: off (stream-only), auto (default; stream first, OCR fallback), force (OCR-only)');\n program.option('--hybrid-hancom-ai-image-cache <value>', 'Page image cache backing. Requires --hybrid=hancom-ai. Values: memory (default), disk');\n program.option('--to-stdout', 'Write output to stdout instead of file (single format only)');\n program.option('--threads <value>', 'Number of worker threads for per-page processing. Default: 1 (sequential, stable). Values >1 (experimental) run pages in parallel for faster throughput; output may vary slightly on some PDFs. Capped at the number of available CPU cores. Applies to the native Java pipeline only; ignored in --hybrid mode');\n}\n"],"mappings":";;;AACA,SAAS,SAAS,sBAAsB;;;ACDxC,SAAS,aAAa;AACtB,YAAY,UAAU;AACtB,YAAY,QAAQ;AACpB,SAAS,qBAAqB;AAC9B,SAAS,qBAAqB;;;ACwGvB,SAAS,oBAAoB,YAAwC;AAC1E,QAAM,iBAAiC,CAAC;AAExC,MAAI,WAAW,WAAW;AACxB,mBAAe,YAAY,WAAW;AAAA,EACxC;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW,WAAW;AAAA,EACvC;AACA,MAAI,WAAW,QAAQ;AACrB,mBAAe,SAAS,WAAW;AAAA,EACrC;AACA,MAAI,WAAW,OAAO;AACpB,mBAAe,QAAQ;AAAA,EACzB;AACA,MAAI,WAAW,kBAAkB;AAC/B,mBAAe,mBAAmB,WAAW;AAAA,EAC/C;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW;AAAA,EAC5B;AACA,MAAI,WAAW,gBAAgB;AAC7B,mBAAe,iBAAiB;AAAA,EAClC;AACA,MAAI,WAAW,qBAAqB;AAClC,mBAAe,sBAAsB,WAAW;AAAA,EAClD;AACA,MAAI,WAAW,eAAe;AAC5B,mBAAe,gBAAgB;AAAA,EACjC;AACA,MAAI,WAAW,aAAa;AAC1B,mBAAe,cAAc,WAAW;AAAA,EAC1C;AACA,MAAI,WAAW,cAAc;AAC3B,mBAAe,eAAe,WAAW;AAAA,EAC3C;AACA,MAAI,WAAW,uBAAuB;AACpC,mBAAe,wBAAwB,WAAW;AAAA,EACpD;AACA,MAAI,WAAW,mBAAmB;AAChC,mBAAe,oBAAoB,WAAW;AAAA,EAChD;AACA,MAAI,WAAW,mBAAmB;AAChC,mBAAe,oBAAoB,WAAW;AAAA,EAChD;AACA,MAAI,WAAW,aAAa;AAC1B,mBAAe,cAAc,WAAW;AAAA,EAC1C;AACA,MAAI,WAAW,aAAa;AAC1B,mBAAe,cAAc,WAAW;AAAA,EAC1C;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW,WAAW;AAAA,EACvC;AACA,MAAI,WAAW,OAAO;AACpB,mBAAe,QAAQ,WAAW;AAAA,EACpC;AACA,MAAI,WAAW,qBAAqB;AAClC,mBAAe,sBAAsB;AAAA,EACvC;AACA,MAAI,WAAW,qBAAqB;AAClC,mBAAe,sBAAsB;AAAA,EACvC;AACA,MAAI,WAAW,QAAQ;AACrB,mBAAe,SAAS,WAAW;AAAA,EACrC;AACA,MAAI,WAAW,YAAY;AACzB,mBAAe,aAAa,WAAW;AAAA,EACzC;AACA,MAAI,WAAW,WAAW;AACxB,mBAAe,YAAY,WAAW;AAAA,EACxC;AACA,MAAI,WAAW,eAAe;AAC5B,mBAAe,gBAAgB,WAAW;AAAA,EAC5C;AACA,MAAI,WAAW,gBAAgB;AAC7B,mBAAe,iBAAiB;AAAA,EAClC;AACA,MAAI,WAAW,kCAAkC;AAC/C,mBAAe,mCAAmC,WAAW;AAAA,EAC/D;AACA,MAAI,WAAW,2BAA2B;AACxC,mBAAe,4BAA4B,WAAW;AAAA,EACxD;AACA,MAAI,WAAW,0BAA0B;AACvC,mBAAe,2BAA2B,WAAW;AAAA,EACvD;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW;AAAA,EAC5B;AACA,MAAI,WAAW,SAAS;AACtB,mBAAe,UAAU,WAAW;AAAA,EACtC;AAEA,SAAO;AACT;AAKO,SAAS,UAAU,SAAmC;AAC3D,QAAM,OAAiB,CAAC;AAExB,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,QAAQ;AAClB,QAAI,MAAM,QAAQ,QAAQ,MAAM,GAAG;AACjC,UAAI,QAAQ,OAAO,SAAS,GAAG;AAC7B,aAAK,KAAK,YAAY,QAAQ,OAAO,KAAK,GAAG,CAAC;AAAA,MAChD;AAAA,IACF,OAAO;AACL,WAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,IACtC;AAAA,EACF;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,MAAM,QAAQ,QAAQ,gBAAgB,GAAG;AAC3C,UAAI,QAAQ,iBAAiB,SAAS,GAAG;AACvC,aAAK,KAAK,wBAAwB,QAAQ,iBAAiB,KAAK,GAAG,CAAC;AAAA,MACtE;AAAA,IACF,OAAO;AACL,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AAAA,EACF;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,YAAY;AAAA,EACxB;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBAAoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,cAAc;AACxB,SAAK,KAAK,mBAAmB,QAAQ,YAAY;AAAA,EACnD;AACA,MAAI,QAAQ,uBAAuB;AACjC,SAAK,KAAK,6BAA6B,QAAQ,qBAAqB;AAAA,EACtE;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,eAAe,QAAQ,QAAQ;AAAA,EAC3C;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,WAAW,QAAQ,KAAK;AAAA,EACpC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,yBAAyB;AAAA,EACrC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,wBAAwB;AAAA,EACpC;AACA,MAAI,QAAQ,QAAQ;AAClB,SAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,EACtC;AACA,MAAI,QAAQ,YAAY;AACtB,SAAK,KAAK,iBAAiB,QAAQ,UAAU;AAAA,EAC/C;AACA,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,oBAAoB,QAAQ,aAAa;AAAA,EACrD;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,kCAAkC;AAC5C,SAAK,KAAK,0CAA0C,QAAQ,gCAAgC;AAAA,EAC9F;AACA,MAAI,QAAQ,2BAA2B;AACrC,SAAK,KAAK,mCAAmC,QAAQ,yBAAyB;AAAA,EAChF;AACA,MAAI,QAAQ,0BAA0B;AACpC,SAAK,KAAK,kCAAkC,QAAQ,wBAAwB;AAAA,EAC9E;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,aAAa;AAAA,EACzB;AACA,MAAI,QAAQ,SAAS;AACnB,SAAK,KAAK,aAAa,QAAQ,OAAO;AAAA,EACxC;AAEA,SAAO;AACT;;;AD/SA,IAAMA,cAAa,cAAc,YAAY,GAAG;AAChD,IAAMC,aAAiB,aAAQD,WAAU;AAEzC,IAAM,WAAW;AAWjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,eAAe,MAAM,IAAI;AAEjC,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAKC,YAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,UAAM,cAAc,MAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAKb,UAAM,gBAAgB,IAAI,cAAc,MAAM;AAC9C,UAAM,gBAAgB,IAAI,cAAc,MAAM;AAE9C,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAIhB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B,OAAO;AACL,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AAGA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAIhC,YAAM,aAAa,cAAc,IAAI;AACrC,YAAM,aAAa,cAAc,IAAI;AACrC,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,cAAc;AAChB,kBAAQ,OAAO,MAAM,UAAU;AAAA,QACjC,OAAO;AACL,oBAAU;AAAA,QACZ;AAAA,MACF;AACA,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,aAAc,SAAQ,OAAO,MAAM,UAAU;AACjD,kBAAU;AAAA,MACZ;AAEA,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AAEA,SAAS,aACP,YACA,SACkB;AAClB,QAAM,YAAY,MAAM,QAAQ,UAAU,IAAI,aAAa,CAAC,UAAU;AACtE,MAAI,UAAU,WAAW,GAAG;AAC1B,WAAO,IAAI,MAAM,2CAA2C;AAAA,EAC9D;AAEA,aAAW,SAAS,WAAW;AAC7B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE;AAAA,IAC7D;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,WAAW,GAAG,UAAU,OAAO,CAAC;AAC7C;AA2BA,eAAsB,WACpB,YACA,UAA0B,CAAC,GACZ;AACf,QAAM,cAAc,aAAa,YAAY,OAAO;AACpD,MAAI,uBAAuB,OAAO;AAChC,UAAM;AAAA,EACR;AACA,QAAM,WAAW,aAAa,EAAE,cAAc,KAAK,CAAC;AACtD;;;AEnKO,SAAS,mBAAmB,SAAwB;AACzD,UAAQ,OAAO,4BAA4B,yEAAyE;AACpH,UAAQ,OAAO,0BAA0B,kCAAkC;AAC3E,UAAQ,OAAO,wBAAwB,gJAAgJ;AACvL,UAAQ,OAAO,eAAe,iCAAiC;AAC/D,UAAQ,OAAO,gCAAgC,sFAAsF;AACrI,UAAQ,OAAO,cAAc,mHAAmH;AAChJ,UAAQ,OAAO,sBAAsB,iDAAiD;AACtF,UAAQ,OAAO,mCAAmC,2EAA2E;AAC7H,UAAQ,OAAO,qBAAqB,8EAA8E;AAClH,UAAQ,OAAO,0BAA0B,sGAAsG;AAC/I,UAAQ,OAAO,2BAA2B,6DAA6D;AACvG,UAAQ,OAAO,qCAAqC,+FAA+F;AACnJ,UAAQ,OAAO,iCAAiC,2FAA2F;AAC3I,UAAQ,OAAO,iCAAiC,2FAA2F;AAC3I,UAAQ,OAAO,0BAA0B,wHAAwH;AACjK,UAAQ,OAAO,0BAA0B,qEAAqE;AAC9G,UAAQ,OAAO,uBAAuB,gCAAgC;AACtE,UAAQ,OAAO,mBAAmB,wDAAwD;AAC1F,UAAQ,OAAO,2BAA2B,4CAA4C;AACtF,UAAQ,OAAO,0BAA0B,gHAAgH;AACzJ,UAAQ,OAAO,oBAAoB,iOAAiO;AACpQ,UAAQ,OAAO,yBAAyB,sGAAsG;AAC9I,UAAQ,OAAO,wBAAwB,+CAA+C;AACtF,UAAQ,OAAO,4BAA4B,6EAA6E;AACxH,UAAQ,OAAO,qBAAqB,qEAAqE;AACzG,UAAQ,OAAO,kDAAkD,8JAA8J;AAC/N,UAAQ,OAAO,2CAA2C,oIAAoI;AAC9L,UAAQ,OAAO,0CAA0C,uFAAuF;AAChJ,UAAQ,OAAO,eAAe,6DAA6D;AAC3F,UAAQ,OAAO,qBAAqB,iTAAiT;AACvV;;;AHjCA,SAAS,gBAAyB;AAChC,QAAM,UAAU,IAAI,QAAQ;AAE5B,UACG,KAAK,oBAAoB,EACzB,MAAM,sBAAsB,EAC5B,YAAY,4CAA4C,EACxD,mBAAmB,wCAAwC,EAC3D,yBAAyB,KAAK,EAC9B,SAAS,cAAc,uCAAuC;AAGjE,qBAAmB,OAAO;AAE1B,UAAQ,gBAAgB;AAAA,IACtB,UAAU,CAAC,QAAQ;AACjB,cAAQ,MAAM,IAAI,QAAQ,CAAC;AAAA,IAC7B;AAAA,IACA,aAAa,CAAC,KAAK,UAAU;AAC3B,YAAM,GAAG;AAAA,IACX;AAAA,EACF,CAAC;AAED,SAAO;AACT;AAEA,eAAe,OAAwB;AACrC,QAAM,UAAU,cAAc;AAE9B,UAAQ,aAAa;AAErB,MAAI;AACF,YAAQ,MAAM,QAAQ,IAAI;AAAA,EAC5B,SAAS,KAAK;AACZ,QAAI,eAAe,gBAAgB;AACjC,UAAI,IAAI,SAAS,2BAA2B;AAC1C,eAAO;AAAA,MACT;AACA,aAAO,IAAI,YAAY;AAAA,IACzB;AAEA,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,QAAQ,KAAiB;AAC5C,QAAM,aAAa,QAAQ;AAC3B,QAAM,iBAAiB,oBAAoB,UAAU;AAErD,MAAI;AAGF,UAAM,WAAW,YAAY,cAAc;AAC3C,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,WAAO;AAAA,EACT;AACF;AAEA,KAAK,EAAE,KAAK,CAAC,SAAS;AACpB,MAAI,SAAS,GAAG;AACd,YAAQ,KAAK,IAAI;AAAA,EACnB;AACF,CAAC;","names":["__filename","__dirname"]}
1
+ {"version":3,"sources":["../src/cli.ts","../src/index.ts","../src/convert-options.generated.ts","../src/cli-options.generated.ts"],"sourcesContent":["#!/usr/bin/env node\nimport { Command, CommanderError } from 'commander';\nimport { _runForCli } from './index.js';\nimport { CliOptions, buildConvertOptions } from './convert-options.generated.js';\nimport { registerCliOptions } from './cli-options.generated.js';\n\nfunction createProgram(): Command {\n const program = new Command();\n\n program\n .name('opendataloader-pdf')\n .usage('[options] <input...>')\n .description('Convert PDFs using the OpenDataLoader CLI.')\n .showHelpAfterError(\"Use '--help' to see available options.\")\n .showSuggestionAfterError(false)\n .argument('<input...>', 'Input files or directories to convert');\n\n // Register CLI options from auto-generated file\n registerCliOptions(program);\n\n program.configureOutput({\n writeErr: (str) => {\n console.error(str.trimEnd());\n },\n outputError: (str, write) => {\n write(str);\n },\n });\n\n return program;\n}\n\nasync function main(): Promise<number> {\n const program = createProgram();\n\n program.exitOverride();\n\n try {\n program.parse(process.argv);\n } catch (err) {\n if (err instanceof CommanderError) {\n if (err.code === 'commander.helpDisplayed') {\n return 0;\n }\n return err.exitCode ?? 1;\n }\n\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n console.error(\"Use '--help' to see available options.\");\n return 1;\n }\n\n const cliOptions = program.opts<CliOptions>();\n const inputPaths = program.args;\n const convertOptions = buildConvertOptions(cliOptions);\n\n try {\n // _runForCli streams stdout/stderr to the parent process as they arrive;\n // we deliberately do not re-print anything here. (Issue #398.)\n await _runForCli(inputPaths, convertOptions);\n return 0;\n } catch (err) {\n // Subprocess-exit errors are already on the user's terminal via the live\n // stderr stream — re-printing would duplicate output and risk leaking\n // anything sensitive Java logged (e.g. a --password value echoed by an\n // underlying library). Wrapper-side failures (JAR not found, java not in\n // PATH, bad input path) still need to be surfaced.\n const isJavaExit =\n err instanceof Error && (err as Error & { isJavaExit?: boolean }).isJavaExit === true;\n if (!isJavaExit) {\n const message = err instanceof Error ? err.message : String(err);\n console.error(message);\n }\n return 1;\n }\n}\n\nmain().then((code) => {\n if (code !== 0) {\n process.exit(code);\n }\n});\n","import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { StringDecoder } from 'string_decoder';\nimport { fileURLToPath } from 'url';\n\n// Re-export types and utilities from auto-generated file\nexport type { ConvertOptions } from './convert-options.generated.js';\nexport { buildArgs } from './convert-options.generated.js';\nimport type { ConvertOptions } from './convert-options.generated.js';\nimport { buildArgs } from './convert-options.generated.js';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n /**\n * When true, forwards Java's stdout and stderr chunks to the parent\n * process in real time as well as accumulating them. Used by the bundled\n * CLI so long-running conversions show progress as it happens.\n */\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n // Force headless AWT so macOS doesn't surface a Dock icon (and steal focus)\n // every time the JVM touches ImageIO/PDFBox rendering. Safe on all OSes —\n // the CLI never opens a UI window, only manipulates BufferedImages.\n const commandArgs = [\n '-Djava.awt.headless=true',\n '-Dapple.awt.UIElement=true',\n '-jar',\n jarPath,\n ...args,\n ];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n // StringDecoder buffers incomplete multi-byte UTF-8 sequences across\n // chunk boundaries — Buffer.toString() alone would emit replacement\n // characters when, e.g., a 3-byte Korean codepoint splits across two\n // 'data' events. One decoder per stream so they don't share state.\n const stdoutDecoder = new StringDecoder('utf8');\n const stderrDecoder = new StringDecoder('utf8');\n\n javaProcess.stdout.on('data', (data: Buffer) => {\n const chunk = stdoutDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n // Stream-only: don't also accumulate, or a multi-hour conversion\n // would buffer its entire (potentially gigabyte) stdout in memory\n // for no consumer.\n process.stdout.write(chunk);\n } else {\n stdout += chunk;\n }\n });\n\n javaProcess.stderr.on('data', (data: Buffer) => {\n const chunk = stderrDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n // stderr is always accumulated (progress logs are small and we need\n // them for error messages on non-zero exit).\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n // Flush any trailing bytes the decoder is still holding (always emit\n // them — if we drop them on error paths, error messages with non-ASCII\n // characters lose their tail).\n const stdoutTail = stdoutDecoder.end();\n const stderrTail = stderrDecoder.end();\n if (stdoutTail.length > 0) {\n if (streamOutput) {\n process.stdout.write(stdoutTail);\n } else {\n stdout += stdoutTail;\n }\n }\n if (stderrTail.length > 0) {\n if (streamOutput) process.stderr.write(stderrTail);\n stderr += stderrTail;\n }\n\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n // Tag so the CLI can suppress re-printing this message — Java's\n // stderr was already streamed live to the parent in CLI mode, and\n // re-printing risks leaking anything sensitive Java logged\n // (e.g. a --password value echoed by an underlying library).\n (error as Error & { isJavaExit?: boolean }).isJavaExit = true;\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nfunction buildJarArgs(\n inputPaths: string | string[],\n options: ConvertOptions,\n): string[] | Error {\n const inputList = Array.isArray(inputPaths) ? inputPaths : [inputPaths];\n if (inputList.length === 0) {\n return new Error('At least one input path must be provided.');\n }\n\n for (const input of inputList) {\n if (!fs.existsSync(input)) {\n return new Error(`Input file or folder not found: ${input}`);\n }\n }\n\n return [...inputList, ...buildArgs(options)];\n}\n\nexport function convert(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<string> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n return Promise.reject(argsOrError);\n }\n // Library API: never streams to the parent process. Returns the full stdout\n // string so callers can do `const out = await convert(...)` without surprise\n // side-effects on process.stdout / process.stderr.\n return executeJar(argsOrError, { streamOutput: false });\n}\n\n/**\n * Internal entry point used by the bundled CLI. Streams Java's stdout and\n * stderr to the parent process in real time (so long-running conversions like\n * hybrid mode show progress as it happens) and resolves without a stdout\n * payload — preventing the caller from re-printing what was already streamed.\n *\n * Not part of the public API: do not import this from application code. Use\n * {@link convert} instead.\n *\n * @internal\n */\nexport async function _runForCli(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<void> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n throw argsOrError;\n }\n await executeJar(argsOrError, { streamOutput: true });\n}\n\n/**\n * @deprecated Use `convert()` and `ConvertOptions` instead. This function will be removed in a future version.\n */\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n useStructTree?: boolean;\n}\n\n/**\n * @deprecated Use `convert()` instead. This function will be removed in a future version.\n */\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n console.warn(\n 'Warning: run() is deprecated and will be removed in a future version. Use convert() instead.',\n );\n\n // Build format array based on legacy boolean options\n const formats: string[] = [];\n if (!options.noJson) {\n formats.push('json');\n }\n if (options.generateMarkdown) {\n if (options.addImageToMarkdown) {\n formats.push('markdown-with-images');\n } else if (options.htmlInMarkdown) {\n formats.push('markdown-with-html');\n } else {\n formats.push('markdown');\n }\n }\n if (options.generateHtml) {\n formats.push('html');\n }\n if (options.generateAnnotatedPdf) {\n formats.push('pdf');\n }\n\n return convert(inputPath, {\n outputDir: options.outputFolder,\n password: options.password,\n replaceInvalidChars: options.replaceInvalidChars,\n keepLineBreaks: options.keepLineBreaks,\n contentSafetyOff: options.contentSafetyOff,\n useStructTree: options.useStructTree,\n format: formats.length > 0 ? formats : undefined,\n quiet: !options.debug,\n });\n}\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\n/**\n * Options for the convert function.\n */\nexport interface ConvertOptions {\n /** Directory where output files are written. Default: input file directory */\n outputDir?: string;\n /** Password for encrypted PDF files */\n password?: string;\n /** Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images, tagged-pdf. Default: json */\n format?: string | string[];\n /** Suppress console logging output */\n quiet?: boolean;\n /** Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg */\n contentSafetyOff?: string | string[];\n /** Enable sensitive data sanitization. Replaces emails, phone numbers, IPs, credit cards, and URLs with placeholders */\n sanitize?: boolean;\n /** Preserve original line breaks in extracted text */\n keepLineBreaks?: boolean;\n /** Replacement character for invalid/unrecognized characters. Default: space */\n replaceInvalidChars?: string;\n /** Use PDF structure tree (tagged PDF) for reading order and semantic structure */\n useStructTree?: boolean;\n /** Table detection method. Values: default (border-based), cluster (border + cluster). Default: default */\n tableMethod?: string;\n /** Reading order algorithm. Values: off, xycut. Default: xycut */\n readingOrder?: string;\n /** Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none */\n markdownPageSeparator?: string;\n /** Separator between pages in text output. Use %page-number% for page numbers. Default: none */\n textPageSeparator?: string;\n /** Separator between pages in HTML output. Use %page-number% for page numbers. Default: none */\n htmlPageSeparator?: string;\n /** Image output mode. Values: off (no images), embedded (Base64 data URIs), external (file references). Default: external */\n imageOutput?: string;\n /** Output format for extracted images. Values: png, jpeg. Default: png */\n imageFormat?: string;\n /** Directory for extracted images */\n imageDir?: string;\n /** Pages to extract (e.g., \"1,3,5-7\"). Default: all pages */\n pages?: string;\n /** Include page headers and footers in output */\n includeHeaderFooter?: boolean;\n /** Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental) */\n detectStrikethrough?: boolean;\n /** Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai */\n hybrid?: string;\n /** Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend) */\n hybridMode?: string;\n /** Hybrid backend server URL (overrides default) */\n hybridUrl?: string;\n /** Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0 */\n hybridTimeout?: string;\n /** Opt in to Java fallback on hybrid backend error (default: disabled) */\n hybridFallback?: boolean;\n /** DLA label 7 (regionlist) handling. Requires --hybrid=hancom-ai. Values: table-first (default; check TSR overlap), list-only (skip TSR, always treat as list) */\n hybridHancomAiRegionlistStrategy?: string;\n /** OCR strategy. Requires --hybrid=hancom-ai. Values: off (stream-only), auto (default; stream first, OCR fallback), force (OCR-only) */\n hybridHancomAiOcrStrategy?: string;\n /** Page image cache backing. Requires --hybrid=hancom-ai. Values: memory (default), disk */\n hybridHancomAiImageCache?: string;\n /** Write output to stdout instead of file (single format only) */\n toStdout?: boolean;\n /** Number of worker threads for per-page processing. Default: 1 (sequential, stable). Values >1 (experimental) run pages in parallel for faster throughput; output may vary slightly on some PDFs. Capped at the number of available CPU cores. Applies to the native Java pipeline only; ignored in --hybrid mode */\n threads?: string;\n}\n\n/**\n * Options as parsed from CLI (all values are strings from commander).\n */\nexport interface CliOptions {\n outputDir?: string;\n password?: string;\n format?: string;\n quiet?: boolean;\n contentSafetyOff?: string;\n sanitize?: boolean;\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n useStructTree?: boolean;\n tableMethod?: string;\n readingOrder?: string;\n markdownPageSeparator?: string;\n textPageSeparator?: string;\n htmlPageSeparator?: string;\n imageOutput?: string;\n imageFormat?: string;\n imageDir?: string;\n pages?: string;\n includeHeaderFooter?: boolean;\n detectStrikethrough?: boolean;\n hybrid?: string;\n hybridMode?: string;\n hybridUrl?: string;\n hybridTimeout?: string;\n hybridFallback?: boolean;\n hybridHancomAiRegionlistStrategy?: string;\n hybridHancomAiOcrStrategy?: string;\n hybridHancomAiImageCache?: string;\n toStdout?: boolean;\n threads?: string;\n}\n\n/**\n * Convert CLI options to ConvertOptions.\n */\nexport function buildConvertOptions(cliOptions: CliOptions): ConvertOptions {\n const convertOptions: ConvertOptions = {};\n\n if (cliOptions.outputDir) {\n convertOptions.outputDir = cliOptions.outputDir;\n }\n if (cliOptions.password) {\n convertOptions.password = cliOptions.password;\n }\n if (cliOptions.format) {\n convertOptions.format = cliOptions.format;\n }\n if (cliOptions.quiet) {\n convertOptions.quiet = true;\n }\n if (cliOptions.contentSafetyOff) {\n convertOptions.contentSafetyOff = cliOptions.contentSafetyOff;\n }\n if (cliOptions.sanitize) {\n convertOptions.sanitize = true;\n }\n if (cliOptions.keepLineBreaks) {\n convertOptions.keepLineBreaks = true;\n }\n if (cliOptions.replaceInvalidChars) {\n convertOptions.replaceInvalidChars = cliOptions.replaceInvalidChars;\n }\n if (cliOptions.useStructTree) {\n convertOptions.useStructTree = true;\n }\n if (cliOptions.tableMethod) {\n convertOptions.tableMethod = cliOptions.tableMethod;\n }\n if (cliOptions.readingOrder) {\n convertOptions.readingOrder = cliOptions.readingOrder;\n }\n if (cliOptions.markdownPageSeparator) {\n convertOptions.markdownPageSeparator = cliOptions.markdownPageSeparator;\n }\n if (cliOptions.textPageSeparator) {\n convertOptions.textPageSeparator = cliOptions.textPageSeparator;\n }\n if (cliOptions.htmlPageSeparator) {\n convertOptions.htmlPageSeparator = cliOptions.htmlPageSeparator;\n }\n if (cliOptions.imageOutput) {\n convertOptions.imageOutput = cliOptions.imageOutput;\n }\n if (cliOptions.imageFormat) {\n convertOptions.imageFormat = cliOptions.imageFormat;\n }\n if (cliOptions.imageDir) {\n convertOptions.imageDir = cliOptions.imageDir;\n }\n if (cliOptions.pages) {\n convertOptions.pages = cliOptions.pages;\n }\n if (cliOptions.includeHeaderFooter) {\n convertOptions.includeHeaderFooter = true;\n }\n if (cliOptions.detectStrikethrough) {\n convertOptions.detectStrikethrough = true;\n }\n if (cliOptions.hybrid) {\n convertOptions.hybrid = cliOptions.hybrid;\n }\n if (cliOptions.hybridMode) {\n convertOptions.hybridMode = cliOptions.hybridMode;\n }\n if (cliOptions.hybridUrl) {\n convertOptions.hybridUrl = cliOptions.hybridUrl;\n }\n if (cliOptions.hybridTimeout) {\n convertOptions.hybridTimeout = cliOptions.hybridTimeout;\n }\n if (cliOptions.hybridFallback) {\n convertOptions.hybridFallback = true;\n }\n if (cliOptions.hybridHancomAiRegionlistStrategy) {\n convertOptions.hybridHancomAiRegionlistStrategy = cliOptions.hybridHancomAiRegionlistStrategy;\n }\n if (cliOptions.hybridHancomAiOcrStrategy) {\n convertOptions.hybridHancomAiOcrStrategy = cliOptions.hybridHancomAiOcrStrategy;\n }\n if (cliOptions.hybridHancomAiImageCache) {\n convertOptions.hybridHancomAiImageCache = cliOptions.hybridHancomAiImageCache;\n }\n if (cliOptions.toStdout) {\n convertOptions.toStdout = true;\n }\n if (cliOptions.threads) {\n convertOptions.threads = cliOptions.threads;\n }\n\n return convertOptions;\n}\n\n/**\n * Build CLI arguments array from ConvertOptions.\n */\nexport function buildArgs(options: ConvertOptions): string[] {\n const args: string[] = [];\n\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format) {\n if (Array.isArray(options.format)) {\n if (options.format.length > 0) {\n args.push('--format', options.format.join(','));\n }\n } else {\n args.push('--format', options.format);\n }\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff) {\n if (Array.isArray(options.contentSafetyOff)) {\n if (options.contentSafetyOff.length > 0) {\n args.push('--content-safety-off', options.contentSafetyOff.join(','));\n }\n } else {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n }\n if (options.sanitize) {\n args.push('--sanitize');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.useStructTree) {\n args.push('--use-struct-tree');\n }\n if (options.tableMethod) {\n args.push('--table-method', options.tableMethod);\n }\n if (options.readingOrder) {\n args.push('--reading-order', options.readingOrder);\n }\n if (options.markdownPageSeparator) {\n args.push('--markdown-page-separator', options.markdownPageSeparator);\n }\n if (options.textPageSeparator) {\n args.push('--text-page-separator', options.textPageSeparator);\n }\n if (options.htmlPageSeparator) {\n args.push('--html-page-separator', options.htmlPageSeparator);\n }\n if (options.imageOutput) {\n args.push('--image-output', options.imageOutput);\n }\n if (options.imageFormat) {\n args.push('--image-format', options.imageFormat);\n }\n if (options.imageDir) {\n args.push('--image-dir', options.imageDir);\n }\n if (options.pages) {\n args.push('--pages', options.pages);\n }\n if (options.includeHeaderFooter) {\n args.push('--include-header-footer');\n }\n if (options.detectStrikethrough) {\n args.push('--detect-strikethrough');\n }\n if (options.hybrid) {\n args.push('--hybrid', options.hybrid);\n }\n if (options.hybridMode) {\n args.push('--hybrid-mode', options.hybridMode);\n }\n if (options.hybridUrl) {\n args.push('--hybrid-url', options.hybridUrl);\n }\n if (options.hybridTimeout) {\n args.push('--hybrid-timeout', options.hybridTimeout);\n }\n if (options.hybridFallback) {\n args.push('--hybrid-fallback');\n }\n if (options.hybridHancomAiRegionlistStrategy) {\n args.push('--hybrid-hancom-ai-regionlist-strategy', options.hybridHancomAiRegionlistStrategy);\n }\n if (options.hybridHancomAiOcrStrategy) {\n args.push('--hybrid-hancom-ai-ocr-strategy', options.hybridHancomAiOcrStrategy);\n }\n if (options.hybridHancomAiImageCache) {\n args.push('--hybrid-hancom-ai-image-cache', options.hybridHancomAiImageCache);\n }\n if (options.toStdout) {\n args.push('--to-stdout');\n }\n if (options.threads) {\n args.push('--threads', options.threads);\n }\n\n return args;\n}\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\nimport { Command } from 'commander';\n\n/**\n * Register all CLI options on the given Commander program.\n */\nexport function registerCliOptions(program: Command): void {\n program.option('-o, --output-dir <value>', 'Directory where output files are written. Default: input file directory');\n program.option('-p, --password <value>', 'Password for encrypted PDF files');\n program.option('-f, --format <value>', 'Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images, tagged-pdf. Default: json');\n program.option('-q, --quiet', 'Suppress console logging output');\n program.option('--content-safety-off <value>', 'Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg');\n program.option('--sanitize', 'Enable sensitive data sanitization. Replaces emails, phone numbers, IPs, credit cards, and URLs with placeholders');\n program.option('--keep-line-breaks', 'Preserve original line breaks in extracted text');\n program.option('--replace-invalid-chars <value>', 'Replacement character for invalid/unrecognized characters. Default: space');\n program.option('--use-struct-tree', 'Use PDF structure tree (tagged PDF) for reading order and semantic structure');\n program.option('--table-method <value>', 'Table detection method. Values: default (border-based), cluster (border + cluster). Default: default');\n program.option('--reading-order <value>', 'Reading order algorithm. Values: off, xycut. Default: xycut');\n program.option('--markdown-page-separator <value>', 'Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none');\n program.option('--text-page-separator <value>', 'Separator between pages in text output. Use %page-number% for page numbers. Default: none');\n program.option('--html-page-separator <value>', 'Separator between pages in HTML output. Use %page-number% for page numbers. Default: none');\n program.option('--image-output <value>', 'Image output mode. Values: off (no images), embedded (Base64 data URIs), external (file references). Default: external');\n program.option('--image-format <value>', 'Output format for extracted images. Values: png, jpeg. Default: png');\n program.option('--image-dir <value>', 'Directory for extracted images');\n program.option('--pages <value>', 'Pages to extract (e.g., \"1,3,5-7\"). Default: all pages');\n program.option('--include-header-footer', 'Include page headers and footers in output');\n program.option('--detect-strikethrough', 'Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental)');\n program.option('--hybrid <value>', 'Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai');\n program.option('--hybrid-mode <value>', 'Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend)');\n program.option('--hybrid-url <value>', 'Hybrid backend server URL (overrides default)');\n program.option('--hybrid-timeout <value>', 'Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0');\n program.option('--hybrid-fallback', 'Opt in to Java fallback on hybrid backend error (default: disabled)');\n program.option('--hybrid-hancom-ai-regionlist-strategy <value>', 'DLA label 7 (regionlist) handling. Requires --hybrid=hancom-ai. Values: table-first (default; check TSR overlap), list-only (skip TSR, always treat as list)');\n program.option('--hybrid-hancom-ai-ocr-strategy <value>', 'OCR strategy. Requires --hybrid=hancom-ai. Values: off (stream-only), auto (default; stream first, OCR fallback), force (OCR-only)');\n program.option('--hybrid-hancom-ai-image-cache <value>', 'Page image cache backing. Requires --hybrid=hancom-ai. Values: memory (default), disk');\n program.option('--to-stdout', 'Write output to stdout instead of file (single format only)');\n program.option('--threads <value>', 'Number of worker threads for per-page processing. Default: 1 (sequential, stable). Values >1 (experimental) run pages in parallel for faster throughput; output may vary slightly on some PDFs. Capped at the number of available CPU cores. Applies to the native Java pipeline only; ignored in --hybrid mode');\n}\n"],"mappings":";;;AACA,SAAS,SAAS,sBAAsB;;;ACDxC,SAAS,aAAa;AACtB,YAAY,UAAU;AACtB,YAAY,QAAQ;AACpB,SAAS,qBAAqB;AAC9B,SAAS,qBAAqB;;;ACwGvB,SAAS,oBAAoB,YAAwC;AAC1E,QAAM,iBAAiC,CAAC;AAExC,MAAI,WAAW,WAAW;AACxB,mBAAe,YAAY,WAAW;AAAA,EACxC;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW,WAAW;AAAA,EACvC;AACA,MAAI,WAAW,QAAQ;AACrB,mBAAe,SAAS,WAAW;AAAA,EACrC;AACA,MAAI,WAAW,OAAO;AACpB,mBAAe,QAAQ;AAAA,EACzB;AACA,MAAI,WAAW,kBAAkB;AAC/B,mBAAe,mBAAmB,WAAW;AAAA,EAC/C;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW;AAAA,EAC5B;AACA,MAAI,WAAW,gBAAgB;AAC7B,mBAAe,iBAAiB;AAAA,EAClC;AACA,MAAI,WAAW,qBAAqB;AAClC,mBAAe,sBAAsB,WAAW;AAAA,EAClD;AACA,MAAI,WAAW,eAAe;AAC5B,mBAAe,gBAAgB;AAAA,EACjC;AACA,MAAI,WAAW,aAAa;AAC1B,mBAAe,cAAc,WAAW;AAAA,EAC1C;AACA,MAAI,WAAW,cAAc;AAC3B,mBAAe,eAAe,WAAW;AAAA,EAC3C;AACA,MAAI,WAAW,uBAAuB;AACpC,mBAAe,wBAAwB,WAAW;AAAA,EACpD;AACA,MAAI,WAAW,mBAAmB;AAChC,mBAAe,oBAAoB,WAAW;AAAA,EAChD;AACA,MAAI,WAAW,mBAAmB;AAChC,mBAAe,oBAAoB,WAAW;AAAA,EAChD;AACA,MAAI,WAAW,aAAa;AAC1B,mBAAe,cAAc,WAAW;AAAA,EAC1C;AACA,MAAI,WAAW,aAAa;AAC1B,mBAAe,cAAc,WAAW;AAAA,EAC1C;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW,WAAW;AAAA,EACvC;AACA,MAAI,WAAW,OAAO;AACpB,mBAAe,QAAQ,WAAW;AAAA,EACpC;AACA,MAAI,WAAW,qBAAqB;AAClC,mBAAe,sBAAsB;AAAA,EACvC;AACA,MAAI,WAAW,qBAAqB;AAClC,mBAAe,sBAAsB;AAAA,EACvC;AACA,MAAI,WAAW,QAAQ;AACrB,mBAAe,SAAS,WAAW;AAAA,EACrC;AACA,MAAI,WAAW,YAAY;AACzB,mBAAe,aAAa,WAAW;AAAA,EACzC;AACA,MAAI,WAAW,WAAW;AACxB,mBAAe,YAAY,WAAW;AAAA,EACxC;AACA,MAAI,WAAW,eAAe;AAC5B,mBAAe,gBAAgB,WAAW;AAAA,EAC5C;AACA,MAAI,WAAW,gBAAgB;AAC7B,mBAAe,iBAAiB;AAAA,EAClC;AACA,MAAI,WAAW,kCAAkC;AAC/C,mBAAe,mCAAmC,WAAW;AAAA,EAC/D;AACA,MAAI,WAAW,2BAA2B;AACxC,mBAAe,4BAA4B,WAAW;AAAA,EACxD;AACA,MAAI,WAAW,0BAA0B;AACvC,mBAAe,2BAA2B,WAAW;AAAA,EACvD;AACA,MAAI,WAAW,UAAU;AACvB,mBAAe,WAAW;AAAA,EAC5B;AACA,MAAI,WAAW,SAAS;AACtB,mBAAe,UAAU,WAAW;AAAA,EACtC;AAEA,SAAO;AACT;AAKO,SAAS,UAAU,SAAmC;AAC3D,QAAM,OAAiB,CAAC;AAExB,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,QAAQ;AAClB,QAAI,MAAM,QAAQ,QAAQ,MAAM,GAAG;AACjC,UAAI,QAAQ,OAAO,SAAS,GAAG;AAC7B,aAAK,KAAK,YAAY,QAAQ,OAAO,KAAK,GAAG,CAAC;AAAA,MAChD;AAAA,IACF,OAAO;AACL,WAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,IACtC;AAAA,EACF;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,MAAM,QAAQ,QAAQ,gBAAgB,GAAG;AAC3C,UAAI,QAAQ,iBAAiB,SAAS,GAAG;AACvC,aAAK,KAAK,wBAAwB,QAAQ,iBAAiB,KAAK,GAAG,CAAC;AAAA,MACtE;AAAA,IACF,OAAO;AACL,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AAAA,EACF;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,YAAY;AAAA,EACxB;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBAAoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,cAAc;AACxB,SAAK,KAAK,mBAAmB,QAAQ,YAAY;AAAA,EACnD;AACA,MAAI,QAAQ,uBAAuB;AACjC,SAAK,KAAK,6BAA6B,QAAQ,qBAAqB;AAAA,EACtE;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,eAAe,QAAQ,QAAQ;AAAA,EAC3C;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,WAAW,QAAQ,KAAK;AAAA,EACpC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,yBAAyB;AAAA,EACrC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,wBAAwB;AAAA,EACpC;AACA,MAAI,QAAQ,QAAQ;AAClB,SAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,EACtC;AACA,MAAI,QAAQ,YAAY;AACtB,SAAK,KAAK,iBAAiB,QAAQ,UAAU;AAAA,EAC/C;AACA,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,oBAAoB,QAAQ,aAAa;AAAA,EACrD;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,kCAAkC;AAC5C,SAAK,KAAK,0CAA0C,QAAQ,gCAAgC;AAAA,EAC9F;AACA,MAAI,QAAQ,2BAA2B;AACrC,SAAK,KAAK,mCAAmC,QAAQ,yBAAyB;AAAA,EAChF;AACA,MAAI,QAAQ,0BAA0B;AACpC,SAAK,KAAK,kCAAkC,QAAQ,wBAAwB;AAAA,EAC9E;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,aAAa;AAAA,EACzB;AACA,MAAI,QAAQ,SAAS;AACnB,SAAK,KAAK,aAAa,QAAQ,OAAO;AAAA,EACxC;AAEA,SAAO;AACT;;;AD/SA,IAAMA,cAAa,cAAc,YAAY,GAAG;AAChD,IAAMC,aAAiB,aAAQD,WAAU;AAEzC,IAAM,WAAW;AAWjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,eAAe,MAAM,IAAI;AAEjC,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAKC,YAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAIhB,UAAM,cAAc;AAAA,MAClB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG;AAAA,IACL;AAEA,UAAM,cAAc,MAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAKb,UAAM,gBAAgB,IAAI,cAAc,MAAM;AAC9C,UAAM,gBAAgB,IAAI,cAAc,MAAM;AAE9C,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAIhB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B,OAAO;AACL,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AAGA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAIhC,YAAM,aAAa,cAAc,IAAI;AACrC,YAAM,aAAa,cAAc,IAAI;AACrC,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,cAAc;AAChB,kBAAQ,OAAO,MAAM,UAAU;AAAA,QACjC,OAAO;AACL,oBAAU;AAAA,QACZ;AAAA,MACF;AACA,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,aAAc,SAAQ,OAAO,MAAM,UAAU;AACjD,kBAAU;AAAA,MACZ;AAEA,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AAKA,QAAC,MAA2C,aAAa;AACzD,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AAEA,SAAS,aACP,YACA,SACkB;AAClB,QAAM,YAAY,MAAM,QAAQ,UAAU,IAAI,aAAa,CAAC,UAAU;AACtE,MAAI,UAAU,WAAW,GAAG;AAC1B,WAAO,IAAI,MAAM,2CAA2C;AAAA,EAC9D;AAEA,aAAW,SAAS,WAAW;AAC7B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE;AAAA,IAC7D;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,WAAW,GAAG,UAAU,OAAO,CAAC;AAC7C;AA2BA,eAAsB,WACpB,YACA,UAA0B,CAAC,GACZ;AACf,QAAM,cAAc,aAAa,YAAY,OAAO;AACpD,MAAI,uBAAuB,OAAO;AAChC,UAAM;AAAA,EACR;AACA,QAAM,WAAW,aAAa,EAAE,cAAc,KAAK,CAAC;AACtD;;;AEjLO,SAAS,mBAAmB,SAAwB;AACzD,UAAQ,OAAO,4BAA4B,yEAAyE;AACpH,UAAQ,OAAO,0BAA0B,kCAAkC;AAC3E,UAAQ,OAAO,wBAAwB,gJAAgJ;AACvL,UAAQ,OAAO,eAAe,iCAAiC;AAC/D,UAAQ,OAAO,gCAAgC,sFAAsF;AACrI,UAAQ,OAAO,cAAc,mHAAmH;AAChJ,UAAQ,OAAO,sBAAsB,iDAAiD;AACtF,UAAQ,OAAO,mCAAmC,2EAA2E;AAC7H,UAAQ,OAAO,qBAAqB,8EAA8E;AAClH,UAAQ,OAAO,0BAA0B,sGAAsG;AAC/I,UAAQ,OAAO,2BAA2B,6DAA6D;AACvG,UAAQ,OAAO,qCAAqC,+FAA+F;AACnJ,UAAQ,OAAO,iCAAiC,2FAA2F;AAC3I,UAAQ,OAAO,iCAAiC,2FAA2F;AAC3I,UAAQ,OAAO,0BAA0B,wHAAwH;AACjK,UAAQ,OAAO,0BAA0B,qEAAqE;AAC9G,UAAQ,OAAO,uBAAuB,gCAAgC;AACtE,UAAQ,OAAO,mBAAmB,wDAAwD;AAC1F,UAAQ,OAAO,2BAA2B,4CAA4C;AACtF,UAAQ,OAAO,0BAA0B,gHAAgH;AACzJ,UAAQ,OAAO,oBAAoB,iOAAiO;AACpQ,UAAQ,OAAO,yBAAyB,sGAAsG;AAC9I,UAAQ,OAAO,wBAAwB,+CAA+C;AACtF,UAAQ,OAAO,4BAA4B,6EAA6E;AACxH,UAAQ,OAAO,qBAAqB,qEAAqE;AACzG,UAAQ,OAAO,kDAAkD,8JAA8J;AAC/N,UAAQ,OAAO,2CAA2C,oIAAoI;AAC9L,UAAQ,OAAO,0CAA0C,uFAAuF;AAChJ,UAAQ,OAAO,eAAe,6DAA6D;AAC3F,UAAQ,OAAO,qBAAqB,iTAAiT;AACvV;;;AHjCA,SAAS,gBAAyB;AAChC,QAAM,UAAU,IAAI,QAAQ;AAE5B,UACG,KAAK,oBAAoB,EACzB,MAAM,sBAAsB,EAC5B,YAAY,4CAA4C,EACxD,mBAAmB,wCAAwC,EAC3D,yBAAyB,KAAK,EAC9B,SAAS,cAAc,uCAAuC;AAGjE,qBAAmB,OAAO;AAE1B,UAAQ,gBAAgB;AAAA,IACtB,UAAU,CAAC,QAAQ;AACjB,cAAQ,MAAM,IAAI,QAAQ,CAAC;AAAA,IAC7B;AAAA,IACA,aAAa,CAAC,KAAK,UAAU;AAC3B,YAAM,GAAG;AAAA,IACX;AAAA,EACF,CAAC;AAED,SAAO;AACT;AAEA,eAAe,OAAwB;AACrC,QAAM,UAAU,cAAc;AAE9B,UAAQ,aAAa;AAErB,MAAI;AACF,YAAQ,MAAM,QAAQ,IAAI;AAAA,EAC5B,SAAS,KAAK;AACZ,QAAI,eAAe,gBAAgB;AACjC,UAAI,IAAI,SAAS,2BAA2B;AAC1C,eAAO;AAAA,MACT;AACA,aAAO,IAAI,YAAY;AAAA,IACzB;AAEA,UAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,YAAQ,MAAM,OAAO;AACrB,YAAQ,MAAM,wCAAwC;AACtD,WAAO;AAAA,EACT;AAEA,QAAM,aAAa,QAAQ,KAAiB;AAC5C,QAAM,aAAa,QAAQ;AAC3B,QAAM,iBAAiB,oBAAoB,UAAU;AAErD,MAAI;AAGF,UAAM,WAAW,YAAY,cAAc;AAC3C,WAAO;AAAA,EACT,SAAS,KAAK;AAMZ,UAAM,aACJ,eAAe,SAAU,IAAyC,eAAe;AACnF,QAAI,CAAC,YAAY;AACf,YAAM,UAAU,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG;AAC/D,cAAQ,MAAM,OAAO;AAAA,IACvB;AACA,WAAO;AAAA,EACT;AACF;AAEA,KAAK,EAAE,KAAK,CAAC,SAAS;AACpB,MAAI,SAAS,GAAG;AACd,YAAQ,KAAK,IAAI;AAAA,EACnB;AACF,CAAC;","names":["__filename","__dirname"]}
package/dist/index.cjs CHANGED
@@ -37,7 +37,7 @@ __export(index_exports, {
37
37
  });
38
38
  module.exports = __toCommonJS(index_exports);
39
39
 
40
- // node_modules/.pnpm/tsup@8.5.1_postcss@8.5.9_tsx@4.20.5_typescript@6.0.2/node_modules/tsup/assets/cjs_shims.js
40
+ // node_modules/.pnpm/tsup@8.5.1_postcss@8.5.12_tsx@4.20.5_typescript@6.0.2/node_modules/tsup/assets/cjs_shims.js
41
41
  var getImportMetaUrl = () => typeof document === "undefined" ? new URL(`file:${__filename}`).href : document.currentScript && document.currentScript.tagName.toUpperCase() === "SCRIPT" ? document.currentScript.src : new URL("main.js", document.baseURI).href;
42
42
  var importMetaUrl = /* @__PURE__ */ getImportMetaUrl();
43
43
 
@@ -170,7 +170,13 @@ function executeJar(args, executionOptions = {}) {
170
170
  );
171
171
  }
172
172
  const command = "java";
173
- const commandArgs = ["-jar", jarPath, ...args];
173
+ const commandArgs = [
174
+ "-Djava.awt.headless=true",
175
+ "-Dapple.awt.UIElement=true",
176
+ "-jar",
177
+ jarPath,
178
+ ...args
179
+ ];
174
180
  const javaProcess = (0, import_child_process.spawn)(command, commandArgs);
175
181
  let stdout = "";
176
182
  let stderr = "";
@@ -216,6 +222,7 @@ function executeJar(args, executionOptions = {}) {
216
222
 
217
223
  ${errorOutput}`
218
224
  );
225
+ error.isJavaExit = true;
219
226
  reject(error);
220
227
  }
221
228
  });
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../node_modules/.pnpm/tsup@8.5.1_postcss@8.5.9_tsx@4.20.5_typescript@6.0.2/node_modules/tsup/assets/cjs_shims.js","../src/convert-options.generated.ts"],"sourcesContent":["import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { StringDecoder } from 'string_decoder';\nimport { fileURLToPath } from 'url';\n\n// Re-export types and utilities from auto-generated file\nexport type { ConvertOptions } from './convert-options.generated.js';\nexport { buildArgs } from './convert-options.generated.js';\nimport type { ConvertOptions } from './convert-options.generated.js';\nimport { buildArgs } from './convert-options.generated.js';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n /**\n * When true, forwards Java's stdout and stderr chunks to the parent\n * process in real time as well as accumulating them. Used by the bundled\n * CLI so long-running conversions show progress as it happens.\n */\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n // StringDecoder buffers incomplete multi-byte UTF-8 sequences across\n // chunk boundaries — Buffer.toString() alone would emit replacement\n // characters when, e.g., a 3-byte Korean codepoint splits across two\n // 'data' events. One decoder per stream so they don't share state.\n const stdoutDecoder = new StringDecoder('utf8');\n const stderrDecoder = new StringDecoder('utf8');\n\n javaProcess.stdout.on('data', (data: Buffer) => {\n const chunk = stdoutDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n // Stream-only: don't also accumulate, or a multi-hour conversion\n // would buffer its entire (potentially gigabyte) stdout in memory\n // for no consumer.\n process.stdout.write(chunk);\n } else {\n stdout += chunk;\n }\n });\n\n javaProcess.stderr.on('data', (data: Buffer) => {\n const chunk = stderrDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n // stderr is always accumulated (progress logs are small and we need\n // them for error messages on non-zero exit).\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n // Flush any trailing bytes the decoder is still holding (always emit\n // them — if we drop them on error paths, error messages with non-ASCII\n // characters lose their tail).\n const stdoutTail = stdoutDecoder.end();\n const stderrTail = stderrDecoder.end();\n if (stdoutTail.length > 0) {\n if (streamOutput) {\n process.stdout.write(stdoutTail);\n } else {\n stdout += stdoutTail;\n }\n }\n if (stderrTail.length > 0) {\n if (streamOutput) process.stderr.write(stderrTail);\n stderr += stderrTail;\n }\n\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nfunction buildJarArgs(\n inputPaths: string | string[],\n options: ConvertOptions,\n): string[] | Error {\n const inputList = Array.isArray(inputPaths) ? inputPaths : [inputPaths];\n if (inputList.length === 0) {\n return new Error('At least one input path must be provided.');\n }\n\n for (const input of inputList) {\n if (!fs.existsSync(input)) {\n return new Error(`Input file or folder not found: ${input}`);\n }\n }\n\n return [...inputList, ...buildArgs(options)];\n}\n\nexport function convert(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<string> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n return Promise.reject(argsOrError);\n }\n // Library API: never streams to the parent process. Returns the full stdout\n // string so callers can do `const out = await convert(...)` without surprise\n // side-effects on process.stdout / process.stderr.\n return executeJar(argsOrError, { streamOutput: false });\n}\n\n/**\n * Internal entry point used by the bundled CLI. Streams Java's stdout and\n * stderr to the parent process in real time (so long-running conversions like\n * hybrid mode show progress as it happens) and resolves without a stdout\n * payload — preventing the caller from re-printing what was already streamed.\n *\n * Not part of the public API: do not import this from application code. Use\n * {@link convert} instead.\n *\n * @internal\n */\nexport async function _runForCli(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<void> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n throw argsOrError;\n }\n await executeJar(argsOrError, { streamOutput: true });\n}\n\n/**\n * @deprecated Use `convert()` and `ConvertOptions` instead. This function will be removed in a future version.\n */\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n useStructTree?: boolean;\n}\n\n/**\n * @deprecated Use `convert()` instead. This function will be removed in a future version.\n */\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n console.warn(\n 'Warning: run() is deprecated and will be removed in a future version. Use convert() instead.',\n );\n\n // Build format array based on legacy boolean options\n const formats: string[] = [];\n if (!options.noJson) {\n formats.push('json');\n }\n if (options.generateMarkdown) {\n if (options.addImageToMarkdown) {\n formats.push('markdown-with-images');\n } else if (options.htmlInMarkdown) {\n formats.push('markdown-with-html');\n } else {\n formats.push('markdown');\n }\n }\n if (options.generateHtml) {\n formats.push('html');\n }\n if (options.generateAnnotatedPdf) {\n formats.push('pdf');\n }\n\n return convert(inputPath, {\n outputDir: options.outputFolder,\n password: options.password,\n replaceInvalidChars: options.replaceInvalidChars,\n keepLineBreaks: options.keepLineBreaks,\n contentSafetyOff: options.contentSafetyOff,\n useStructTree: options.useStructTree,\n format: formats.length > 0 ? formats : undefined,\n quiet: !options.debug,\n });\n}\n","// Shim globals in cjs bundle\n// There's a weird bug that esbuild will always inject importMetaUrl\n// if we export it as `const importMetaUrl = ... __filename ...`\n// But using a function will not cause this issue\n\nconst getImportMetaUrl = () => \n typeof document === \"undefined\" \n ? new URL(`file:${__filename}`).href \n : (document.currentScript && document.currentScript.tagName.toUpperCase() === 'SCRIPT') \n ? document.currentScript.src \n : new URL(\"main.js\", document.baseURI).href;\n\nexport const importMetaUrl = /* @__PURE__ */ getImportMetaUrl()\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\n/**\n * Options for the convert function.\n */\nexport interface ConvertOptions {\n /** Directory where output files are written. Default: input file directory */\n outputDir?: string;\n /** Password for encrypted PDF files */\n password?: string;\n /** Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images, tagged-pdf. Default: json */\n format?: string | string[];\n /** Suppress console logging output */\n quiet?: boolean;\n /** Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg */\n contentSafetyOff?: string | string[];\n /** Enable sensitive data sanitization. Replaces emails, phone numbers, IPs, credit cards, and URLs with placeholders */\n sanitize?: boolean;\n /** Preserve original line breaks in extracted text */\n keepLineBreaks?: boolean;\n /** Replacement character for invalid/unrecognized characters. Default: space */\n replaceInvalidChars?: string;\n /** Use PDF structure tree (tagged PDF) for reading order and semantic structure */\n useStructTree?: boolean;\n /** Table detection method. Values: default (border-based), cluster (border + cluster). Default: default */\n tableMethod?: string;\n /** Reading order algorithm. Values: off, xycut. Default: xycut */\n readingOrder?: string;\n /** Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none */\n markdownPageSeparator?: string;\n /** Separator between pages in text output. Use %page-number% for page numbers. Default: none */\n textPageSeparator?: string;\n /** Separator between pages in HTML output. Use %page-number% for page numbers. Default: none */\n htmlPageSeparator?: string;\n /** Image output mode. Values: off (no images), embedded (Base64 data URIs), external (file references). Default: external */\n imageOutput?: string;\n /** Output format for extracted images. Values: png, jpeg. Default: png */\n imageFormat?: string;\n /** Directory for extracted images */\n imageDir?: string;\n /** Pages to extract (e.g., \"1,3,5-7\"). Default: all pages */\n pages?: string;\n /** Include page headers and footers in output */\n includeHeaderFooter?: boolean;\n /** Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental) */\n detectStrikethrough?: boolean;\n /** Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai */\n hybrid?: string;\n /** Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend) */\n hybridMode?: string;\n /** Hybrid backend server URL (overrides default) */\n hybridUrl?: string;\n /** Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0 */\n hybridTimeout?: string;\n /** Opt in to Java fallback on hybrid backend error (default: disabled) */\n hybridFallback?: boolean;\n /** DLA label 7 (regionlist) handling. Requires --hybrid=hancom-ai. Values: table-first (default; check TSR overlap), list-only (skip TSR, always treat as list) */\n hybridHancomAiRegionlistStrategy?: string;\n /** OCR strategy. Requires --hybrid=hancom-ai. Values: off (stream-only), auto (default; stream first, OCR fallback), force (OCR-only) */\n hybridHancomAiOcrStrategy?: string;\n /** Page image cache backing. Requires --hybrid=hancom-ai. Values: memory (default), disk */\n hybridHancomAiImageCache?: string;\n /** Write output to stdout instead of file (single format only) */\n toStdout?: boolean;\n /** Number of worker threads for per-page processing. Default: 1 (sequential, stable). Values >1 (experimental) run pages in parallel for faster throughput; output may vary slightly on some PDFs. Capped at the number of available CPU cores. Applies to the native Java pipeline only; ignored in --hybrid mode */\n threads?: string;\n}\n\n/**\n * Options as parsed from CLI (all values are strings from commander).\n */\nexport interface CliOptions {\n outputDir?: string;\n password?: string;\n format?: string;\n quiet?: boolean;\n contentSafetyOff?: string;\n sanitize?: boolean;\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n useStructTree?: boolean;\n tableMethod?: string;\n readingOrder?: string;\n markdownPageSeparator?: string;\n textPageSeparator?: string;\n htmlPageSeparator?: string;\n imageOutput?: string;\n imageFormat?: string;\n imageDir?: string;\n pages?: string;\n includeHeaderFooter?: boolean;\n detectStrikethrough?: boolean;\n hybrid?: string;\n hybridMode?: string;\n hybridUrl?: string;\n hybridTimeout?: string;\n hybridFallback?: boolean;\n hybridHancomAiRegionlistStrategy?: string;\n hybridHancomAiOcrStrategy?: string;\n hybridHancomAiImageCache?: string;\n toStdout?: boolean;\n threads?: string;\n}\n\n/**\n * Convert CLI options to ConvertOptions.\n */\nexport function buildConvertOptions(cliOptions: CliOptions): ConvertOptions {\n const convertOptions: ConvertOptions = {};\n\n if (cliOptions.outputDir) {\n convertOptions.outputDir = cliOptions.outputDir;\n }\n if (cliOptions.password) {\n convertOptions.password = cliOptions.password;\n }\n if (cliOptions.format) {\n convertOptions.format = cliOptions.format;\n }\n if (cliOptions.quiet) {\n convertOptions.quiet = true;\n }\n if (cliOptions.contentSafetyOff) {\n convertOptions.contentSafetyOff = cliOptions.contentSafetyOff;\n }\n if (cliOptions.sanitize) {\n convertOptions.sanitize = true;\n }\n if (cliOptions.keepLineBreaks) {\n convertOptions.keepLineBreaks = true;\n }\n if (cliOptions.replaceInvalidChars) {\n convertOptions.replaceInvalidChars = cliOptions.replaceInvalidChars;\n }\n if (cliOptions.useStructTree) {\n convertOptions.useStructTree = true;\n }\n if (cliOptions.tableMethod) {\n convertOptions.tableMethod = cliOptions.tableMethod;\n }\n if (cliOptions.readingOrder) {\n convertOptions.readingOrder = cliOptions.readingOrder;\n }\n if (cliOptions.markdownPageSeparator) {\n convertOptions.markdownPageSeparator = cliOptions.markdownPageSeparator;\n }\n if (cliOptions.textPageSeparator) {\n convertOptions.textPageSeparator = cliOptions.textPageSeparator;\n }\n if (cliOptions.htmlPageSeparator) {\n convertOptions.htmlPageSeparator = cliOptions.htmlPageSeparator;\n }\n if (cliOptions.imageOutput) {\n convertOptions.imageOutput = cliOptions.imageOutput;\n }\n if (cliOptions.imageFormat) {\n convertOptions.imageFormat = cliOptions.imageFormat;\n }\n if (cliOptions.imageDir) {\n convertOptions.imageDir = cliOptions.imageDir;\n }\n if (cliOptions.pages) {\n convertOptions.pages = cliOptions.pages;\n }\n if (cliOptions.includeHeaderFooter) {\n convertOptions.includeHeaderFooter = true;\n }\n if (cliOptions.detectStrikethrough) {\n convertOptions.detectStrikethrough = true;\n }\n if (cliOptions.hybrid) {\n convertOptions.hybrid = cliOptions.hybrid;\n }\n if (cliOptions.hybridMode) {\n convertOptions.hybridMode = cliOptions.hybridMode;\n }\n if (cliOptions.hybridUrl) {\n convertOptions.hybridUrl = cliOptions.hybridUrl;\n }\n if (cliOptions.hybridTimeout) {\n convertOptions.hybridTimeout = cliOptions.hybridTimeout;\n }\n if (cliOptions.hybridFallback) {\n convertOptions.hybridFallback = true;\n }\n if (cliOptions.hybridHancomAiRegionlistStrategy) {\n convertOptions.hybridHancomAiRegionlistStrategy = cliOptions.hybridHancomAiRegionlistStrategy;\n }\n if (cliOptions.hybridHancomAiOcrStrategy) {\n convertOptions.hybridHancomAiOcrStrategy = cliOptions.hybridHancomAiOcrStrategy;\n }\n if (cliOptions.hybridHancomAiImageCache) {\n convertOptions.hybridHancomAiImageCache = cliOptions.hybridHancomAiImageCache;\n }\n if (cliOptions.toStdout) {\n convertOptions.toStdout = true;\n }\n if (cliOptions.threads) {\n convertOptions.threads = cliOptions.threads;\n }\n\n return convertOptions;\n}\n\n/**\n * Build CLI arguments array from ConvertOptions.\n */\nexport function buildArgs(options: ConvertOptions): string[] {\n const args: string[] = [];\n\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format) {\n if (Array.isArray(options.format)) {\n if (options.format.length > 0) {\n args.push('--format', options.format.join(','));\n }\n } else {\n args.push('--format', options.format);\n }\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff) {\n if (Array.isArray(options.contentSafetyOff)) {\n if (options.contentSafetyOff.length > 0) {\n args.push('--content-safety-off', options.contentSafetyOff.join(','));\n }\n } else {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n }\n if (options.sanitize) {\n args.push('--sanitize');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.useStructTree) {\n args.push('--use-struct-tree');\n }\n if (options.tableMethod) {\n args.push('--table-method', options.tableMethod);\n }\n if (options.readingOrder) {\n args.push('--reading-order', options.readingOrder);\n }\n if (options.markdownPageSeparator) {\n args.push('--markdown-page-separator', options.markdownPageSeparator);\n }\n if (options.textPageSeparator) {\n args.push('--text-page-separator', options.textPageSeparator);\n }\n if (options.htmlPageSeparator) {\n args.push('--html-page-separator', options.htmlPageSeparator);\n }\n if (options.imageOutput) {\n args.push('--image-output', options.imageOutput);\n }\n if (options.imageFormat) {\n args.push('--image-format', options.imageFormat);\n }\n if (options.imageDir) {\n args.push('--image-dir', options.imageDir);\n }\n if (options.pages) {\n args.push('--pages', options.pages);\n }\n if (options.includeHeaderFooter) {\n args.push('--include-header-footer');\n }\n if (options.detectStrikethrough) {\n args.push('--detect-strikethrough');\n }\n if (options.hybrid) {\n args.push('--hybrid', options.hybrid);\n }\n if (options.hybridMode) {\n args.push('--hybrid-mode', options.hybridMode);\n }\n if (options.hybridUrl) {\n args.push('--hybrid-url', options.hybridUrl);\n }\n if (options.hybridTimeout) {\n args.push('--hybrid-timeout', options.hybridTimeout);\n }\n if (options.hybridFallback) {\n args.push('--hybrid-fallback');\n }\n if (options.hybridHancomAiRegionlistStrategy) {\n args.push('--hybrid-hancom-ai-regionlist-strategy', options.hybridHancomAiRegionlistStrategy);\n }\n if (options.hybridHancomAiOcrStrategy) {\n args.push('--hybrid-hancom-ai-ocr-strategy', options.hybridHancomAiOcrStrategy);\n }\n if (options.hybridHancomAiImageCache) {\n args.push('--hybrid-hancom-ai-image-cache', options.hybridHancomAiImageCache);\n }\n if (options.toStdout) {\n args.push('--to-stdout');\n }\n if (options.threads) {\n args.push('--threads', options.threads);\n }\n\n return args;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACKA,IAAM,mBAAmB,MACvB,OAAO,aAAa,cAChB,IAAI,IAAI,QAAQ,UAAU,EAAE,EAAE,OAC7B,SAAS,iBAAiB,SAAS,cAAc,QAAQ,YAAY,MAAM,WAC1E,SAAS,cAAc,MACvB,IAAI,IAAI,WAAW,SAAS,OAAO,EAAE;AAEtC,IAAM,gBAAgC,iCAAiB;;;ADZ9D,2BAAsB;AACtB,WAAsB;AACtB,SAAoB;AACpB,4BAA8B;AAC9B,iBAA8B;;;AE4MvB,SAAS,UAAU,SAAmC;AAC3D,QAAM,OAAiB,CAAC;AAExB,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,QAAQ;AAClB,QAAI,MAAM,QAAQ,QAAQ,MAAM,GAAG;AACjC,UAAI,QAAQ,OAAO,SAAS,GAAG;AAC7B,aAAK,KAAK,YAAY,QAAQ,OAAO,KAAK,GAAG,CAAC;AAAA,MAChD;AAAA,IACF,OAAO;AACL,WAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,IACtC;AAAA,EACF;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,MAAM,QAAQ,QAAQ,gBAAgB,GAAG;AAC3C,UAAI,QAAQ,iBAAiB,SAAS,GAAG;AACvC,aAAK,KAAK,wBAAwB,QAAQ,iBAAiB,KAAK,GAAG,CAAC;AAAA,MACtE;AAAA,IACF,OAAO;AACL,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AAAA,EACF;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,YAAY;AAAA,EACxB;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBAAoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,cAAc;AACxB,SAAK,KAAK,mBAAmB,QAAQ,YAAY;AAAA,EACnD;AACA,MAAI,QAAQ,uBAAuB;AACjC,SAAK,KAAK,6BAA6B,QAAQ,qBAAqB;AAAA,EACtE;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,eAAe,QAAQ,QAAQ;AAAA,EAC3C;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,WAAW,QAAQ,KAAK;AAAA,EACpC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,yBAAyB;AAAA,EACrC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,wBAAwB;AAAA,EACpC;AACA,MAAI,QAAQ,QAAQ;AAClB,SAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,EACtC;AACA,MAAI,QAAQ,YAAY;AACtB,SAAK,KAAK,iBAAiB,QAAQ,UAAU;AAAA,EAC/C;AACA,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,oBAAoB,QAAQ,aAAa;AAAA,EACrD;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,kCAAkC;AAC5C,SAAK,KAAK,0CAA0C,QAAQ,gCAAgC;AAAA,EAC9F;AACA,MAAI,QAAQ,2BAA2B;AACrC,SAAK,KAAK,mCAAmC,QAAQ,yBAAyB;AAAA,EAChF;AACA,MAAI,QAAQ,0BAA0B;AACpC,SAAK,KAAK,kCAAkC,QAAQ,wBAAwB;AAAA,EAC9E;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,aAAa;AAAA,EACzB;AACA,MAAI,QAAQ,SAAS;AACnB,SAAK,KAAK,aAAa,QAAQ,OAAO;AAAA,EACxC;AAEA,SAAO;AACT;;;AF/SA,IAAMA,kBAAa,0BAAc,aAAe;AAChD,IAAM,YAAiB,aAAQA,WAAU;AAEzC,IAAM,WAAW;AAWjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,eAAe,MAAM,IAAI;AAEjC,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,UAAM,kBAAc,4BAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAKb,UAAM,gBAAgB,IAAI,oCAAc,MAAM;AAC9C,UAAM,gBAAgB,IAAI,oCAAc,MAAM;AAE9C,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAIhB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B,OAAO;AACL,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AAGA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAIhC,YAAM,aAAa,cAAc,IAAI;AACrC,YAAM,aAAa,cAAc,IAAI;AACrC,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,cAAc;AAChB,kBAAQ,OAAO,MAAM,UAAU;AAAA,QACjC,OAAO;AACL,oBAAU;AAAA,QACZ;AAAA,MACF;AACA,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,aAAc,SAAQ,OAAO,MAAM,UAAU;AACjD,kBAAU;AAAA,MACZ;AAEA,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AAEA,SAAS,aACP,YACA,SACkB;AAClB,QAAM,YAAY,MAAM,QAAQ,UAAU,IAAI,aAAa,CAAC,UAAU;AACtE,MAAI,UAAU,WAAW,GAAG;AAC1B,WAAO,IAAI,MAAM,2CAA2C;AAAA,EAC9D;AAEA,aAAW,SAAS,WAAW;AAC7B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE;AAAA,IAC7D;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,WAAW,GAAG,UAAU,OAAO,CAAC;AAC7C;AAEO,SAAS,QACd,YACA,UAA0B,CAAC,GACV;AACjB,QAAM,cAAc,aAAa,YAAY,OAAO;AACpD,MAAI,uBAAuB,OAAO;AAChC,WAAO,QAAQ,OAAO,WAAW;AAAA,EACnC;AAIA,SAAO,WAAW,aAAa,EAAE,cAAc,MAAM,CAAC;AACxD;AAaA,eAAsB,WACpB,YACA,UAA0B,CAAC,GACZ;AACf,QAAM,cAAc,aAAa,YAAY,OAAO;AACpD,MAAI,uBAAuB,OAAO;AAChC,UAAM;AAAA,EACR;AACA,QAAM,WAAW,aAAa,EAAE,cAAc,KAAK,CAAC;AACtD;AAwBO,SAAS,IAAI,WAAmB,UAAsB,CAAC,GAAoB;AAChF,UAAQ;AAAA,IACN;AAAA,EACF;AAGA,QAAM,UAAoB,CAAC;AAC3B,MAAI,CAAC,QAAQ,QAAQ;AACnB,YAAQ,KAAK,MAAM;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,QAAQ,oBAAoB;AAC9B,cAAQ,KAAK,sBAAsB;AAAA,IACrC,WAAW,QAAQ,gBAAgB;AACjC,cAAQ,KAAK,oBAAoB;AAAA,IACnC,OAAO;AACL,cAAQ,KAAK,UAAU;AAAA,IACzB;AAAA,EACF;AACA,MAAI,QAAQ,cAAc;AACxB,YAAQ,KAAK,MAAM;AAAA,EACrB;AACA,MAAI,QAAQ,sBAAsB;AAChC,YAAQ,KAAK,KAAK;AAAA,EACpB;AAEA,SAAO,QAAQ,WAAW;AAAA,IACxB,WAAW,QAAQ;AAAA,IACnB,UAAU,QAAQ;AAAA,IAClB,qBAAqB,QAAQ;AAAA,IAC7B,gBAAgB,QAAQ;AAAA,IACxB,kBAAkB,QAAQ;AAAA,IAC1B,eAAe,QAAQ;AAAA,IACvB,QAAQ,QAAQ,SAAS,IAAI,UAAU;AAAA,IACvC,OAAO,CAAC,QAAQ;AAAA,EAClB,CAAC;AACH;","names":["__filename"]}
1
+ {"version":3,"sources":["../src/index.ts","../node_modules/.pnpm/tsup@8.5.1_postcss@8.5.12_tsx@4.20.5_typescript@6.0.2/node_modules/tsup/assets/cjs_shims.js","../src/convert-options.generated.ts"],"sourcesContent":["import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { StringDecoder } from 'string_decoder';\nimport { fileURLToPath } from 'url';\n\n// Re-export types and utilities from auto-generated file\nexport type { ConvertOptions } from './convert-options.generated.js';\nexport { buildArgs } from './convert-options.generated.js';\nimport type { ConvertOptions } from './convert-options.generated.js';\nimport { buildArgs } from './convert-options.generated.js';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n /**\n * When true, forwards Java's stdout and stderr chunks to the parent\n * process in real time as well as accumulating them. Used by the bundled\n * CLI so long-running conversions show progress as it happens.\n */\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n // Force headless AWT so macOS doesn't surface a Dock icon (and steal focus)\n // every time the JVM touches ImageIO/PDFBox rendering. Safe on all OSes —\n // the CLI never opens a UI window, only manipulates BufferedImages.\n const commandArgs = [\n '-Djava.awt.headless=true',\n '-Dapple.awt.UIElement=true',\n '-jar',\n jarPath,\n ...args,\n ];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n // StringDecoder buffers incomplete multi-byte UTF-8 sequences across\n // chunk boundaries — Buffer.toString() alone would emit replacement\n // characters when, e.g., a 3-byte Korean codepoint splits across two\n // 'data' events. One decoder per stream so they don't share state.\n const stdoutDecoder = new StringDecoder('utf8');\n const stderrDecoder = new StringDecoder('utf8');\n\n javaProcess.stdout.on('data', (data: Buffer) => {\n const chunk = stdoutDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n // Stream-only: don't also accumulate, or a multi-hour conversion\n // would buffer its entire (potentially gigabyte) stdout in memory\n // for no consumer.\n process.stdout.write(chunk);\n } else {\n stdout += chunk;\n }\n });\n\n javaProcess.stderr.on('data', (data: Buffer) => {\n const chunk = stderrDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n // stderr is always accumulated (progress logs are small and we need\n // them for error messages on non-zero exit).\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n // Flush any trailing bytes the decoder is still holding (always emit\n // them — if we drop them on error paths, error messages with non-ASCII\n // characters lose their tail).\n const stdoutTail = stdoutDecoder.end();\n const stderrTail = stderrDecoder.end();\n if (stdoutTail.length > 0) {\n if (streamOutput) {\n process.stdout.write(stdoutTail);\n } else {\n stdout += stdoutTail;\n }\n }\n if (stderrTail.length > 0) {\n if (streamOutput) process.stderr.write(stderrTail);\n stderr += stderrTail;\n }\n\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n // Tag so the CLI can suppress re-printing this message — Java's\n // stderr was already streamed live to the parent in CLI mode, and\n // re-printing risks leaking anything sensitive Java logged\n // (e.g. a --password value echoed by an underlying library).\n (error as Error & { isJavaExit?: boolean }).isJavaExit = true;\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nfunction buildJarArgs(\n inputPaths: string | string[],\n options: ConvertOptions,\n): string[] | Error {\n const inputList = Array.isArray(inputPaths) ? inputPaths : [inputPaths];\n if (inputList.length === 0) {\n return new Error('At least one input path must be provided.');\n }\n\n for (const input of inputList) {\n if (!fs.existsSync(input)) {\n return new Error(`Input file or folder not found: ${input}`);\n }\n }\n\n return [...inputList, ...buildArgs(options)];\n}\n\nexport function convert(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<string> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n return Promise.reject(argsOrError);\n }\n // Library API: never streams to the parent process. Returns the full stdout\n // string so callers can do `const out = await convert(...)` without surprise\n // side-effects on process.stdout / process.stderr.\n return executeJar(argsOrError, { streamOutput: false });\n}\n\n/**\n * Internal entry point used by the bundled CLI. Streams Java's stdout and\n * stderr to the parent process in real time (so long-running conversions like\n * hybrid mode show progress as it happens) and resolves without a stdout\n * payload — preventing the caller from re-printing what was already streamed.\n *\n * Not part of the public API: do not import this from application code. Use\n * {@link convert} instead.\n *\n * @internal\n */\nexport async function _runForCli(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<void> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n throw argsOrError;\n }\n await executeJar(argsOrError, { streamOutput: true });\n}\n\n/**\n * @deprecated Use `convert()` and `ConvertOptions` instead. This function will be removed in a future version.\n */\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n useStructTree?: boolean;\n}\n\n/**\n * @deprecated Use `convert()` instead. This function will be removed in a future version.\n */\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n console.warn(\n 'Warning: run() is deprecated and will be removed in a future version. Use convert() instead.',\n );\n\n // Build format array based on legacy boolean options\n const formats: string[] = [];\n if (!options.noJson) {\n formats.push('json');\n }\n if (options.generateMarkdown) {\n if (options.addImageToMarkdown) {\n formats.push('markdown-with-images');\n } else if (options.htmlInMarkdown) {\n formats.push('markdown-with-html');\n } else {\n formats.push('markdown');\n }\n }\n if (options.generateHtml) {\n formats.push('html');\n }\n if (options.generateAnnotatedPdf) {\n formats.push('pdf');\n }\n\n return convert(inputPath, {\n outputDir: options.outputFolder,\n password: options.password,\n replaceInvalidChars: options.replaceInvalidChars,\n keepLineBreaks: options.keepLineBreaks,\n contentSafetyOff: options.contentSafetyOff,\n useStructTree: options.useStructTree,\n format: formats.length > 0 ? formats : undefined,\n quiet: !options.debug,\n });\n}\n","// Shim globals in cjs bundle\n// There's a weird bug that esbuild will always inject importMetaUrl\n// if we export it as `const importMetaUrl = ... __filename ...`\n// But using a function will not cause this issue\n\nconst getImportMetaUrl = () => \n typeof document === \"undefined\" \n ? new URL(`file:${__filename}`).href \n : (document.currentScript && document.currentScript.tagName.toUpperCase() === 'SCRIPT') \n ? document.currentScript.src \n : new URL(\"main.js\", document.baseURI).href;\n\nexport const importMetaUrl = /* @__PURE__ */ getImportMetaUrl()\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\n/**\n * Options for the convert function.\n */\nexport interface ConvertOptions {\n /** Directory where output files are written. Default: input file directory */\n outputDir?: string;\n /** Password for encrypted PDF files */\n password?: string;\n /** Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images, tagged-pdf. Default: json */\n format?: string | string[];\n /** Suppress console logging output */\n quiet?: boolean;\n /** Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg */\n contentSafetyOff?: string | string[];\n /** Enable sensitive data sanitization. Replaces emails, phone numbers, IPs, credit cards, and URLs with placeholders */\n sanitize?: boolean;\n /** Preserve original line breaks in extracted text */\n keepLineBreaks?: boolean;\n /** Replacement character for invalid/unrecognized characters. Default: space */\n replaceInvalidChars?: string;\n /** Use PDF structure tree (tagged PDF) for reading order and semantic structure */\n useStructTree?: boolean;\n /** Table detection method. Values: default (border-based), cluster (border + cluster). Default: default */\n tableMethod?: string;\n /** Reading order algorithm. Values: off, xycut. Default: xycut */\n readingOrder?: string;\n /** Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none */\n markdownPageSeparator?: string;\n /** Separator between pages in text output. Use %page-number% for page numbers. Default: none */\n textPageSeparator?: string;\n /** Separator between pages in HTML output. Use %page-number% for page numbers. Default: none */\n htmlPageSeparator?: string;\n /** Image output mode. Values: off (no images), embedded (Base64 data URIs), external (file references). Default: external */\n imageOutput?: string;\n /** Output format for extracted images. Values: png, jpeg. Default: png */\n imageFormat?: string;\n /** Directory for extracted images */\n imageDir?: string;\n /** Pages to extract (e.g., \"1,3,5-7\"). Default: all pages */\n pages?: string;\n /** Include page headers and footers in output */\n includeHeaderFooter?: boolean;\n /** Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental) */\n detectStrikethrough?: boolean;\n /** Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai */\n hybrid?: string;\n /** Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend) */\n hybridMode?: string;\n /** Hybrid backend server URL (overrides default) */\n hybridUrl?: string;\n /** Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0 */\n hybridTimeout?: string;\n /** Opt in to Java fallback on hybrid backend error (default: disabled) */\n hybridFallback?: boolean;\n /** DLA label 7 (regionlist) handling. Requires --hybrid=hancom-ai. Values: table-first (default; check TSR overlap), list-only (skip TSR, always treat as list) */\n hybridHancomAiRegionlistStrategy?: string;\n /** OCR strategy. Requires --hybrid=hancom-ai. Values: off (stream-only), auto (default; stream first, OCR fallback), force (OCR-only) */\n hybridHancomAiOcrStrategy?: string;\n /** Page image cache backing. Requires --hybrid=hancom-ai. Values: memory (default), disk */\n hybridHancomAiImageCache?: string;\n /** Write output to stdout instead of file (single format only) */\n toStdout?: boolean;\n /** Number of worker threads for per-page processing. Default: 1 (sequential, stable). Values >1 (experimental) run pages in parallel for faster throughput; output may vary slightly on some PDFs. Capped at the number of available CPU cores. Applies to the native Java pipeline only; ignored in --hybrid mode */\n threads?: string;\n}\n\n/**\n * Options as parsed from CLI (all values are strings from commander).\n */\nexport interface CliOptions {\n outputDir?: string;\n password?: string;\n format?: string;\n quiet?: boolean;\n contentSafetyOff?: string;\n sanitize?: boolean;\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n useStructTree?: boolean;\n tableMethod?: string;\n readingOrder?: string;\n markdownPageSeparator?: string;\n textPageSeparator?: string;\n htmlPageSeparator?: string;\n imageOutput?: string;\n imageFormat?: string;\n imageDir?: string;\n pages?: string;\n includeHeaderFooter?: boolean;\n detectStrikethrough?: boolean;\n hybrid?: string;\n hybridMode?: string;\n hybridUrl?: string;\n hybridTimeout?: string;\n hybridFallback?: boolean;\n hybridHancomAiRegionlistStrategy?: string;\n hybridHancomAiOcrStrategy?: string;\n hybridHancomAiImageCache?: string;\n toStdout?: boolean;\n threads?: string;\n}\n\n/**\n * Convert CLI options to ConvertOptions.\n */\nexport function buildConvertOptions(cliOptions: CliOptions): ConvertOptions {\n const convertOptions: ConvertOptions = {};\n\n if (cliOptions.outputDir) {\n convertOptions.outputDir = cliOptions.outputDir;\n }\n if (cliOptions.password) {\n convertOptions.password = cliOptions.password;\n }\n if (cliOptions.format) {\n convertOptions.format = cliOptions.format;\n }\n if (cliOptions.quiet) {\n convertOptions.quiet = true;\n }\n if (cliOptions.contentSafetyOff) {\n convertOptions.contentSafetyOff = cliOptions.contentSafetyOff;\n }\n if (cliOptions.sanitize) {\n convertOptions.sanitize = true;\n }\n if (cliOptions.keepLineBreaks) {\n convertOptions.keepLineBreaks = true;\n }\n if (cliOptions.replaceInvalidChars) {\n convertOptions.replaceInvalidChars = cliOptions.replaceInvalidChars;\n }\n if (cliOptions.useStructTree) {\n convertOptions.useStructTree = true;\n }\n if (cliOptions.tableMethod) {\n convertOptions.tableMethod = cliOptions.tableMethod;\n }\n if (cliOptions.readingOrder) {\n convertOptions.readingOrder = cliOptions.readingOrder;\n }\n if (cliOptions.markdownPageSeparator) {\n convertOptions.markdownPageSeparator = cliOptions.markdownPageSeparator;\n }\n if (cliOptions.textPageSeparator) {\n convertOptions.textPageSeparator = cliOptions.textPageSeparator;\n }\n if (cliOptions.htmlPageSeparator) {\n convertOptions.htmlPageSeparator = cliOptions.htmlPageSeparator;\n }\n if (cliOptions.imageOutput) {\n convertOptions.imageOutput = cliOptions.imageOutput;\n }\n if (cliOptions.imageFormat) {\n convertOptions.imageFormat = cliOptions.imageFormat;\n }\n if (cliOptions.imageDir) {\n convertOptions.imageDir = cliOptions.imageDir;\n }\n if (cliOptions.pages) {\n convertOptions.pages = cliOptions.pages;\n }\n if (cliOptions.includeHeaderFooter) {\n convertOptions.includeHeaderFooter = true;\n }\n if (cliOptions.detectStrikethrough) {\n convertOptions.detectStrikethrough = true;\n }\n if (cliOptions.hybrid) {\n convertOptions.hybrid = cliOptions.hybrid;\n }\n if (cliOptions.hybridMode) {\n convertOptions.hybridMode = cliOptions.hybridMode;\n }\n if (cliOptions.hybridUrl) {\n convertOptions.hybridUrl = cliOptions.hybridUrl;\n }\n if (cliOptions.hybridTimeout) {\n convertOptions.hybridTimeout = cliOptions.hybridTimeout;\n }\n if (cliOptions.hybridFallback) {\n convertOptions.hybridFallback = true;\n }\n if (cliOptions.hybridHancomAiRegionlistStrategy) {\n convertOptions.hybridHancomAiRegionlistStrategy = cliOptions.hybridHancomAiRegionlistStrategy;\n }\n if (cliOptions.hybridHancomAiOcrStrategy) {\n convertOptions.hybridHancomAiOcrStrategy = cliOptions.hybridHancomAiOcrStrategy;\n }\n if (cliOptions.hybridHancomAiImageCache) {\n convertOptions.hybridHancomAiImageCache = cliOptions.hybridHancomAiImageCache;\n }\n if (cliOptions.toStdout) {\n convertOptions.toStdout = true;\n }\n if (cliOptions.threads) {\n convertOptions.threads = cliOptions.threads;\n }\n\n return convertOptions;\n}\n\n/**\n * Build CLI arguments array from ConvertOptions.\n */\nexport function buildArgs(options: ConvertOptions): string[] {\n const args: string[] = [];\n\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format) {\n if (Array.isArray(options.format)) {\n if (options.format.length > 0) {\n args.push('--format', options.format.join(','));\n }\n } else {\n args.push('--format', options.format);\n }\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff) {\n if (Array.isArray(options.contentSafetyOff)) {\n if (options.contentSafetyOff.length > 0) {\n args.push('--content-safety-off', options.contentSafetyOff.join(','));\n }\n } else {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n }\n if (options.sanitize) {\n args.push('--sanitize');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.useStructTree) {\n args.push('--use-struct-tree');\n }\n if (options.tableMethod) {\n args.push('--table-method', options.tableMethod);\n }\n if (options.readingOrder) {\n args.push('--reading-order', options.readingOrder);\n }\n if (options.markdownPageSeparator) {\n args.push('--markdown-page-separator', options.markdownPageSeparator);\n }\n if (options.textPageSeparator) {\n args.push('--text-page-separator', options.textPageSeparator);\n }\n if (options.htmlPageSeparator) {\n args.push('--html-page-separator', options.htmlPageSeparator);\n }\n if (options.imageOutput) {\n args.push('--image-output', options.imageOutput);\n }\n if (options.imageFormat) {\n args.push('--image-format', options.imageFormat);\n }\n if (options.imageDir) {\n args.push('--image-dir', options.imageDir);\n }\n if (options.pages) {\n args.push('--pages', options.pages);\n }\n if (options.includeHeaderFooter) {\n args.push('--include-header-footer');\n }\n if (options.detectStrikethrough) {\n args.push('--detect-strikethrough');\n }\n if (options.hybrid) {\n args.push('--hybrid', options.hybrid);\n }\n if (options.hybridMode) {\n args.push('--hybrid-mode', options.hybridMode);\n }\n if (options.hybridUrl) {\n args.push('--hybrid-url', options.hybridUrl);\n }\n if (options.hybridTimeout) {\n args.push('--hybrid-timeout', options.hybridTimeout);\n }\n if (options.hybridFallback) {\n args.push('--hybrid-fallback');\n }\n if (options.hybridHancomAiRegionlistStrategy) {\n args.push('--hybrid-hancom-ai-regionlist-strategy', options.hybridHancomAiRegionlistStrategy);\n }\n if (options.hybridHancomAiOcrStrategy) {\n args.push('--hybrid-hancom-ai-ocr-strategy', options.hybridHancomAiOcrStrategy);\n }\n if (options.hybridHancomAiImageCache) {\n args.push('--hybrid-hancom-ai-image-cache', options.hybridHancomAiImageCache);\n }\n if (options.toStdout) {\n args.push('--to-stdout');\n }\n if (options.threads) {\n args.push('--threads', options.threads);\n }\n\n return args;\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACKA,IAAM,mBAAmB,MACvB,OAAO,aAAa,cAChB,IAAI,IAAI,QAAQ,UAAU,EAAE,EAAE,OAC7B,SAAS,iBAAiB,SAAS,cAAc,QAAQ,YAAY,MAAM,WAC1E,SAAS,cAAc,MACvB,IAAI,IAAI,WAAW,SAAS,OAAO,EAAE;AAEtC,IAAM,gBAAgC,iCAAiB;;;ADZ9D,2BAAsB;AACtB,WAAsB;AACtB,SAAoB;AACpB,4BAA8B;AAC9B,iBAA8B;;;AE4MvB,SAAS,UAAU,SAAmC;AAC3D,QAAM,OAAiB,CAAC;AAExB,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,QAAQ;AAClB,QAAI,MAAM,QAAQ,QAAQ,MAAM,GAAG;AACjC,UAAI,QAAQ,OAAO,SAAS,GAAG;AAC7B,aAAK,KAAK,YAAY,QAAQ,OAAO,KAAK,GAAG,CAAC;AAAA,MAChD;AAAA,IACF,OAAO;AACL,WAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,IACtC;AAAA,EACF;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,MAAM,QAAQ,QAAQ,gBAAgB,GAAG;AAC3C,UAAI,QAAQ,iBAAiB,SAAS,GAAG;AACvC,aAAK,KAAK,wBAAwB,QAAQ,iBAAiB,KAAK,GAAG,CAAC;AAAA,MACtE;AAAA,IACF,OAAO;AACL,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AAAA,EACF;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,YAAY;AAAA,EACxB;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBAAoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,cAAc;AACxB,SAAK,KAAK,mBAAmB,QAAQ,YAAY;AAAA,EACnD;AACA,MAAI,QAAQ,uBAAuB;AACjC,SAAK,KAAK,6BAA6B,QAAQ,qBAAqB;AAAA,EACtE;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,eAAe,QAAQ,QAAQ;AAAA,EAC3C;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,WAAW,QAAQ,KAAK;AAAA,EACpC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,yBAAyB;AAAA,EACrC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,wBAAwB;AAAA,EACpC;AACA,MAAI,QAAQ,QAAQ;AAClB,SAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,EACtC;AACA,MAAI,QAAQ,YAAY;AACtB,SAAK,KAAK,iBAAiB,QAAQ,UAAU;AAAA,EAC/C;AACA,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,oBAAoB,QAAQ,aAAa;AAAA,EACrD;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,kCAAkC;AAC5C,SAAK,KAAK,0CAA0C,QAAQ,gCAAgC;AAAA,EAC9F;AACA,MAAI,QAAQ,2BAA2B;AACrC,SAAK,KAAK,mCAAmC,QAAQ,yBAAyB;AAAA,EAChF;AACA,MAAI,QAAQ,0BAA0B;AACpC,SAAK,KAAK,kCAAkC,QAAQ,wBAAwB;AAAA,EAC9E;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,aAAa;AAAA,EACzB;AACA,MAAI,QAAQ,SAAS;AACnB,SAAK,KAAK,aAAa,QAAQ,OAAO;AAAA,EACxC;AAEA,SAAO;AACT;;;AF/SA,IAAMA,kBAAa,0BAAc,aAAe;AAChD,IAAM,YAAiB,aAAQA,WAAU;AAEzC,IAAM,WAAW;AAWjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,eAAe,MAAM,IAAI;AAEjC,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAK,WAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAIhB,UAAM,cAAc;AAAA,MAClB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG;AAAA,IACL;AAEA,UAAM,kBAAc,4BAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAKb,UAAM,gBAAgB,IAAI,oCAAc,MAAM;AAC9C,UAAM,gBAAgB,IAAI,oCAAc,MAAM;AAE9C,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAIhB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B,OAAO;AACL,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AAGA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAIhC,YAAM,aAAa,cAAc,IAAI;AACrC,YAAM,aAAa,cAAc,IAAI;AACrC,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,cAAc;AAChB,kBAAQ,OAAO,MAAM,UAAU;AAAA,QACjC,OAAO;AACL,oBAAU;AAAA,QACZ;AAAA,MACF;AACA,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,aAAc,SAAQ,OAAO,MAAM,UAAU;AACjD,kBAAU;AAAA,MACZ;AAEA,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AAKA,QAAC,MAA2C,aAAa;AACzD,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AAEA,SAAS,aACP,YACA,SACkB;AAClB,QAAM,YAAY,MAAM,QAAQ,UAAU,IAAI,aAAa,CAAC,UAAU;AACtE,MAAI,UAAU,WAAW,GAAG;AAC1B,WAAO,IAAI,MAAM,2CAA2C;AAAA,EAC9D;AAEA,aAAW,SAAS,WAAW;AAC7B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE;AAAA,IAC7D;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,WAAW,GAAG,UAAU,OAAO,CAAC;AAC7C;AAEO,SAAS,QACd,YACA,UAA0B,CAAC,GACV;AACjB,QAAM,cAAc,aAAa,YAAY,OAAO;AACpD,MAAI,uBAAuB,OAAO;AAChC,WAAO,QAAQ,OAAO,WAAW;AAAA,EACnC;AAIA,SAAO,WAAW,aAAa,EAAE,cAAc,MAAM,CAAC;AACxD;AAaA,eAAsB,WACpB,YACA,UAA0B,CAAC,GACZ;AACf,QAAM,cAAc,aAAa,YAAY,OAAO;AACpD,MAAI,uBAAuB,OAAO;AAChC,UAAM;AAAA,EACR;AACA,QAAM,WAAW,aAAa,EAAE,cAAc,KAAK,CAAC;AACtD;AAwBO,SAAS,IAAI,WAAmB,UAAsB,CAAC,GAAoB;AAChF,UAAQ;AAAA,IACN;AAAA,EACF;AAGA,QAAM,UAAoB,CAAC;AAC3B,MAAI,CAAC,QAAQ,QAAQ;AACnB,YAAQ,KAAK,MAAM;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,QAAQ,oBAAoB;AAC9B,cAAQ,KAAK,sBAAsB;AAAA,IACrC,WAAW,QAAQ,gBAAgB;AACjC,cAAQ,KAAK,oBAAoB;AAAA,IACnC,OAAO;AACL,cAAQ,KAAK,UAAU;AAAA,IACzB;AAAA,EACF;AACA,MAAI,QAAQ,cAAc;AACxB,YAAQ,KAAK,MAAM;AAAA,EACrB;AACA,MAAI,QAAQ,sBAAsB;AAChC,YAAQ,KAAK,KAAK;AAAA,EACpB;AAEA,SAAO,QAAQ,WAAW;AAAA,IACxB,WAAW,QAAQ;AAAA,IACnB,UAAU,QAAQ;AAAA,IAClB,qBAAqB,QAAQ;AAAA,IAC7B,gBAAgB,QAAQ;AAAA,IACxB,kBAAkB,QAAQ;AAAA,IAC1B,eAAe,QAAQ;AAAA,IACvB,QAAQ,QAAQ,SAAS,IAAI,UAAU;AAAA,IACvC,OAAO,CAAC,QAAQ;AAAA,EAClB,CAAC;AACH;","names":["__filename"]}
package/dist/index.js CHANGED
@@ -127,7 +127,13 @@ function executeJar(args, executionOptions = {}) {
127
127
  );
128
128
  }
129
129
  const command = "java";
130
- const commandArgs = ["-jar", jarPath, ...args];
130
+ const commandArgs = [
131
+ "-Djava.awt.headless=true",
132
+ "-Dapple.awt.UIElement=true",
133
+ "-jar",
134
+ jarPath,
135
+ ...args
136
+ ];
131
137
  const javaProcess = spawn(command, commandArgs);
132
138
  let stdout = "";
133
139
  let stderr = "";
@@ -173,6 +179,7 @@ function executeJar(args, executionOptions = {}) {
173
179
 
174
180
  ${errorOutput}`
175
181
  );
182
+ error.isJavaExit = true;
176
183
  reject(error);
177
184
  }
178
185
  });
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/index.ts","../src/convert-options.generated.ts"],"sourcesContent":["import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { StringDecoder } from 'string_decoder';\nimport { fileURLToPath } from 'url';\n\n// Re-export types and utilities from auto-generated file\nexport type { ConvertOptions } from './convert-options.generated.js';\nexport { buildArgs } from './convert-options.generated.js';\nimport type { ConvertOptions } from './convert-options.generated.js';\nimport { buildArgs } from './convert-options.generated.js';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n /**\n * When true, forwards Java's stdout and stderr chunks to the parent\n * process in real time as well as accumulating them. Used by the bundled\n * CLI so long-running conversions show progress as it happens.\n */\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n const commandArgs = ['-jar', jarPath, ...args];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n // StringDecoder buffers incomplete multi-byte UTF-8 sequences across\n // chunk boundaries — Buffer.toString() alone would emit replacement\n // characters when, e.g., a 3-byte Korean codepoint splits across two\n // 'data' events. One decoder per stream so they don't share state.\n const stdoutDecoder = new StringDecoder('utf8');\n const stderrDecoder = new StringDecoder('utf8');\n\n javaProcess.stdout.on('data', (data: Buffer) => {\n const chunk = stdoutDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n // Stream-only: don't also accumulate, or a multi-hour conversion\n // would buffer its entire (potentially gigabyte) stdout in memory\n // for no consumer.\n process.stdout.write(chunk);\n } else {\n stdout += chunk;\n }\n });\n\n javaProcess.stderr.on('data', (data: Buffer) => {\n const chunk = stderrDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n // stderr is always accumulated (progress logs are small and we need\n // them for error messages on non-zero exit).\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n // Flush any trailing bytes the decoder is still holding (always emit\n // them — if we drop them on error paths, error messages with non-ASCII\n // characters lose their tail).\n const stdoutTail = stdoutDecoder.end();\n const stderrTail = stderrDecoder.end();\n if (stdoutTail.length > 0) {\n if (streamOutput) {\n process.stdout.write(stdoutTail);\n } else {\n stdout += stdoutTail;\n }\n }\n if (stderrTail.length > 0) {\n if (streamOutput) process.stderr.write(stderrTail);\n stderr += stderrTail;\n }\n\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nfunction buildJarArgs(\n inputPaths: string | string[],\n options: ConvertOptions,\n): string[] | Error {\n const inputList = Array.isArray(inputPaths) ? inputPaths : [inputPaths];\n if (inputList.length === 0) {\n return new Error('At least one input path must be provided.');\n }\n\n for (const input of inputList) {\n if (!fs.existsSync(input)) {\n return new Error(`Input file or folder not found: ${input}`);\n }\n }\n\n return [...inputList, ...buildArgs(options)];\n}\n\nexport function convert(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<string> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n return Promise.reject(argsOrError);\n }\n // Library API: never streams to the parent process. Returns the full stdout\n // string so callers can do `const out = await convert(...)` without surprise\n // side-effects on process.stdout / process.stderr.\n return executeJar(argsOrError, { streamOutput: false });\n}\n\n/**\n * Internal entry point used by the bundled CLI. Streams Java's stdout and\n * stderr to the parent process in real time (so long-running conversions like\n * hybrid mode show progress as it happens) and resolves without a stdout\n * payload — preventing the caller from re-printing what was already streamed.\n *\n * Not part of the public API: do not import this from application code. Use\n * {@link convert} instead.\n *\n * @internal\n */\nexport async function _runForCli(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<void> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n throw argsOrError;\n }\n await executeJar(argsOrError, { streamOutput: true });\n}\n\n/**\n * @deprecated Use `convert()` and `ConvertOptions` instead. This function will be removed in a future version.\n */\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n useStructTree?: boolean;\n}\n\n/**\n * @deprecated Use `convert()` instead. This function will be removed in a future version.\n */\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n console.warn(\n 'Warning: run() is deprecated and will be removed in a future version. Use convert() instead.',\n );\n\n // Build format array based on legacy boolean options\n const formats: string[] = [];\n if (!options.noJson) {\n formats.push('json');\n }\n if (options.generateMarkdown) {\n if (options.addImageToMarkdown) {\n formats.push('markdown-with-images');\n } else if (options.htmlInMarkdown) {\n formats.push('markdown-with-html');\n } else {\n formats.push('markdown');\n }\n }\n if (options.generateHtml) {\n formats.push('html');\n }\n if (options.generateAnnotatedPdf) {\n formats.push('pdf');\n }\n\n return convert(inputPath, {\n outputDir: options.outputFolder,\n password: options.password,\n replaceInvalidChars: options.replaceInvalidChars,\n keepLineBreaks: options.keepLineBreaks,\n contentSafetyOff: options.contentSafetyOff,\n useStructTree: options.useStructTree,\n format: formats.length > 0 ? formats : undefined,\n quiet: !options.debug,\n });\n}\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\n/**\n * Options for the convert function.\n */\nexport interface ConvertOptions {\n /** Directory where output files are written. Default: input file directory */\n outputDir?: string;\n /** Password for encrypted PDF files */\n password?: string;\n /** Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images, tagged-pdf. Default: json */\n format?: string | string[];\n /** Suppress console logging output */\n quiet?: boolean;\n /** Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg */\n contentSafetyOff?: string | string[];\n /** Enable sensitive data sanitization. Replaces emails, phone numbers, IPs, credit cards, and URLs with placeholders */\n sanitize?: boolean;\n /** Preserve original line breaks in extracted text */\n keepLineBreaks?: boolean;\n /** Replacement character for invalid/unrecognized characters. Default: space */\n replaceInvalidChars?: string;\n /** Use PDF structure tree (tagged PDF) for reading order and semantic structure */\n useStructTree?: boolean;\n /** Table detection method. Values: default (border-based), cluster (border + cluster). Default: default */\n tableMethod?: string;\n /** Reading order algorithm. Values: off, xycut. Default: xycut */\n readingOrder?: string;\n /** Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none */\n markdownPageSeparator?: string;\n /** Separator between pages in text output. Use %page-number% for page numbers. Default: none */\n textPageSeparator?: string;\n /** Separator between pages in HTML output. Use %page-number% for page numbers. Default: none */\n htmlPageSeparator?: string;\n /** Image output mode. Values: off (no images), embedded (Base64 data URIs), external (file references). Default: external */\n imageOutput?: string;\n /** Output format for extracted images. Values: png, jpeg. Default: png */\n imageFormat?: string;\n /** Directory for extracted images */\n imageDir?: string;\n /** Pages to extract (e.g., \"1,3,5-7\"). Default: all pages */\n pages?: string;\n /** Include page headers and footers in output */\n includeHeaderFooter?: boolean;\n /** Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental) */\n detectStrikethrough?: boolean;\n /** Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai */\n hybrid?: string;\n /** Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend) */\n hybridMode?: string;\n /** Hybrid backend server URL (overrides default) */\n hybridUrl?: string;\n /** Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0 */\n hybridTimeout?: string;\n /** Opt in to Java fallback on hybrid backend error (default: disabled) */\n hybridFallback?: boolean;\n /** DLA label 7 (regionlist) handling. Requires --hybrid=hancom-ai. Values: table-first (default; check TSR overlap), list-only (skip TSR, always treat as list) */\n hybridHancomAiRegionlistStrategy?: string;\n /** OCR strategy. Requires --hybrid=hancom-ai. Values: off (stream-only), auto (default; stream first, OCR fallback), force (OCR-only) */\n hybridHancomAiOcrStrategy?: string;\n /** Page image cache backing. Requires --hybrid=hancom-ai. Values: memory (default), disk */\n hybridHancomAiImageCache?: string;\n /** Write output to stdout instead of file (single format only) */\n toStdout?: boolean;\n /** Number of worker threads for per-page processing. Default: 1 (sequential, stable). Values >1 (experimental) run pages in parallel for faster throughput; output may vary slightly on some PDFs. Capped at the number of available CPU cores. Applies to the native Java pipeline only; ignored in --hybrid mode */\n threads?: string;\n}\n\n/**\n * Options as parsed from CLI (all values are strings from commander).\n */\nexport interface CliOptions {\n outputDir?: string;\n password?: string;\n format?: string;\n quiet?: boolean;\n contentSafetyOff?: string;\n sanitize?: boolean;\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n useStructTree?: boolean;\n tableMethod?: string;\n readingOrder?: string;\n markdownPageSeparator?: string;\n textPageSeparator?: string;\n htmlPageSeparator?: string;\n imageOutput?: string;\n imageFormat?: string;\n imageDir?: string;\n pages?: string;\n includeHeaderFooter?: boolean;\n detectStrikethrough?: boolean;\n hybrid?: string;\n hybridMode?: string;\n hybridUrl?: string;\n hybridTimeout?: string;\n hybridFallback?: boolean;\n hybridHancomAiRegionlistStrategy?: string;\n hybridHancomAiOcrStrategy?: string;\n hybridHancomAiImageCache?: string;\n toStdout?: boolean;\n threads?: string;\n}\n\n/**\n * Convert CLI options to ConvertOptions.\n */\nexport function buildConvertOptions(cliOptions: CliOptions): ConvertOptions {\n const convertOptions: ConvertOptions = {};\n\n if (cliOptions.outputDir) {\n convertOptions.outputDir = cliOptions.outputDir;\n }\n if (cliOptions.password) {\n convertOptions.password = cliOptions.password;\n }\n if (cliOptions.format) {\n convertOptions.format = cliOptions.format;\n }\n if (cliOptions.quiet) {\n convertOptions.quiet = true;\n }\n if (cliOptions.contentSafetyOff) {\n convertOptions.contentSafetyOff = cliOptions.contentSafetyOff;\n }\n if (cliOptions.sanitize) {\n convertOptions.sanitize = true;\n }\n if (cliOptions.keepLineBreaks) {\n convertOptions.keepLineBreaks = true;\n }\n if (cliOptions.replaceInvalidChars) {\n convertOptions.replaceInvalidChars = cliOptions.replaceInvalidChars;\n }\n if (cliOptions.useStructTree) {\n convertOptions.useStructTree = true;\n }\n if (cliOptions.tableMethod) {\n convertOptions.tableMethod = cliOptions.tableMethod;\n }\n if (cliOptions.readingOrder) {\n convertOptions.readingOrder = cliOptions.readingOrder;\n }\n if (cliOptions.markdownPageSeparator) {\n convertOptions.markdownPageSeparator = cliOptions.markdownPageSeparator;\n }\n if (cliOptions.textPageSeparator) {\n convertOptions.textPageSeparator = cliOptions.textPageSeparator;\n }\n if (cliOptions.htmlPageSeparator) {\n convertOptions.htmlPageSeparator = cliOptions.htmlPageSeparator;\n }\n if (cliOptions.imageOutput) {\n convertOptions.imageOutput = cliOptions.imageOutput;\n }\n if (cliOptions.imageFormat) {\n convertOptions.imageFormat = cliOptions.imageFormat;\n }\n if (cliOptions.imageDir) {\n convertOptions.imageDir = cliOptions.imageDir;\n }\n if (cliOptions.pages) {\n convertOptions.pages = cliOptions.pages;\n }\n if (cliOptions.includeHeaderFooter) {\n convertOptions.includeHeaderFooter = true;\n }\n if (cliOptions.detectStrikethrough) {\n convertOptions.detectStrikethrough = true;\n }\n if (cliOptions.hybrid) {\n convertOptions.hybrid = cliOptions.hybrid;\n }\n if (cliOptions.hybridMode) {\n convertOptions.hybridMode = cliOptions.hybridMode;\n }\n if (cliOptions.hybridUrl) {\n convertOptions.hybridUrl = cliOptions.hybridUrl;\n }\n if (cliOptions.hybridTimeout) {\n convertOptions.hybridTimeout = cliOptions.hybridTimeout;\n }\n if (cliOptions.hybridFallback) {\n convertOptions.hybridFallback = true;\n }\n if (cliOptions.hybridHancomAiRegionlistStrategy) {\n convertOptions.hybridHancomAiRegionlistStrategy = cliOptions.hybridHancomAiRegionlistStrategy;\n }\n if (cliOptions.hybridHancomAiOcrStrategy) {\n convertOptions.hybridHancomAiOcrStrategy = cliOptions.hybridHancomAiOcrStrategy;\n }\n if (cliOptions.hybridHancomAiImageCache) {\n convertOptions.hybridHancomAiImageCache = cliOptions.hybridHancomAiImageCache;\n }\n if (cliOptions.toStdout) {\n convertOptions.toStdout = true;\n }\n if (cliOptions.threads) {\n convertOptions.threads = cliOptions.threads;\n }\n\n return convertOptions;\n}\n\n/**\n * Build CLI arguments array from ConvertOptions.\n */\nexport function buildArgs(options: ConvertOptions): string[] {\n const args: string[] = [];\n\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format) {\n if (Array.isArray(options.format)) {\n if (options.format.length > 0) {\n args.push('--format', options.format.join(','));\n }\n } else {\n args.push('--format', options.format);\n }\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff) {\n if (Array.isArray(options.contentSafetyOff)) {\n if (options.contentSafetyOff.length > 0) {\n args.push('--content-safety-off', options.contentSafetyOff.join(','));\n }\n } else {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n }\n if (options.sanitize) {\n args.push('--sanitize');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.useStructTree) {\n args.push('--use-struct-tree');\n }\n if (options.tableMethod) {\n args.push('--table-method', options.tableMethod);\n }\n if (options.readingOrder) {\n args.push('--reading-order', options.readingOrder);\n }\n if (options.markdownPageSeparator) {\n args.push('--markdown-page-separator', options.markdownPageSeparator);\n }\n if (options.textPageSeparator) {\n args.push('--text-page-separator', options.textPageSeparator);\n }\n if (options.htmlPageSeparator) {\n args.push('--html-page-separator', options.htmlPageSeparator);\n }\n if (options.imageOutput) {\n args.push('--image-output', options.imageOutput);\n }\n if (options.imageFormat) {\n args.push('--image-format', options.imageFormat);\n }\n if (options.imageDir) {\n args.push('--image-dir', options.imageDir);\n }\n if (options.pages) {\n args.push('--pages', options.pages);\n }\n if (options.includeHeaderFooter) {\n args.push('--include-header-footer');\n }\n if (options.detectStrikethrough) {\n args.push('--detect-strikethrough');\n }\n if (options.hybrid) {\n args.push('--hybrid', options.hybrid);\n }\n if (options.hybridMode) {\n args.push('--hybrid-mode', options.hybridMode);\n }\n if (options.hybridUrl) {\n args.push('--hybrid-url', options.hybridUrl);\n }\n if (options.hybridTimeout) {\n args.push('--hybrid-timeout', options.hybridTimeout);\n }\n if (options.hybridFallback) {\n args.push('--hybrid-fallback');\n }\n if (options.hybridHancomAiRegionlistStrategy) {\n args.push('--hybrid-hancom-ai-regionlist-strategy', options.hybridHancomAiRegionlistStrategy);\n }\n if (options.hybridHancomAiOcrStrategy) {\n args.push('--hybrid-hancom-ai-ocr-strategy', options.hybridHancomAiOcrStrategy);\n }\n if (options.hybridHancomAiImageCache) {\n args.push('--hybrid-hancom-ai-image-cache', options.hybridHancomAiImageCache);\n }\n if (options.toStdout) {\n args.push('--to-stdout');\n }\n if (options.threads) {\n args.push('--threads', options.threads);\n }\n\n return args;\n}\n"],"mappings":";AAAA,SAAS,aAAa;AACtB,YAAY,UAAU;AACtB,YAAY,QAAQ;AACpB,SAAS,qBAAqB;AAC9B,SAAS,qBAAqB;;;AC4MvB,SAAS,UAAU,SAAmC;AAC3D,QAAM,OAAiB,CAAC;AAExB,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,QAAQ;AAClB,QAAI,MAAM,QAAQ,QAAQ,MAAM,GAAG;AACjC,UAAI,QAAQ,OAAO,SAAS,GAAG;AAC7B,aAAK,KAAK,YAAY,QAAQ,OAAO,KAAK,GAAG,CAAC;AAAA,MAChD;AAAA,IACF,OAAO;AACL,WAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,IACtC;AAAA,EACF;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,MAAM,QAAQ,QAAQ,gBAAgB,GAAG;AAC3C,UAAI,QAAQ,iBAAiB,SAAS,GAAG;AACvC,aAAK,KAAK,wBAAwB,QAAQ,iBAAiB,KAAK,GAAG,CAAC;AAAA,MACtE;AAAA,IACF,OAAO;AACL,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AAAA,EACF;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,YAAY;AAAA,EACxB;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBAAoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,cAAc;AACxB,SAAK,KAAK,mBAAmB,QAAQ,YAAY;AAAA,EACnD;AACA,MAAI,QAAQ,uBAAuB;AACjC,SAAK,KAAK,6BAA6B,QAAQ,qBAAqB;AAAA,EACtE;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,eAAe,QAAQ,QAAQ;AAAA,EAC3C;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,WAAW,QAAQ,KAAK;AAAA,EACpC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,yBAAyB;AAAA,EACrC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,wBAAwB;AAAA,EACpC;AACA,MAAI,QAAQ,QAAQ;AAClB,SAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,EACtC;AACA,MAAI,QAAQ,YAAY;AACtB,SAAK,KAAK,iBAAiB,QAAQ,UAAU;AAAA,EAC/C;AACA,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,oBAAoB,QAAQ,aAAa;AAAA,EACrD;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,kCAAkC;AAC5C,SAAK,KAAK,0CAA0C,QAAQ,gCAAgC;AAAA,EAC9F;AACA,MAAI,QAAQ,2BAA2B;AACrC,SAAK,KAAK,mCAAmC,QAAQ,yBAAyB;AAAA,EAChF;AACA,MAAI,QAAQ,0BAA0B;AACpC,SAAK,KAAK,kCAAkC,QAAQ,wBAAwB;AAAA,EAC9E;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,aAAa;AAAA,EACzB;AACA,MAAI,QAAQ,SAAS;AACnB,SAAK,KAAK,aAAa,QAAQ,OAAO;AAAA,EACxC;AAEA,SAAO;AACT;;;AD/SA,IAAMA,cAAa,cAAc,YAAY,GAAG;AAChD,IAAMC,aAAiB,aAAQD,WAAU;AAEzC,IAAM,WAAW;AAWjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,eAAe,MAAM,IAAI;AAEjC,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAKC,YAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAChB,UAAM,cAAc,CAAC,QAAQ,SAAS,GAAG,IAAI;AAE7C,UAAM,cAAc,MAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAKb,UAAM,gBAAgB,IAAI,cAAc,MAAM;AAC9C,UAAM,gBAAgB,IAAI,cAAc,MAAM;AAE9C,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAIhB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B,OAAO;AACL,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AAGA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAIhC,YAAM,aAAa,cAAc,IAAI;AACrC,YAAM,aAAa,cAAc,IAAI;AACrC,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,cAAc;AAChB,kBAAQ,OAAO,MAAM,UAAU;AAAA,QACjC,OAAO;AACL,oBAAU;AAAA,QACZ;AAAA,MACF;AACA,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,aAAc,SAAQ,OAAO,MAAM,UAAU;AACjD,kBAAU;AAAA,MACZ;AAEA,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AACA,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AAEA,SAAS,aACP,YACA,SACkB;AAClB,QAAM,YAAY,MAAM,QAAQ,UAAU,IAAI,aAAa,CAAC,UAAU;AACtE,MAAI,UAAU,WAAW,GAAG;AAC1B,WAAO,IAAI,MAAM,2CAA2C;AAAA,EAC9D;AAEA,aAAW,SAAS,WAAW;AAC7B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE;AAAA,IAC7D;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,WAAW,GAAG,UAAU,OAAO,CAAC;AAC7C;AAEO,SAAS,QACd,YACA,UAA0B,CAAC,GACV;AACjB,QAAM,cAAc,aAAa,YAAY,OAAO;AACpD,MAAI,uBAAuB,OAAO;AAChC,WAAO,QAAQ,OAAO,WAAW;AAAA,EACnC;AAIA,SAAO,WAAW,aAAa,EAAE,cAAc,MAAM,CAAC;AACxD;AAaA,eAAsB,WACpB,YACA,UAA0B,CAAC,GACZ;AACf,QAAM,cAAc,aAAa,YAAY,OAAO;AACpD,MAAI,uBAAuB,OAAO;AAChC,UAAM;AAAA,EACR;AACA,QAAM,WAAW,aAAa,EAAE,cAAc,KAAK,CAAC;AACtD;AAwBO,SAAS,IAAI,WAAmB,UAAsB,CAAC,GAAoB;AAChF,UAAQ;AAAA,IACN;AAAA,EACF;AAGA,QAAM,UAAoB,CAAC;AAC3B,MAAI,CAAC,QAAQ,QAAQ;AACnB,YAAQ,KAAK,MAAM;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,QAAQ,oBAAoB;AAC9B,cAAQ,KAAK,sBAAsB;AAAA,IACrC,WAAW,QAAQ,gBAAgB;AACjC,cAAQ,KAAK,oBAAoB;AAAA,IACnC,OAAO;AACL,cAAQ,KAAK,UAAU;AAAA,IACzB;AAAA,EACF;AACA,MAAI,QAAQ,cAAc;AACxB,YAAQ,KAAK,MAAM;AAAA,EACrB;AACA,MAAI,QAAQ,sBAAsB;AAChC,YAAQ,KAAK,KAAK;AAAA,EACpB;AAEA,SAAO,QAAQ,WAAW;AAAA,IACxB,WAAW,QAAQ;AAAA,IACnB,UAAU,QAAQ;AAAA,IAClB,qBAAqB,QAAQ;AAAA,IAC7B,gBAAgB,QAAQ;AAAA,IACxB,kBAAkB,QAAQ;AAAA,IAC1B,eAAe,QAAQ;AAAA,IACvB,QAAQ,QAAQ,SAAS,IAAI,UAAU;AAAA,IACvC,OAAO,CAAC,QAAQ;AAAA,EAClB,CAAC;AACH;","names":["__filename","__dirname"]}
1
+ {"version":3,"sources":["../src/index.ts","../src/convert-options.generated.ts"],"sourcesContent":["import { spawn } from 'child_process';\nimport * as path from 'path';\nimport * as fs from 'fs';\nimport { StringDecoder } from 'string_decoder';\nimport { fileURLToPath } from 'url';\n\n// Re-export types and utilities from auto-generated file\nexport type { ConvertOptions } from './convert-options.generated.js';\nexport { buildArgs } from './convert-options.generated.js';\nimport type { ConvertOptions } from './convert-options.generated.js';\nimport { buildArgs } from './convert-options.generated.js';\n\nconst __filename = fileURLToPath(import.meta.url);\nconst __dirname = path.dirname(__filename);\n\nconst JAR_NAME = 'opendataloader-pdf-cli.jar';\n\ninterface JarExecutionOptions {\n /**\n * When true, forwards Java's stdout and stderr chunks to the parent\n * process in real time as well as accumulating them. Used by the bundled\n * CLI so long-running conversions show progress as it happens.\n */\n streamOutput?: boolean;\n}\n\nfunction executeJar(args: string[], executionOptions: JarExecutionOptions = {}): Promise<string> {\n const { streamOutput = false } = executionOptions;\n\n return new Promise((resolve, reject) => {\n const jarPath = path.join(__dirname, '..', 'lib', JAR_NAME);\n\n if (!fs.existsSync(jarPath)) {\n return reject(\n new Error(`JAR file not found at ${jarPath}. Please run the build script first.`),\n );\n }\n\n const command = 'java';\n // Force headless AWT so macOS doesn't surface a Dock icon (and steal focus)\n // every time the JVM touches ImageIO/PDFBox rendering. Safe on all OSes —\n // the CLI never opens a UI window, only manipulates BufferedImages.\n const commandArgs = [\n '-Djava.awt.headless=true',\n '-Dapple.awt.UIElement=true',\n '-jar',\n jarPath,\n ...args,\n ];\n\n const javaProcess = spawn(command, commandArgs);\n\n let stdout = '';\n let stderr = '';\n // StringDecoder buffers incomplete multi-byte UTF-8 sequences across\n // chunk boundaries — Buffer.toString() alone would emit replacement\n // characters when, e.g., a 3-byte Korean codepoint splits across two\n // 'data' events. One decoder per stream so they don't share state.\n const stdoutDecoder = new StringDecoder('utf8');\n const stderrDecoder = new StringDecoder('utf8');\n\n javaProcess.stdout.on('data', (data: Buffer) => {\n const chunk = stdoutDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n // Stream-only: don't also accumulate, or a multi-hour conversion\n // would buffer its entire (potentially gigabyte) stdout in memory\n // for no consumer.\n process.stdout.write(chunk);\n } else {\n stdout += chunk;\n }\n });\n\n javaProcess.stderr.on('data', (data: Buffer) => {\n const chunk = stderrDecoder.write(data);\n if (chunk.length === 0) return;\n if (streamOutput) {\n process.stderr.write(chunk);\n }\n // stderr is always accumulated (progress logs are small and we need\n // them for error messages on non-zero exit).\n stderr += chunk;\n });\n\n javaProcess.on('close', (code) => {\n // Flush any trailing bytes the decoder is still holding (always emit\n // them — if we drop them on error paths, error messages with non-ASCII\n // characters lose their tail).\n const stdoutTail = stdoutDecoder.end();\n const stderrTail = stderrDecoder.end();\n if (stdoutTail.length > 0) {\n if (streamOutput) {\n process.stdout.write(stdoutTail);\n } else {\n stdout += stdoutTail;\n }\n }\n if (stderrTail.length > 0) {\n if (streamOutput) process.stderr.write(stderrTail);\n stderr += stderrTail;\n }\n\n if (code === 0) {\n resolve(stdout);\n } else {\n const errorOutput = stderr || stdout;\n const error = new Error(\n `The opendataloader-pdf CLI exited with code ${code}.\\n\\n${errorOutput}`,\n );\n // Tag so the CLI can suppress re-printing this message — Java's\n // stderr was already streamed live to the parent in CLI mode, and\n // re-printing risks leaking anything sensitive Java logged\n // (e.g. a --password value echoed by an underlying library).\n (error as Error & { isJavaExit?: boolean }).isJavaExit = true;\n reject(error);\n }\n });\n\n javaProcess.on('error', (err: Error) => {\n if (err.message.includes('ENOENT')) {\n reject(\n new Error(\n \"'java' command not found. Please ensure Java is installed and in your system's PATH.\",\n ),\n );\n } else {\n reject(err);\n }\n });\n });\n}\n\nfunction buildJarArgs(\n inputPaths: string | string[],\n options: ConvertOptions,\n): string[] | Error {\n const inputList = Array.isArray(inputPaths) ? inputPaths : [inputPaths];\n if (inputList.length === 0) {\n return new Error('At least one input path must be provided.');\n }\n\n for (const input of inputList) {\n if (!fs.existsSync(input)) {\n return new Error(`Input file or folder not found: ${input}`);\n }\n }\n\n return [...inputList, ...buildArgs(options)];\n}\n\nexport function convert(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<string> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n return Promise.reject(argsOrError);\n }\n // Library API: never streams to the parent process. Returns the full stdout\n // string so callers can do `const out = await convert(...)` without surprise\n // side-effects on process.stdout / process.stderr.\n return executeJar(argsOrError, { streamOutput: false });\n}\n\n/**\n * Internal entry point used by the bundled CLI. Streams Java's stdout and\n * stderr to the parent process in real time (so long-running conversions like\n * hybrid mode show progress as it happens) and resolves without a stdout\n * payload — preventing the caller from re-printing what was already streamed.\n *\n * Not part of the public API: do not import this from application code. Use\n * {@link convert} instead.\n *\n * @internal\n */\nexport async function _runForCli(\n inputPaths: string | string[],\n options: ConvertOptions = {},\n): Promise<void> {\n const argsOrError = buildJarArgs(inputPaths, options);\n if (argsOrError instanceof Error) {\n throw argsOrError;\n }\n await executeJar(argsOrError, { streamOutput: true });\n}\n\n/**\n * @deprecated Use `convert()` and `ConvertOptions` instead. This function will be removed in a future version.\n */\nexport interface RunOptions {\n outputFolder?: string;\n password?: string;\n replaceInvalidChars?: string;\n generateMarkdown?: boolean;\n generateHtml?: boolean;\n generateAnnotatedPdf?: boolean;\n keepLineBreaks?: boolean;\n contentSafetyOff?: string;\n htmlInMarkdown?: boolean;\n addImageToMarkdown?: boolean;\n noJson?: boolean;\n debug?: boolean;\n useStructTree?: boolean;\n}\n\n/**\n * @deprecated Use `convert()` instead. This function will be removed in a future version.\n */\nexport function run(inputPath: string, options: RunOptions = {}): Promise<string> {\n console.warn(\n 'Warning: run() is deprecated and will be removed in a future version. Use convert() instead.',\n );\n\n // Build format array based on legacy boolean options\n const formats: string[] = [];\n if (!options.noJson) {\n formats.push('json');\n }\n if (options.generateMarkdown) {\n if (options.addImageToMarkdown) {\n formats.push('markdown-with-images');\n } else if (options.htmlInMarkdown) {\n formats.push('markdown-with-html');\n } else {\n formats.push('markdown');\n }\n }\n if (options.generateHtml) {\n formats.push('html');\n }\n if (options.generateAnnotatedPdf) {\n formats.push('pdf');\n }\n\n return convert(inputPath, {\n outputDir: options.outputFolder,\n password: options.password,\n replaceInvalidChars: options.replaceInvalidChars,\n keepLineBreaks: options.keepLineBreaks,\n contentSafetyOff: options.contentSafetyOff,\n useStructTree: options.useStructTree,\n format: formats.length > 0 ? formats : undefined,\n quiet: !options.debug,\n });\n}\n","// AUTO-GENERATED FROM options.json - DO NOT EDIT DIRECTLY\n// Run `npm run generate-options` to regenerate\n\n/**\n * Options for the convert function.\n */\nexport interface ConvertOptions {\n /** Directory where output files are written. Default: input file directory */\n outputDir?: string;\n /** Password for encrypted PDF files */\n password?: string;\n /** Output formats (comma-separated). Values: json, text, html, pdf, markdown, markdown-with-html, markdown-with-images, tagged-pdf. Default: json */\n format?: string | string[];\n /** Suppress console logging output */\n quiet?: boolean;\n /** Disable content safety filters. Values: all, hidden-text, off-page, tiny, hidden-ocg */\n contentSafetyOff?: string | string[];\n /** Enable sensitive data sanitization. Replaces emails, phone numbers, IPs, credit cards, and URLs with placeholders */\n sanitize?: boolean;\n /** Preserve original line breaks in extracted text */\n keepLineBreaks?: boolean;\n /** Replacement character for invalid/unrecognized characters. Default: space */\n replaceInvalidChars?: string;\n /** Use PDF structure tree (tagged PDF) for reading order and semantic structure */\n useStructTree?: boolean;\n /** Table detection method. Values: default (border-based), cluster (border + cluster). Default: default */\n tableMethod?: string;\n /** Reading order algorithm. Values: off, xycut. Default: xycut */\n readingOrder?: string;\n /** Separator between pages in Markdown output. Use %page-number% for page numbers. Default: none */\n markdownPageSeparator?: string;\n /** Separator between pages in text output. Use %page-number% for page numbers. Default: none */\n textPageSeparator?: string;\n /** Separator between pages in HTML output. Use %page-number% for page numbers. Default: none */\n htmlPageSeparator?: string;\n /** Image output mode. Values: off (no images), embedded (Base64 data URIs), external (file references). Default: external */\n imageOutput?: string;\n /** Output format for extracted images. Values: png, jpeg. Default: png */\n imageFormat?: string;\n /** Directory for extracted images */\n imageDir?: string;\n /** Pages to extract (e.g., \"1,3,5-7\"). Default: all pages */\n pages?: string;\n /** Include page headers and footers in output */\n includeHeaderFooter?: boolean;\n /** Detect strikethrough text and wrap with ~~ in Markdown output or <del></del> tag in HTML output (experimental) */\n detectStrikethrough?: boolean;\n /** Hybrid backend (requires a running server). Quick start: pip install \"opendataloader-pdf[hybrid]\" && opendataloader-pdf-hybrid --port 5002. For remote servers use --hybrid-url. Values: off (default), docling-fast, hancom-ai */\n hybrid?: string;\n /** Hybrid triage mode. Values: auto (default, dynamic triage), full (skip triage, all pages to backend) */\n hybridMode?: string;\n /** Hybrid backend server URL (overrides default) */\n hybridUrl?: string;\n /** Hybrid backend request timeout in milliseconds (0 = no timeout). Default: 0 */\n hybridTimeout?: string;\n /** Opt in to Java fallback on hybrid backend error (default: disabled) */\n hybridFallback?: boolean;\n /** DLA label 7 (regionlist) handling. Requires --hybrid=hancom-ai. Values: table-first (default; check TSR overlap), list-only (skip TSR, always treat as list) */\n hybridHancomAiRegionlistStrategy?: string;\n /** OCR strategy. Requires --hybrid=hancom-ai. Values: off (stream-only), auto (default; stream first, OCR fallback), force (OCR-only) */\n hybridHancomAiOcrStrategy?: string;\n /** Page image cache backing. Requires --hybrid=hancom-ai. Values: memory (default), disk */\n hybridHancomAiImageCache?: string;\n /** Write output to stdout instead of file (single format only) */\n toStdout?: boolean;\n /** Number of worker threads for per-page processing. Default: 1 (sequential, stable). Values >1 (experimental) run pages in parallel for faster throughput; output may vary slightly on some PDFs. Capped at the number of available CPU cores. Applies to the native Java pipeline only; ignored in --hybrid mode */\n threads?: string;\n}\n\n/**\n * Options as parsed from CLI (all values are strings from commander).\n */\nexport interface CliOptions {\n outputDir?: string;\n password?: string;\n format?: string;\n quiet?: boolean;\n contentSafetyOff?: string;\n sanitize?: boolean;\n keepLineBreaks?: boolean;\n replaceInvalidChars?: string;\n useStructTree?: boolean;\n tableMethod?: string;\n readingOrder?: string;\n markdownPageSeparator?: string;\n textPageSeparator?: string;\n htmlPageSeparator?: string;\n imageOutput?: string;\n imageFormat?: string;\n imageDir?: string;\n pages?: string;\n includeHeaderFooter?: boolean;\n detectStrikethrough?: boolean;\n hybrid?: string;\n hybridMode?: string;\n hybridUrl?: string;\n hybridTimeout?: string;\n hybridFallback?: boolean;\n hybridHancomAiRegionlistStrategy?: string;\n hybridHancomAiOcrStrategy?: string;\n hybridHancomAiImageCache?: string;\n toStdout?: boolean;\n threads?: string;\n}\n\n/**\n * Convert CLI options to ConvertOptions.\n */\nexport function buildConvertOptions(cliOptions: CliOptions): ConvertOptions {\n const convertOptions: ConvertOptions = {};\n\n if (cliOptions.outputDir) {\n convertOptions.outputDir = cliOptions.outputDir;\n }\n if (cliOptions.password) {\n convertOptions.password = cliOptions.password;\n }\n if (cliOptions.format) {\n convertOptions.format = cliOptions.format;\n }\n if (cliOptions.quiet) {\n convertOptions.quiet = true;\n }\n if (cliOptions.contentSafetyOff) {\n convertOptions.contentSafetyOff = cliOptions.contentSafetyOff;\n }\n if (cliOptions.sanitize) {\n convertOptions.sanitize = true;\n }\n if (cliOptions.keepLineBreaks) {\n convertOptions.keepLineBreaks = true;\n }\n if (cliOptions.replaceInvalidChars) {\n convertOptions.replaceInvalidChars = cliOptions.replaceInvalidChars;\n }\n if (cliOptions.useStructTree) {\n convertOptions.useStructTree = true;\n }\n if (cliOptions.tableMethod) {\n convertOptions.tableMethod = cliOptions.tableMethod;\n }\n if (cliOptions.readingOrder) {\n convertOptions.readingOrder = cliOptions.readingOrder;\n }\n if (cliOptions.markdownPageSeparator) {\n convertOptions.markdownPageSeparator = cliOptions.markdownPageSeparator;\n }\n if (cliOptions.textPageSeparator) {\n convertOptions.textPageSeparator = cliOptions.textPageSeparator;\n }\n if (cliOptions.htmlPageSeparator) {\n convertOptions.htmlPageSeparator = cliOptions.htmlPageSeparator;\n }\n if (cliOptions.imageOutput) {\n convertOptions.imageOutput = cliOptions.imageOutput;\n }\n if (cliOptions.imageFormat) {\n convertOptions.imageFormat = cliOptions.imageFormat;\n }\n if (cliOptions.imageDir) {\n convertOptions.imageDir = cliOptions.imageDir;\n }\n if (cliOptions.pages) {\n convertOptions.pages = cliOptions.pages;\n }\n if (cliOptions.includeHeaderFooter) {\n convertOptions.includeHeaderFooter = true;\n }\n if (cliOptions.detectStrikethrough) {\n convertOptions.detectStrikethrough = true;\n }\n if (cliOptions.hybrid) {\n convertOptions.hybrid = cliOptions.hybrid;\n }\n if (cliOptions.hybridMode) {\n convertOptions.hybridMode = cliOptions.hybridMode;\n }\n if (cliOptions.hybridUrl) {\n convertOptions.hybridUrl = cliOptions.hybridUrl;\n }\n if (cliOptions.hybridTimeout) {\n convertOptions.hybridTimeout = cliOptions.hybridTimeout;\n }\n if (cliOptions.hybridFallback) {\n convertOptions.hybridFallback = true;\n }\n if (cliOptions.hybridHancomAiRegionlistStrategy) {\n convertOptions.hybridHancomAiRegionlistStrategy = cliOptions.hybridHancomAiRegionlistStrategy;\n }\n if (cliOptions.hybridHancomAiOcrStrategy) {\n convertOptions.hybridHancomAiOcrStrategy = cliOptions.hybridHancomAiOcrStrategy;\n }\n if (cliOptions.hybridHancomAiImageCache) {\n convertOptions.hybridHancomAiImageCache = cliOptions.hybridHancomAiImageCache;\n }\n if (cliOptions.toStdout) {\n convertOptions.toStdout = true;\n }\n if (cliOptions.threads) {\n convertOptions.threads = cliOptions.threads;\n }\n\n return convertOptions;\n}\n\n/**\n * Build CLI arguments array from ConvertOptions.\n */\nexport function buildArgs(options: ConvertOptions): string[] {\n const args: string[] = [];\n\n if (options.outputDir) {\n args.push('--output-dir', options.outputDir);\n }\n if (options.password) {\n args.push('--password', options.password);\n }\n if (options.format) {\n if (Array.isArray(options.format)) {\n if (options.format.length > 0) {\n args.push('--format', options.format.join(','));\n }\n } else {\n args.push('--format', options.format);\n }\n }\n if (options.quiet) {\n args.push('--quiet');\n }\n if (options.contentSafetyOff) {\n if (Array.isArray(options.contentSafetyOff)) {\n if (options.contentSafetyOff.length > 0) {\n args.push('--content-safety-off', options.contentSafetyOff.join(','));\n }\n } else {\n args.push('--content-safety-off', options.contentSafetyOff);\n }\n }\n if (options.sanitize) {\n args.push('--sanitize');\n }\n if (options.keepLineBreaks) {\n args.push('--keep-line-breaks');\n }\n if (options.replaceInvalidChars) {\n args.push('--replace-invalid-chars', options.replaceInvalidChars);\n }\n if (options.useStructTree) {\n args.push('--use-struct-tree');\n }\n if (options.tableMethod) {\n args.push('--table-method', options.tableMethod);\n }\n if (options.readingOrder) {\n args.push('--reading-order', options.readingOrder);\n }\n if (options.markdownPageSeparator) {\n args.push('--markdown-page-separator', options.markdownPageSeparator);\n }\n if (options.textPageSeparator) {\n args.push('--text-page-separator', options.textPageSeparator);\n }\n if (options.htmlPageSeparator) {\n args.push('--html-page-separator', options.htmlPageSeparator);\n }\n if (options.imageOutput) {\n args.push('--image-output', options.imageOutput);\n }\n if (options.imageFormat) {\n args.push('--image-format', options.imageFormat);\n }\n if (options.imageDir) {\n args.push('--image-dir', options.imageDir);\n }\n if (options.pages) {\n args.push('--pages', options.pages);\n }\n if (options.includeHeaderFooter) {\n args.push('--include-header-footer');\n }\n if (options.detectStrikethrough) {\n args.push('--detect-strikethrough');\n }\n if (options.hybrid) {\n args.push('--hybrid', options.hybrid);\n }\n if (options.hybridMode) {\n args.push('--hybrid-mode', options.hybridMode);\n }\n if (options.hybridUrl) {\n args.push('--hybrid-url', options.hybridUrl);\n }\n if (options.hybridTimeout) {\n args.push('--hybrid-timeout', options.hybridTimeout);\n }\n if (options.hybridFallback) {\n args.push('--hybrid-fallback');\n }\n if (options.hybridHancomAiRegionlistStrategy) {\n args.push('--hybrid-hancom-ai-regionlist-strategy', options.hybridHancomAiRegionlistStrategy);\n }\n if (options.hybridHancomAiOcrStrategy) {\n args.push('--hybrid-hancom-ai-ocr-strategy', options.hybridHancomAiOcrStrategy);\n }\n if (options.hybridHancomAiImageCache) {\n args.push('--hybrid-hancom-ai-image-cache', options.hybridHancomAiImageCache);\n }\n if (options.toStdout) {\n args.push('--to-stdout');\n }\n if (options.threads) {\n args.push('--threads', options.threads);\n }\n\n return args;\n}\n"],"mappings":";AAAA,SAAS,aAAa;AACtB,YAAY,UAAU;AACtB,YAAY,QAAQ;AACpB,SAAS,qBAAqB;AAC9B,SAAS,qBAAqB;;;AC4MvB,SAAS,UAAU,SAAmC;AAC3D,QAAM,OAAiB,CAAC;AAExB,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,cAAc,QAAQ,QAAQ;AAAA,EAC1C;AACA,MAAI,QAAQ,QAAQ;AAClB,QAAI,MAAM,QAAQ,QAAQ,MAAM,GAAG;AACjC,UAAI,QAAQ,OAAO,SAAS,GAAG;AAC7B,aAAK,KAAK,YAAY,QAAQ,OAAO,KAAK,GAAG,CAAC;AAAA,MAChD;AAAA,IACF,OAAO;AACL,WAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,IACtC;AAAA,EACF;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,SAAS;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,MAAM,QAAQ,QAAQ,gBAAgB,GAAG;AAC3C,UAAI,QAAQ,iBAAiB,SAAS,GAAG;AACvC,aAAK,KAAK,wBAAwB,QAAQ,iBAAiB,KAAK,GAAG,CAAC;AAAA,MACtE;AAAA,IACF,OAAO;AACL,WAAK,KAAK,wBAAwB,QAAQ,gBAAgB;AAAA,IAC5D;AAAA,EACF;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,YAAY;AAAA,EACxB;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,oBAAoB;AAAA,EAChC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,2BAA2B,QAAQ,mBAAmB;AAAA,EAClE;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,cAAc;AACxB,SAAK,KAAK,mBAAmB,QAAQ,YAAY;AAAA,EACnD;AACA,MAAI,QAAQ,uBAAuB;AACjC,SAAK,KAAK,6BAA6B,QAAQ,qBAAqB;AAAA,EACtE;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,mBAAmB;AAC7B,SAAK,KAAK,yBAAyB,QAAQ,iBAAiB;AAAA,EAC9D;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,aAAa;AACvB,SAAK,KAAK,kBAAkB,QAAQ,WAAW;AAAA,EACjD;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,eAAe,QAAQ,QAAQ;AAAA,EAC3C;AACA,MAAI,QAAQ,OAAO;AACjB,SAAK,KAAK,WAAW,QAAQ,KAAK;AAAA,EACpC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,yBAAyB;AAAA,EACrC;AACA,MAAI,QAAQ,qBAAqB;AAC/B,SAAK,KAAK,wBAAwB;AAAA,EACpC;AACA,MAAI,QAAQ,QAAQ;AAClB,SAAK,KAAK,YAAY,QAAQ,MAAM;AAAA,EACtC;AACA,MAAI,QAAQ,YAAY;AACtB,SAAK,KAAK,iBAAiB,QAAQ,UAAU;AAAA,EAC/C;AACA,MAAI,QAAQ,WAAW;AACrB,SAAK,KAAK,gBAAgB,QAAQ,SAAS;AAAA,EAC7C;AACA,MAAI,QAAQ,eAAe;AACzB,SAAK,KAAK,oBAAoB,QAAQ,aAAa;AAAA,EACrD;AACA,MAAI,QAAQ,gBAAgB;AAC1B,SAAK,KAAK,mBAAmB;AAAA,EAC/B;AACA,MAAI,QAAQ,kCAAkC;AAC5C,SAAK,KAAK,0CAA0C,QAAQ,gCAAgC;AAAA,EAC9F;AACA,MAAI,QAAQ,2BAA2B;AACrC,SAAK,KAAK,mCAAmC,QAAQ,yBAAyB;AAAA,EAChF;AACA,MAAI,QAAQ,0BAA0B;AACpC,SAAK,KAAK,kCAAkC,QAAQ,wBAAwB;AAAA,EAC9E;AACA,MAAI,QAAQ,UAAU;AACpB,SAAK,KAAK,aAAa;AAAA,EACzB;AACA,MAAI,QAAQ,SAAS;AACnB,SAAK,KAAK,aAAa,QAAQ,OAAO;AAAA,EACxC;AAEA,SAAO;AACT;;;AD/SA,IAAMA,cAAa,cAAc,YAAY,GAAG;AAChD,IAAMC,aAAiB,aAAQD,WAAU;AAEzC,IAAM,WAAW;AAWjB,SAAS,WAAW,MAAgB,mBAAwC,CAAC,GAAoB;AAC/F,QAAM,EAAE,eAAe,MAAM,IAAI;AAEjC,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,UAAe,UAAKC,YAAW,MAAM,OAAO,QAAQ;AAE1D,QAAI,CAAI,cAAW,OAAO,GAAG;AAC3B,aAAO;AAAA,QACL,IAAI,MAAM,yBAAyB,OAAO,sCAAsC;AAAA,MAClF;AAAA,IACF;AAEA,UAAM,UAAU;AAIhB,UAAM,cAAc;AAAA,MAClB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA,GAAG;AAAA,IACL;AAEA,UAAM,cAAc,MAAM,SAAS,WAAW;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAKb,UAAM,gBAAgB,IAAI,cAAc,MAAM;AAC9C,UAAM,gBAAgB,IAAI,cAAc,MAAM;AAE9C,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAIhB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B,OAAO;AACL,kBAAU;AAAA,MACZ;AAAA,IACF,CAAC;AAED,gBAAY,OAAO,GAAG,QAAQ,CAAC,SAAiB;AAC9C,YAAM,QAAQ,cAAc,MAAM,IAAI;AACtC,UAAI,MAAM,WAAW,EAAG;AACxB,UAAI,cAAc;AAChB,gBAAQ,OAAO,MAAM,KAAK;AAAA,MAC5B;AAGA,gBAAU;AAAA,IACZ,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,SAAS;AAIhC,YAAM,aAAa,cAAc,IAAI;AACrC,YAAM,aAAa,cAAc,IAAI;AACrC,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,cAAc;AAChB,kBAAQ,OAAO,MAAM,UAAU;AAAA,QACjC,OAAO;AACL,oBAAU;AAAA,QACZ;AAAA,MACF;AACA,UAAI,WAAW,SAAS,GAAG;AACzB,YAAI,aAAc,SAAQ,OAAO,MAAM,UAAU;AACjD,kBAAU;AAAA,MACZ;AAEA,UAAI,SAAS,GAAG;AACd,gBAAQ,MAAM;AAAA,MAChB,OAAO;AACL,cAAM,cAAc,UAAU;AAC9B,cAAM,QAAQ,IAAI;AAAA,UAChB,+CAA+C,IAAI;AAAA;AAAA,EAAQ,WAAW;AAAA,QACxE;AAKA,QAAC,MAA2C,aAAa;AACzD,eAAO,KAAK;AAAA,MACd;AAAA,IACF,CAAC;AAED,gBAAY,GAAG,SAAS,CAAC,QAAe;AACtC,UAAI,IAAI,QAAQ,SAAS,QAAQ,GAAG;AAClC;AAAA,UACE,IAAI;AAAA,YACF;AAAA,UACF;AAAA,QACF;AAAA,MACF,OAAO;AACL,eAAO,GAAG;AAAA,MACZ;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AAEA,SAAS,aACP,YACA,SACkB;AAClB,QAAM,YAAY,MAAM,QAAQ,UAAU,IAAI,aAAa,CAAC,UAAU;AACtE,MAAI,UAAU,WAAW,GAAG;AAC1B,WAAO,IAAI,MAAM,2CAA2C;AAAA,EAC9D;AAEA,aAAW,SAAS,WAAW;AAC7B,QAAI,CAAI,cAAW,KAAK,GAAG;AACzB,aAAO,IAAI,MAAM,mCAAmC,KAAK,EAAE;AAAA,IAC7D;AAAA,EACF;AAEA,SAAO,CAAC,GAAG,WAAW,GAAG,UAAU,OAAO,CAAC;AAC7C;AAEO,SAAS,QACd,YACA,UAA0B,CAAC,GACV;AACjB,QAAM,cAAc,aAAa,YAAY,OAAO;AACpD,MAAI,uBAAuB,OAAO;AAChC,WAAO,QAAQ,OAAO,WAAW;AAAA,EACnC;AAIA,SAAO,WAAW,aAAa,EAAE,cAAc,MAAM,CAAC;AACxD;AAaA,eAAsB,WACpB,YACA,UAA0B,CAAC,GACZ;AACf,QAAM,cAAc,aAAa,YAAY,OAAO;AACpD,MAAI,uBAAuB,OAAO;AAChC,UAAM;AAAA,EACR;AACA,QAAM,WAAW,aAAa,EAAE,cAAc,KAAK,CAAC;AACtD;AAwBO,SAAS,IAAI,WAAmB,UAAsB,CAAC,GAAoB;AAChF,UAAQ;AAAA,IACN;AAAA,EACF;AAGA,QAAM,UAAoB,CAAC;AAC3B,MAAI,CAAC,QAAQ,QAAQ;AACnB,YAAQ,KAAK,MAAM;AAAA,EACrB;AACA,MAAI,QAAQ,kBAAkB;AAC5B,QAAI,QAAQ,oBAAoB;AAC9B,cAAQ,KAAK,sBAAsB;AAAA,IACrC,WAAW,QAAQ,gBAAgB;AACjC,cAAQ,KAAK,oBAAoB;AAAA,IACnC,OAAO;AACL,cAAQ,KAAK,UAAU;AAAA,IACzB;AAAA,EACF;AACA,MAAI,QAAQ,cAAc;AACxB,YAAQ,KAAK,MAAM;AAAA,EACrB;AACA,MAAI,QAAQ,sBAAsB;AAChC,YAAQ,KAAK,KAAK;AAAA,EACpB;AAEA,SAAO,QAAQ,WAAW;AAAA,IACxB,WAAW,QAAQ;AAAA,IACnB,UAAU,QAAQ;AAAA,IAClB,qBAAqB,QAAQ;AAAA,IAC7B,gBAAgB,QAAQ;AAAA,IACxB,kBAAkB,QAAQ;AAAA,IAC1B,eAAe,QAAQ;AAAA,IACvB,QAAQ,QAAQ,SAAS,IAAI,UAAU;AAAA,IACvC,OAAO,CAAC,QAAQ;AAAA,EAClB,CAAC;AACH;","names":["__filename","__dirname"]}
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@opendataloader/pdf",
3
- "version": "2.4.1",
3
+ "version": "2.4.3",
4
4
  "description": "A Node.js wrapper for the opendataloader-pdf Java CLI.",
5
5
  "main": "./dist/index.cjs",
6
6
  "module": "./dist/index.js",