npm - @robin7331/papyrus-cli - Versions diffs - 0.1.6 → 0.1.8 - Mend

@robin7331/papyrus-cli 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

package/README.md +20 -30
package/dist/cli.js +29 -27
package/dist/cliHelpers.d.ts +4 -7
package/dist/cliHelpers.js +20 -27
package/dist/openaiPdfToMarkdown.d.ts +1 -1
package/dist/openaiPdfToMarkdown.js +25 -29
package/package.json +1 -1
package/src/cli.ts +30 -28
package/src/cliHelpers.ts +21 -33
package/src/openaiPdfToMarkdown.ts +35 -37
package/test/cliHelpers.test.ts +32 -32

package/README.md CHANGED Viewed

@@ -27,20 +27,20 @@ papyrus --help
 # Show installed CLI version
 papyrus --version
-# Single file (auto mode; if no API key is found, Papyrus prompts you to paste one)
+# Single file (default behavior; if no API key is found, Papyrus prompts you to paste one)
 papyrus ./path/to/input.pdf
-# Single file with explicit format/output/model
+# Single file with explicit output extension/output/model
 papyrus ./path/to/input.pdf --format md --output ./out/result.md --model gpt-4o-mini
-# Auto mode with extra instructions
+# Default conversion with extra instructions
 papyrus ./path/to/input.pdf --instructions "Prioritize table accuracy." --format txt
-# Prompt mode (inline prompt)
-papyrus ./path/to/input.pdf --mode prompt --prompt "Extract all invoice line items as bullet points." --format md
+# Prompt conversion (inline prompt)
+papyrus ./path/to/input.pdf --prompt "Extract all invoice line items as bullet points." --format md
-# Prompt mode (prompt file)
-papyrus ./path/to/input.pdf --mode prompt --prompt-file ./my-prompt.txt --format txt
+# Prompt conversion (prompt file)
+papyrus ./path/to/input.pdf --prompt-file ./my-prompt.txt --format txt
 # Folder mode (recursive scan, asks for confirmation)
 papyrus ./path/to/folder
@@ -110,14 +110,14 @@ papyrus --version
 ### `--format <format>`
-Output format override:
-- `md` for GitHub-flavored Markdown
-- `txt` for plain text
+Output file extension override. Any extension is allowed (for example `md`, `txt`, `csv`, `json`).
+This flag sets the output filename extension; Papyrus itself does not convert the content to that format.
+When provided, Papyrus also passes the extension to the model as a guidance hint.
 Example:
 ```bash
-papyrus ./docs/invoice.pdf --format md
+papyrus ./docs/invoice.pdf --format csv
 ```
 ### `-o, --output <path>`
@@ -132,46 +132,34 @@ Example:
 papyrus ./docs --output ./converted
 ```
-### `--mode <mode>`
-Conversion mode:
-- `auto` (default): built-in conversion behavior.
-- `prompt`: use your own prompt via `--prompt` or `--prompt-file`.
-Example:
-```bash
-papyrus ./docs/invoice.pdf --mode prompt --prompt "Extract all line items."
-```
 ### `--instructions <text>`
-Additional conversion instructions in `auto` mode only.
+Additional conversion instructions for default conversion behavior. Cannot be combined with `--prompt` or `--prompt-file`.
 Example:
 ```bash
-papyrus ./docs/invoice.pdf --mode auto --instructions "Keep table columns aligned."
+papyrus ./docs/invoice.pdf --instructions "Keep table columns aligned."
 ```
 ### `--prompt <text>`
-Inline prompt text for `prompt` mode. Must be non-empty. In `prompt` mode, use exactly one of `--prompt` or `--prompt-file`.
+Inline prompt text for prompt-based conversion. Must be non-empty. Use exactly one of `--prompt` or `--prompt-file`.
 Example:
 ```bash
-papyrus ./docs/invoice.pdf --mode prompt --prompt "Summarize payment terms."
+papyrus ./docs/invoice.pdf --prompt "Summarize payment terms."
 ```
 ### `--prompt-file <path>`
-Path to a text file containing the prompt for `prompt` mode. File must contain non-empty text. In `prompt` mode, use exactly one of `--prompt` or `--prompt-file`.
+Path to a text file containing the prompt for prompt-based conversion. File must contain non-empty text. Use exactly one of `--prompt` or `--prompt-file`.
 Example:
 ```bash
-papyrus ./docs/invoice.pdf --mode prompt --prompt-file ./my-prompt.txt
+papyrus ./docs/invoice.pdf --prompt-file ./my-prompt.txt
 ```
 ### `-m, --model <model>`
@@ -206,7 +194,9 @@ papyrus ./docs --yes
 ## Notes
-- In `auto` mode without `--format`, the model returns structured JSON with `format` + `content`.
+- In default conversion (without `--prompt`/`--prompt-file`), the model returns structured JSON with `format` + `content`.
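+- Without `--format`, the output extension follows the content format: model-selected (`.md` or `.txt`) in default conversion, `.txt` in prompt conversion.
+- With `--format`, the output extension is overridden and the extension is also passed to the model as a guidance hint.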
 - Single-file input now also shows a live worker lane (spinner in TTY) while conversion is running.
 - Folder input is scanned recursively for `.pdf` files and processed in parallel.
 - In folder mode, `--output` must be a directory path and mirrored subfolders are preserved.

package/dist/cli.js CHANGED Viewed

@@ -6,7 +6,7 @@ import { dirname, join, relative, resolve } from "node:path";
 import { Command } from "commander";
 import { clearStoredApiKey, getConfigFilePath, getStoredApiKey, maskApiKey, setStoredApiKey } from "./config.js";
 import { convertPdf } from "./openaiPdfToMarkdown.js";
-import { defaultOutputPath, formatDurationMs, isPdfPath, looksLikeFileOutput, parseConcurrency, parseFormat, parseMode, resolveFolderOutputPath, truncate, validateOptionCombination } from "./cliHelpers.js";
+import { defaultOutputPath, formatDurationMs, isPdfPath, looksLikeFileOutput, parseConcurrency, parseFormat, resolveFolderOutputPath, truncate, validateOptionCombination } from "./cliHelpers.js";
 const program = new Command();
 const configFilePath = getConfigFilePath();
 const OPENAI_API_KEYS_URL = "https://platform.openai.com/settings/organization/api-keys";
@@ -20,24 +20,24 @@ program
     .option("-m, --model <model>", "OpenAI model to use", "gpt-4o-mini")
     .option("--concurrency <n>", "Max parallel workers for folder input (default: 10)", parseConcurrency)
     .option("-y, --yes", "Skip confirmation prompt in folder mode")
-    .option("--mode <mode>", "Conversion mode: auto or prompt", parseMode, "auto")
-    .option("--format <format>", "Output format override: md or txt", parseFormat)
-    .option("--instructions <text>", "Additional conversion instructions for auto mode")
-    .option("--prompt <text>", "Custom prompt text for prompt mode")
-    .option("--prompt-file <path>", "Path to file containing prompt text for prompt mode")
+    .option("--format <format>", "Output file extension override (for example: md, txt, csv, json)", parseFormat)
+    .option("--instructions <text>", "Additional conversion instructions (only when not using --prompt/--prompt-file)")
+    .option("--prompt <text>", "Custom prompt text (enables prompt mode)")
+    .option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
     .action(async (input, options) => {
     const inputPath = resolve(input);
     const startedAt = Date.now();
     try {
         validateOptionCombination(options);
         const promptText = await resolvePromptText(options);
+        const conversionMode = resolveConversionMode(promptText);
         const inputKind = await detectInputKind(inputPath);
         let usageTotals = emptyUsage();
         if (inputKind === "file") {
-            usageTotals = await processSingleFile(inputPath, options, promptText);
+            usageTotals = await processSingleFile(inputPath, options, conversionMode, promptText);
         }
         else {
-            const summary = await processFolder(inputPath, options, promptText);
+            const summary = await processFolder(inputPath, options, conversionMode, promptText);
             usageTotals = summary.usage;
             if (!summary.cancelled && summary.failed > 0) {
                 process.exitCode = 1;
@@ -112,7 +112,7 @@ program.parseAsync(process.argv).catch((error) => {
     console.error(`Command failed: ${message}`);
     process.exitCode = 1;
 });
-async function processSingleFile(inputPath, options, promptText) {
+async function processSingleFile(inputPath, options, mode, promptText) {
     if (!isPdfPath(inputPath)) {
         throw new Error("Input file must have a .pdf extension.");
     }
@@ -131,22 +131,23 @@ async function processSingleFile(inputPath, options, promptText) {
         const result = await convertPdf({
             inputPath,
             model: options.model,
-            mode: options.mode,
-            format: options.format,
+            mode,
             instructions: options.instructions,
-            promptText
+            promptText,
+            outputExtensionHint: options.format
         });
-        const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, result.format));
+        const outputExtension = options.format ?? result.format;
+        const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, outputExtension));
         await mkdir(dirname(outputPath), { recursive: true });
         await writeFile(outputPath, result.content, "utf8");
         if (workerDashboard) {
-            workerDashboard.setWorkerDone(0, displayInput, `${result.format} in ${formatDurationMs(Date.now() - startedAt)}`);
+            workerDashboard.setWorkerDone(0, displayInput, `${outputExtension} in ${formatDurationMs(Date.now() - startedAt)}`);
             workerDashboard.setSummary(1, 0);
         }
         else {
-            console.log(`[worker-1] Done ${displayInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`);
+            console.log(`[worker-1] Done ${displayInput} -> ${outputPath} (${outputExtension}, ${formatDurationMs(Date.now() - startedAt)})`);
         }
-        console.log(`Output (${result.format}) written to: ${outputPath}`);
+        console.log(`Output (.${outputExtension}) written to: ${outputPath}`);
         return result.usage;
     }
     catch (error) {
@@ -164,9 +165,9 @@ async function processSingleFile(inputPath, options, promptText) {
         workerDashboard?.stop();
     }
 }
-async function processFolder(inputDir, options, promptText) {
+async function processFolder(inputDir, options, mode, promptText) {
     if (options.output && looksLikeFileOutput(options.output)) {
-        throw new Error("In folder mode, --output must be a directory path (not a .md/.txt file path).");
+        throw new Error("In folder mode, --output must be a directory path.");
     }
     const files = await collectPdfFiles(inputDir);
     if (files.length === 0) {
@@ -200,21 +201,22 @@ async function processFolder(inputDir, options, promptText) {
                 const result = await convertPdf({
                     inputPath: filePath,
                     model: options.model,
-                    mode: options.mode,
-                    format: options.format,
+                    mode,
                     instructions: options.instructions,
-                    promptText
+                    promptText,
+                    outputExtensionHint: options.format
                 });
-                const outputPath = resolveFolderOutputPath(filePath, inputDir, outputRoot, result.format);
+                const outputExtension = options.format ?? result.format;
+                const outputPath = resolveFolderOutputPath(filePath, inputDir, outputRoot, outputExtension);
                 await mkdir(dirname(outputPath), { recursive: true });
                 await writeFile(outputPath, result.content, "utf8");
                 succeeded += 1;
                 mergeUsage(usage, result.usage);
                 if (workerDashboard) {
-                    workerDashboard.setWorkerDone(workerId, relativeInput, `${result.format} in ${formatDurationMs(Date.now() - startedAt)}`);
+                    workerDashboard.setWorkerDone(workerId, relativeInput, `${outputExtension} in ${formatDurationMs(Date.now() - startedAt)}`);
                 }
                 else {
-                    console.log(`[worker-${workerId + 1}] Done ${relativeInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`);
+                    console.log(`[worker-${workerId + 1}] Done ${relativeInput} -> ${outputPath} (${outputExtension}, ${formatDurationMs(Date.now() - startedAt)})`);
                 }
             }
             catch (error) {
@@ -250,9 +252,6 @@ async function processFolder(inputDir, options, promptText) {
     return { total: files.length, succeeded, failed, cancelled: false, usage };
 }
 async function resolvePromptText(options) {
-    if (options.mode !== "prompt") {
-        return undefined;
-    }
     if (options.prompt) {
         const prompt = options.prompt.trim();
         if (!prompt) {
@@ -270,6 +269,9 @@ async function resolvePromptText(options) {
     }
     return promptFromFile;
 }
+function resolveConversionMode(promptText) {
+    return promptText ? "prompt" : "auto";
+}
 async function handleConfigInit(options) {
     const existingKey = await getStoredApiKey();
     if (existingKey && !options.force) {

package/dist/cliHelpers.d.ts CHANGED Viewed

@@ -1,21 +1,18 @@
-import { type ConversionMode, type OutputFormat } from "./openaiPdfToMarkdown.js";
 export type CliOptions = {
     output?: string;
     model: string;
     concurrency?: number;
     yes?: boolean;
-    mode: ConversionMode;
-    format?: OutputFormat;
+    format?: string;
     instructions?: string;
     prompt?: string;
     promptFile?: string;
 };
-export declare function parseMode(value: string): ConversionMode;
-export declare function parseFormat(value: string): OutputFormat;
+export declare function parseFormat(value: string): string;
 export declare function parseConcurrency(value: string): number;
 export declare function validateOptionCombination(options: CliOptions): void;
-export declare function defaultOutputPath(inputPath: string, format: OutputFormat): string;
-export declare function resolveFolderOutputPath(inputPath: string, inputRoot: string, outputRoot: string | undefined, format: OutputFormat): string;
+export declare function defaultOutputPath(inputPath: string, extension: string): string;
+export declare function resolveFolderOutputPath(inputPath: string, inputRoot: string, outputRoot: string | undefined, extension: string): string;
 export declare function isPdfPath(inputPath: string): boolean;
 export declare function looksLikeFileOutput(outputPath: string): boolean;
 export declare function truncate(value: string, maxLength: number): string;

package/dist/cliHelpers.js CHANGED Viewed

@@ -1,16 +1,14 @@
 import { InvalidArgumentError } from "commander";
 import { basename, dirname, extname, join, relative } from "node:path";
-export function parseMode(value) {
-    if (value === "auto" || value === "prompt") {
-        return value;
-    }
-    throw new InvalidArgumentError("Mode must be either 'auto' or 'prompt'.");
-}
 export function parseFormat(value) {
-    if (value === "md" || value === "txt") {
-        return value;
+    const normalized = value.trim().replace(/^\.+/, "");
+    if (!normalized) {
+        throw new InvalidArgumentError("Format must be a non-empty file extension.");
+    }
+    if (normalized.includes("/") || normalized.includes("\\")) {
+        throw new InvalidArgumentError("Format must be a file extension, not a path.");
     }
-    throw new InvalidArgumentError("Format must be either 'md' or 'txt'.");
+    return normalized;
 }
 export function parseConcurrency(value) {
     const parsed = Number(value);
@@ -20,35 +18,30 @@ export function parseConcurrency(value) {
     return parsed;
 }
 export function validateOptionCombination(options) {
-    if (options.mode === "prompt") {
-        const promptSourceCount = Number(Boolean(options.prompt)) + Number(Boolean(options.promptFile));
-        if (promptSourceCount !== 1) {
-            throw new Error("Prompt mode requires exactly one of --prompt or --prompt-file.");
-        }
-        if (options.instructions) {
-            throw new Error("--instructions is only supported in auto mode.");
-        }
-        return;
+    const promptSourceCount = Number(Boolean(options.prompt)) + Number(Boolean(options.promptFile));
+    if (promptSourceCount > 1) {
+        throw new Error("Use exactly one of --prompt or --prompt-file.");
     }
-    if (options.prompt || options.promptFile) {
-        throw new Error("--prompt and --prompt-file are only supported in prompt mode.");
+    if (promptSourceCount === 1 && options.instructions) {
+        throw new Error("--instructions cannot be combined with --prompt or --prompt-file.");
     }
 }
-export function defaultOutputPath(inputPath, format) {
-    const extension = format === "md" ? ".md" : ".txt";
+export function defaultOutputPath(inputPath, extension) {
+    const normalizedExtension = extension.startsWith(".") ? extension : `.${extension}`;
     if (extname(inputPath).toLowerCase() === ".pdf") {
-        return inputPath.slice(0, -4) + extension;
+        return inputPath.slice(0, -4) + normalizedExtension;
     }
-    return inputPath + extension;
+    return inputPath + normalizedExtension;
 }
-export function resolveFolderOutputPath(inputPath, inputRoot, outputRoot, format) {
+export function resolveFolderOutputPath(inputPath, inputRoot, outputRoot, extension) {
     if (!outputRoot) {
-        return defaultOutputPath(inputPath, format);
+        return defaultOutputPath(inputPath, extension);
     }
     const relativePath = relative(inputRoot, inputPath);
     const relativeDir = dirname(relativePath);
     const base = basename(relativePath, extname(relativePath));
-    const filename = `${base}.${format}`;
+    const normalizedExtension = extension.startsWith(".") ? extension.slice(1) : extension;
+    const filename = `${base}.${normalizedExtension}`;
     if (relativeDir === ".") {
         return join(outputRoot, filename);
     }

package/dist/openaiPdfToMarkdown.d.ts CHANGED Viewed

@@ -2,9 +2,9 @@ export type ConvertOptions = {
     inputPath: string;
     model: string;
     mode: ConversionMode;
-    format?: OutputFormat;
     instructions?: string;
     promptText?: string;
+    outputExtensionHint?: string;
 };
 export type ConversionMode = "auto" | "prompt";
 export type OutputFormat = "md" | "txt";

package/dist/openaiPdfToMarkdown.js CHANGED Viewed

@@ -54,13 +54,13 @@ export async function convertPdf(options) {
         outputTokens: result.state.usage.outputTokens,
         totalTokens: result.state.usage.totalTokens
     };
-    if (options.mode === "auto" && !options.format) {
+    if (options.mode === "auto") {
         return { ...parseAutoResponse(rawOutput), usage };
     }
-    const format = options.format ?? "txt";
-    return { format, content: rawOutput, usage };
+    return { format: "txt", content: rawOutput, usage };
 }
 function buildPromptText(options) {
+    const outputExtensionHint = normalizeExtensionHint(options.outputExtensionHint);
     if (options.mode === "prompt") {
         if (!options.promptText) {
             throw new Error("promptText is required when mode is 'prompt'.");
@@ -70,35 +70,16 @@ function buildPromptText(options) {
             "Return only the final converted content.",
             `User prompt:\n${options.promptText}`
         ];
-        if (options.format === "md") {
-            promptModeParts.push("Output format requirement: Return only GitHub-flavored Markdown.");
-        }
-        else if (options.format === "txt") {
-            promptModeParts.push("Output format requirement: Return plain text only and do not use Markdown syntax.");
-        }
-        else {
-            promptModeParts.push("If the prompt does not enforce a format, prefer plain text without Markdown syntax.");
+        if (outputExtensionHint) {
+            promptModeParts.push([
+                `Output file extension hint: .${outputExtensionHint}.`,
+                "Prefer content that is practical for saving under this extension.",
+                "Treat this as guidance and still follow the user prompt exactly."
+            ].join(" "));
         }
         return promptModeParts.join("\n\n");
     }
-    if (options.format === "md") {
-        return withAdditionalInstructions([
-            "Convert this PDF into clean GitHub-flavored Markdown.",
-            "Preserve headings, paragraphs, lists, and tables.",
-            "Render tables as Markdown pipe tables with header separators.",
-            "If cells are empty due to merged cells, keep the table readable and consistent.",
-            "Return only Markdown without code fences."
-        ].join(" "), options.instructions);
-    }
-    if (options.format === "txt") {
-        return withAdditionalInstructions([
-            "Convert this PDF into clean plain text.",
-            "Preserve reading order and paragraph boundaries.",
-            "Represent tables in readable plain text (no Markdown syntax).",
-            "Return plain text only and do not use Markdown syntax or code fences."
-        ].join(" "), options.instructions);
-    }
-    return withAdditionalInstructions([
+    let autoPrompt = withAdditionalInstructions([
         "Decide the best output format for this PDF: Markdown ('md') or plain text ('txt').",
         "Choose 'md' for documents with meaningful headings, lists, and tables that benefit from Markdown.",
         "Choose 'txt' for mostly linear text where Markdown adds little value.",
@@ -108,6 +89,14 @@ function buildPromptText(options) {
         "If format is 'txt', output plain text only and do not use Markdown syntax.",
         "Do not wrap the JSON in code fences."
     ].join("\n"), options.instructions);
+    if (outputExtensionHint) {
+        autoPrompt = `${autoPrompt}\n\n${[
+            `Output file extension hint: .${outputExtensionHint}.`,
+            "Prefer content that is practical for that extension while still returning JSON with format='md' or 'txt'.",
+            "This is guidance only and should not break the required JSON schema."
+        ].join(" ")}`;
+    }
+    return autoPrompt;
 }
 function withAdditionalInstructions(base, additional) {
     if (!additional) {
@@ -115,6 +104,13 @@ function withAdditionalInstructions(base, additional) {
     }
     return `${base}\n\nAdditional user instructions:\n${additional}`;
 }
+function normalizeExtensionHint(extension) {
+    if (!extension) {
+        return undefined;
+    }
+    const normalized = extension.trim().replace(/^\.+/, "");
+    return normalized || undefined;
+}
 function parseAutoResponse(rawOutput) {
     let candidate = rawOutput.trim();
     const fencedMatch = candidate.match(/```(?:json)?\s*([\s\S]*?)```/i);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@robin7331/papyrus-cli",
-  "version": "0.1.6",
+  "version": "0.1.8",
   "private": false,
   "description": "Convert PDF to markdown or text with the OpenAI Agents SDK",
   "repository": {

package/src/cli.ts CHANGED Viewed

@@ -14,6 +14,7 @@ import {
 } from "./config.js";
 import {
   convertPdf,
+  type ConversionMode,
   type ConvertUsage
 } from "./openaiPdfToMarkdown.js";
 import {
@@ -23,7 +24,6 @@ import {
   looksLikeFileOutput,
   parseConcurrency,
   parseFormat,
-  parseMode,
   resolveFolderOutputPath,
   truncate,
   type CliOptions,
@@ -52,14 +52,13 @@ program
     parseConcurrency
   )
   .option("-y, --yes", "Skip confirmation prompt in folder mode")
-  .option("--mode <mode>", "Conversion mode: auto or prompt", parseMode, "auto")
-  .option("--format <format>", "Output format override: md or txt", parseFormat)
+  .option("--format <format>", "Output file extension override (for example: md, txt, csv, json)", parseFormat)
   .option(
     "--instructions <text>",
-    "Additional conversion instructions for auto mode"
+    "Additional conversion instructions (only when not using --prompt/--prompt-file)"
   )
-  .option("--prompt <text>", "Custom prompt text for prompt mode")
-  .option("--prompt-file <path>", "Path to file containing prompt text for prompt mode")
+  .option("--prompt <text>", "Custom prompt text (enables prompt mode)")
+  .option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
   .action(async (input: string, options: CliOptions) => {
     const inputPath = resolve(input);
     const startedAt = Date.now();
@@ -68,13 +67,14 @@ program
       validateOptionCombination(options);
       const promptText = await resolvePromptText(options);
+      const conversionMode = resolveConversionMode(promptText);
       const inputKind = await detectInputKind(inputPath);
       let usageTotals: ConvertUsage = emptyUsage();
       if (inputKind === "file") {
-        usageTotals = await processSingleFile(inputPath, options, promptText);
+        usageTotals = await processSingleFile(inputPath, options, conversionMode, promptText);
       } else {
-        const summary = await processFolder(inputPath, options, promptText);
+        const summary = await processFolder(inputPath, options, conversionMode, promptText);
         usageTotals = summary.usage;
         if (!summary.cancelled && summary.failed > 0) {
           process.exitCode = 1;
@@ -157,6 +157,7 @@ program.parseAsync(process.argv).catch((error: unknown) => {
 async function processSingleFile(
   inputPath: string,
   options: CliOptions,
+  mode: ConversionMode,
   promptText?: string
 ): Promise<ConvertUsage> {
   if (!isPdfPath(inputPath)) {
@@ -180,13 +181,14 @@ async function processSingleFile(
     const result = await convertPdf({
       inputPath,
       model: options.model,
-      mode: options.mode,
-      format: options.format,
+      mode,
       instructions: options.instructions,
-      promptText
+      promptText,
+      outputExtensionHint: options.format
     });
-    const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, result.format));
+    const outputExtension = options.format ?? result.format;
+    const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, outputExtension));
     await mkdir(dirname(outputPath), { recursive: true });
     await writeFile(outputPath, result.content, "utf8");
@@ -194,16 +196,16 @@ async function processSingleFile(
       workerDashboard.setWorkerDone(
         0,
         displayInput,
-        `${result.format} in ${formatDurationMs(Date.now() - startedAt)}`
+        `${outputExtension} in ${formatDurationMs(Date.now() - startedAt)}`
       );
       workerDashboard.setSummary(1, 0);
     } else {
       console.log(
-        `[worker-1] Done ${displayInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`
+        `[worker-1] Done ${displayInput} -> ${outputPath} (${outputExtension}, ${formatDurationMs(Date.now() - startedAt)})`
       );
     }
-    console.log(`Output (${result.format}) written to: ${outputPath}`);
+    console.log(`Output (.${outputExtension}) written to: ${outputPath}`);
     return result.usage;
   } catch (error) {
     const message = error instanceof Error ? error.message : String(error);
@@ -237,12 +239,11 @@ type FolderSummary = {
 async function processFolder(
   inputDir: string,
   options: CliOptions,
+  mode: ConversionMode,
   promptText?: string
 ): Promise<FolderSummary> {
   if (options.output && looksLikeFileOutput(options.output)) {
-    throw new Error(
-      "In folder mode, --output must be a directory path (not a .md/.txt file path)."
-    );
+    throw new Error("In folder mode, --output must be a directory path.");
   }
   const files = await collectPdfFiles(inputDir);
@@ -282,13 +283,14 @@ async function processFolder(
         const result = await convertPdf({
           inputPath: filePath,
           model: options.model,
-          mode: options.mode,
-          format: options.format,
+          mode,
           instructions: options.instructions,
-          promptText
+          promptText,
+          outputExtensionHint: options.format
         });
-        const outputPath = resolveFolderOutputPath(filePath, inputDir, outputRoot, result.format);
+        const outputExtension = options.format ?? result.format;
+        const outputPath = resolveFolderOutputPath(filePath, inputDir, outputRoot, outputExtension);
         await mkdir(dirname(outputPath), { recursive: true });
         await writeFile(outputPath, result.content, "utf8");
         succeeded += 1;
@@ -298,11 +300,11 @@ async function processFolder(
           workerDashboard.setWorkerDone(
             workerId,
             relativeInput,
-            `${result.format} in ${formatDurationMs(Date.now() - startedAt)}`
+            `${outputExtension} in ${formatDurationMs(Date.now() - startedAt)}`
           );
         } else {
           console.log(
-            `[worker-${workerId + 1}] Done ${relativeInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`
+            `[worker-${workerId + 1}] Done ${relativeInput} -> ${outputPath} (${outputExtension}, ${formatDurationMs(Date.now() - startedAt)})`
           );
         }
       } catch (error) {
@@ -347,10 +349,6 @@ async function processFolder(
 }
 async function resolvePromptText(options: CliOptions): Promise<string | undefined> {
-  if (options.mode !== "prompt") {
-    return undefined;
-  }
   if (options.prompt) {
     const prompt = options.prompt.trim();
     if (!prompt) {
@@ -373,6 +371,10 @@ async function resolvePromptText(options: CliOptions): Promise<string | undefine
   return promptFromFile;
 }
+function resolveConversionMode(promptText: string | undefined): ConversionMode {
+  return promptText ? "prompt" : "auto";
+}
 async function handleConfigInit(options: ConfigInitOptions): Promise<void> {
   const existingKey = await getStoredApiKey();
   if (existingKey && !options.force) {

package/src/cliHelpers.ts CHANGED Viewed

@@ -1,33 +1,28 @@
 import { InvalidArgumentError } from "commander";
 import { basename, dirname, extname, join, relative } from "node:path";
-import { type ConversionMode, type OutputFormat } from "./openaiPdfToMarkdown.js";
 export type CliOptions = {
   output?: string;
   model: string;
   concurrency?: number;
   yes?: boolean;
-  mode: ConversionMode;
-  format?: OutputFormat;
+  format?: string;
   instructions?: string;
   prompt?: string;
   promptFile?: string;
 };
-export function parseMode(value: string): ConversionMode {
-  if (value === "auto" || value === "prompt") {
-    return value;
+export function parseFormat(value: string): string {
+  const normalized = value.trim().replace(/^\.+/, "");
+  if (!normalized) {
+    throw new InvalidArgumentError("Format must be a non-empty file extension.");
   }
-  throw new InvalidArgumentError("Mode must be either 'auto' or 'prompt'.");
-}
-export function parseFormat(value: string): OutputFormat {
-  if (value === "md" || value === "txt") {
-    return value;
+  if (normalized.includes("/") || normalized.includes("\\")) {
+    throw new InvalidArgumentError("Format must be a file extension, not a path.");
   }
-  throw new InvalidArgumentError("Format must be either 'md' or 'txt'.");
+  return normalized;
 }
 export function parseConcurrency(value: string): number {
@@ -40,48 +35,41 @@ export function parseConcurrency(value: string): number {
 }
 export function validateOptionCombination(options: CliOptions): void {
-  if (options.mode === "prompt") {
-    const promptSourceCount = Number(Boolean(options.prompt)) + Number(Boolean(options.promptFile));
-    if (promptSourceCount !== 1) {
-      throw new Error("Prompt mode requires exactly one of --prompt or --prompt-file.");
-    }
-    if (options.instructions) {
-      throw new Error("--instructions is only supported in auto mode.");
-    }
-    return;
+  const promptSourceCount = Number(Boolean(options.prompt)) + Number(Boolean(options.promptFile));
+  if (promptSourceCount > 1) {
+    throw new Error("Use exactly one of --prompt or --prompt-file.");
   }
-  if (options.prompt || options.promptFile) {
-    throw new Error("--prompt and --prompt-file are only supported in prompt mode.");
+  if (promptSourceCount === 1 && options.instructions) {
+    throw new Error("--instructions cannot be combined with --prompt or --prompt-file.");
   }
 }
-export function defaultOutputPath(inputPath: string, format: OutputFormat): string {
-  const extension = format === "md" ? ".md" : ".txt";
+export function defaultOutputPath(inputPath: string, extension: string): string {
+  const normalizedExtension = extension.startsWith(".") ? extension : `.${extension}`;
   if (extname(inputPath).toLowerCase() === ".pdf") {
-    return inputPath.slice(0, -4) + extension;
+    return inputPath.slice(0, -4) + normalizedExtension;
   }
-  return inputPath + extension;
+  return inputPath + normalizedExtension;
 }
 export function resolveFolderOutputPath(
   inputPath: string,
   inputRoot: string,
   outputRoot: string | undefined,
-  format: OutputFormat
+  extension: string
 ): string {
   if (!outputRoot) {
-    return defaultOutputPath(inputPath, format);
+    return defaultOutputPath(inputPath, extension);
   }
   const relativePath = relative(inputRoot, inputPath);
   const relativeDir = dirname(relativePath);
   const base = basename(relativePath, extname(relativePath));
-  const filename = `${base}.${format}`;
+  const normalizedExtension = extension.startsWith(".") ? extension.slice(1) : extension;
+  const filename = `${base}.${normalizedExtension}`;
   if (relativeDir === ".") {
     return join(outputRoot, filename);

package/src/openaiPdfToMarkdown.ts CHANGED Viewed

@@ -9,9 +9,9 @@ export type ConvertOptions = {
   inputPath: string;
   model: string;
   mode: ConversionMode;
-  format?: OutputFormat;
   instructions?: string;
   promptText?: string;
+  outputExtensionHint?: string;
 };
 export type ConversionMode = "auto" | "prompt";
@@ -94,63 +94,40 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
     totalTokens: result.state.usage.totalTokens
   };
-  if (options.mode === "auto" && !options.format) {
+  if (options.mode === "auto") {
     return { ...parseAutoResponse(rawOutput), usage };
   }
-  const format = options.format ?? "txt";
-  return { format, content: rawOutput, usage };
+  return { format: "txt", content: rawOutput, usage };
 }
 function buildPromptText(options: ConvertOptions): string {
+  const outputExtensionHint = normalizeExtensionHint(options.outputExtensionHint);
   if (options.mode === "prompt") {
     if (!options.promptText) {
       throw new Error("promptText is required when mode is 'prompt'.");
     }
-    const promptModeParts = [
+    const promptModeParts: string[] = [
       "Apply the following user prompt to the PDF.",
       "Return only the final converted content.",
       `User prompt:\n${options.promptText}`
     ];
-    if (options.format === "md") {
-      promptModeParts.push("Output format requirement: Return only GitHub-flavored Markdown.");
-    } else if (options.format === "txt") {
-      promptModeParts.push("Output format requirement: Return plain text only and do not use Markdown syntax.");
-    } else {
-      promptModeParts.push("If the prompt does not enforce a format, prefer plain text without Markdown syntax.");
+    if (outputExtensionHint) {
+      promptModeParts.push(
+        [
+          `Output file extension hint: .${outputExtensionHint}.`,
+          "Prefer content that is practical for saving under this extension.",
+          "Treat this as guidance and still follow the user prompt exactly."
+        ].join(" ")
+      );
     }
     return promptModeParts.join("\n\n");
   }
-  if (options.format === "md") {
-    return withAdditionalInstructions(
-      [
-        "Convert this PDF into clean GitHub-flavored Markdown.",
-        "Preserve headings, paragraphs, lists, and tables.",
-        "Render tables as Markdown pipe tables with header separators.",
-        "If cells are empty due to merged cells, keep the table readable and consistent.",
-        "Return only Markdown without code fences."
-      ].join(" "),
-      options.instructions
-    );
-  }
-  if (options.format === "txt") {
-    return withAdditionalInstructions(
-      [
-        "Convert this PDF into clean plain text.",
-        "Preserve reading order and paragraph boundaries.",
-        "Represent tables in readable plain text (no Markdown syntax).",
-        "Return plain text only and do not use Markdown syntax or code fences."
-      ].join(" "),
-      options.instructions
-    );
-  }
-  return withAdditionalInstructions(
+  let autoPrompt = withAdditionalInstructions(
     [
       "Decide the best output format for this PDF: Markdown ('md') or plain text ('txt').",
       "Choose 'md' for documents with meaningful headings, lists, and tables that benefit from Markdown.",
@@ -163,6 +140,18 @@ function buildPromptText(options: ConvertOptions): string {
     ].join("\n"),
     options.instructions
   );
+  if (outputExtensionHint) {
+    autoPrompt = `${autoPrompt}\n\n${
+      [
+        `Output file extension hint: .${outputExtensionHint}.`,
+        "Prefer content that is practical for that extension while still returning JSON with format='md' or 'txt'.",
+        "This is guidance only and should not break the required JSON schema."
+      ].join(" ")
+    }`;
+  }
+  return autoPrompt;
 }
 function withAdditionalInstructions(base: string, additional?: string): string {
@@ -173,6 +162,15 @@ function withAdditionalInstructions(base: string, additional?: string): string {
   return `${base}\n\nAdditional user instructions:\n${additional}`;
 }
+function normalizeExtensionHint(extension: string | undefined): string | undefined {
+  if (!extension) {
+    return undefined;
+  }
+  const normalized = extension.trim().replace(/^\.+/, "");
+  return normalized || undefined;
+}
 function parseAutoResponse(rawOutput: string): Omit<ConvertResult, "usage"> {
   let candidate = rawOutput.trim();

package/test/cliHelpers.test.ts CHANGED Viewed

@@ -8,29 +8,25 @@ import {
   looksLikeFileOutput,
   parseConcurrency,
   parseFormat,
-  parseMode,
   resolveFolderOutputPath,
   truncate,
   validateOptionCombination,
   type CliOptions
 } from "../src/cliHelpers.js";
-test("parseMode accepts valid values", () => {
-  assert.equal(parseMode("auto"), "auto");
-  assert.equal(parseMode("prompt"), "prompt");
-});
-test("parseMode rejects invalid values", () => {
-  assert.throws(() => parseMode("invalid"), InvalidArgumentError);
-});
 test("parseFormat accepts valid values", () => {
   assert.equal(parseFormat("md"), "md");
   assert.equal(parseFormat("txt"), "txt");
+  assert.equal(parseFormat("csv"), "csv");
+  assert.equal(parseFormat(".json"), "json");
+  assert.equal(parseFormat("tar.gz"), "tar.gz");
 });
 test("parseFormat rejects invalid values", () => {
-  assert.throws(() => parseFormat("json"), InvalidArgumentError);
+  assert.throws(() => parseFormat(""), InvalidArgumentError);
+  assert.throws(() => parseFormat("   "), InvalidArgumentError);
+  assert.throws(() => parseFormat("../json"), InvalidArgumentError);
+  assert.throws(() => parseFormat("a/b"), InvalidArgumentError);
 });
 test("parseConcurrency accepts in-range integers", () => {
@@ -45,48 +41,47 @@ test("parseConcurrency rejects invalid values", () => {
   assert.throws(() => parseConcurrency("abc"), InvalidArgumentError);
 });
-test("validateOptionCombination enforces prompt mode requirements", () => {
+test("validateOptionCombination allows default auto behavior without prompt flags", () => {
   const base: CliOptions = {
-    model: "gpt-4o-mini",
-    mode: "prompt"
+    model: "gpt-4o-mini"
   };
-  assert.throws(
-    () => validateOptionCombination(base),
-    /Prompt mode requires exactly one of --prompt or --prompt-file\./
-  );
-  assert.doesNotThrow(() => validateOptionCombination({ ...base, prompt: "Convert this" }));
+  assert.doesNotThrow(() => validateOptionCombination(base));
+  assert.doesNotThrow(() => validateOptionCombination({ ...base, instructions: "Extra formatting rules" }));
+});
+test("validateOptionCombination treats --prompt and --prompt-file as mutually exclusive", () => {
+  const base: CliOptions = {
+    model: "gpt-4o-mini"
+  };
+  assert.doesNotThrow(() => validateOptionCombination({ ...base, prompt: "Convert" }));
   assert.doesNotThrow(() => validateOptionCombination({ ...base, promptFile: "./prompt.txt" }));
   assert.throws(
     () => validateOptionCombination({ ...base, prompt: "x", promptFile: "./prompt.txt" }),
-    /Prompt mode requires exactly one of --prompt or --prompt-file\./
-  );
-  assert.throws(
-    () => validateOptionCombination({ ...base, prompt: "x", instructions: "Extra" }),
-    /--instructions is only supported in auto mode\./
+    /Use exactly one of --prompt or --prompt-file\./
   );
 });
-test("validateOptionCombination rejects prompt flags in auto mode", () => {
+test("validateOptionCombination rejects --instructions with prompt flags", () => {
   const base: CliOptions = {
-    model: "gpt-4o-mini",
-    mode: "auto"
+    model: "gpt-4o-mini"
   };
-  assert.doesNotThrow(() => validateOptionCombination(base));
   assert.throws(
-    () => validateOptionCombination({ ...base, prompt: "Convert" }),
-    /--prompt and --prompt-file are only supported in prompt mode\./
+    () => validateOptionCombination({ ...base, prompt: "x", instructions: "Extra" }),
+    /--instructions cannot be combined with --prompt or --prompt-file\./
   );
   assert.throws(
-    () => validateOptionCombination({ ...base, promptFile: "./prompt.txt" }),
-    /--prompt and --prompt-file are only supported in prompt mode\./
+    () => validateOptionCombination({ ...base, promptFile: "./prompt.txt", instructions: "Extra" }),
+    /--instructions cannot be combined with --prompt or --prompt-file\./
   );
 });
 test("defaultOutputPath replaces .pdf extension and appends for other files", () => {
   assert.equal(defaultOutputPath("/tmp/input.pdf", "md"), "/tmp/input.md");
   assert.equal(defaultOutputPath("/tmp/input.PDF", "txt"), "/tmp/input.txt");
+  assert.equal(defaultOutputPath("/tmp/input.pdf", ".csv"), "/tmp/input.csv");
   assert.equal(defaultOutputPath("/tmp/input", "md"), "/tmp/input.md");
 });
@@ -105,6 +100,11 @@ test("resolveFolderOutputPath preserves nested structure when output root is set
     resolveFolderOutputPath("/data/invoices/file.pdf", "/data/invoices", "/exports", "txt"),
     "/exports/file.txt"
   );
+  assert.equal(
+    resolveFolderOutputPath("/data/invoices/file.pdf", "/data/invoices", "/exports", ".csv"),
+    "/exports/file.csv"
+  );
 });
 test("resolveFolderOutputPath falls back to default path when no output root", () => {