npm - @robin7331/papyrus-cli - Versions diffs - 0.1.5 → 0.1.7 - Mend

@robin7331/papyrus-cli 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

package/README.md +19 -25
package/dist/cli.js +59 -26
package/dist/cliHelpers.d.ts +1 -3
package/dist/cliHelpers.js +5 -17
package/dist/openaiPdfToMarkdown.js +192 -4
package/package.json +2 -1
package/src/cli.ts +73 -25
package/src/cliHelpers.ts +6 -23
package/src/openaiPdfToMarkdown.ts +273 -19
package/test/cliHelpers.test.ts +19 -31

package/README.md CHANGED Viewed

@@ -27,20 +27,20 @@ papyrus --help
 # Show installed CLI version
 papyrus --version
-# Single file (auto mode; if no API key is found, Papyrus prompts you to paste one)
+# Single file (default behavior; if no API key is found, Papyrus prompts you to paste one)
 papyrus ./path/to/input.pdf
 # Single file with explicit format/output/model
 papyrus ./path/to/input.pdf --format md --output ./out/result.md --model gpt-4o-mini
-# Auto mode with extra instructions
+# Default conversion with extra instructions
 papyrus ./path/to/input.pdf --instructions "Prioritize table accuracy." --format txt
-# Prompt mode (inline prompt)
-papyrus ./path/to/input.pdf --mode prompt --prompt "Extract all invoice line items as bullet points." --format md
+# Prompt conversion (inline prompt)
+papyrus ./path/to/input.pdf --prompt "Extract all invoice line items as bullet points." --format md
-# Prompt mode (prompt file)
-papyrus ./path/to/input.pdf --mode prompt --prompt-file ./my-prompt.txt --format txt
+# Prompt conversion (prompt file)
+papyrus ./path/to/input.pdf --prompt-file ./my-prompt.txt --format txt
 # Folder mode (recursive scan, asks for confirmation)
 papyrus ./path/to/folder
@@ -132,46 +132,34 @@ Example:
 papyrus ./docs --output ./converted
 ```
-### `--mode <mode>`
-Conversion mode:
-- `auto` (default): built-in conversion behavior.
-- `prompt`: use your own prompt via `--prompt` or `--prompt-file`.
-Example:
-```bash
-papyrus ./docs/invoice.pdf --mode prompt --prompt "Extract all line items."
-```
 ### `--instructions <text>`
-Additional conversion instructions in `auto` mode only.
+Additional conversion instructions for default conversion behavior. Cannot be combined with `--prompt` or `--prompt-file`.
 Example:
 ```bash
-papyrus ./docs/invoice.pdf --mode auto --instructions "Keep table columns aligned."
+papyrus ./docs/invoice.pdf --instructions "Keep table columns aligned."
 ```
 ### `--prompt <text>`
-Inline prompt text for `prompt` mode. Must be non-empty. In `prompt` mode, use exactly one of `--prompt` or `--prompt-file`.
+Inline prompt text for prompt-based conversion. Must be non-empty. Use exactly one of `--prompt` or `--prompt-file`.
 Example:
 ```bash
-papyrus ./docs/invoice.pdf --mode prompt --prompt "Summarize payment terms."
+papyrus ./docs/invoice.pdf --prompt "Summarize payment terms."
 ```
 ### `--prompt-file <path>`
-Path to a text file containing the prompt for `prompt` mode. File must contain non-empty text. In `prompt` mode, use exactly one of `--prompt` or `--prompt-file`.
+Path to a text file containing the prompt for prompt-based conversion. File must contain non-empty text. Use exactly one of `--prompt` or `--prompt-file`.
 Example:
 ```bash
-papyrus ./docs/invoice.pdf --mode prompt --prompt-file ./my-prompt.txt
+papyrus ./docs/invoice.pdf --prompt-file ./my-prompt.txt
 ```
 ### `-m, --model <model>`
@@ -206,9 +194,15 @@ papyrus ./docs --yes
 ## Notes
-- In `auto` mode without `--format`, the model returns structured JSON with `format` + `content`.
+- In default conversion (without `--prompt`/`--prompt-file`) and without `--format`, the model returns structured JSON with `format` + `content`.
+- Single-file input now also shows a live worker lane (spinner in TTY) while conversion is running.
 - Folder input is scanned recursively for `.pdf` files and processed in parallel.
 - In folder mode, `--output` must be a directory path and mirrored subfolders are preserved.
+- OpenAI rate-limit (`429`) responses are retried automatically using `Retry-After` (when present) plus exponential backoff.
+- Rate-limit retry tuning is available via environment variables:
+  - `PAPYRUS_RATE_LIMIT_MAX_RETRIES` (default `8`)
+  - `PAPYRUS_RATE_LIMIT_BASE_DELAY_MS` (default `2000`)
+  - `PAPYRUS_RATE_LIMIT_MAX_DELAY_MS` (default `120000`)
 - For scanned PDFs, output quality depends on OCR quality from the model.
 ## Development

package/dist/cli.js CHANGED Viewed

@@ -6,7 +6,7 @@ import { dirname, join, relative, resolve } from "node:path";
 import { Command } from "commander";
 import { clearStoredApiKey, getConfigFilePath, getStoredApiKey, maskApiKey, setStoredApiKey } from "./config.js";
 import { convertPdf } from "./openaiPdfToMarkdown.js";
-import { defaultOutputPath, formatDurationMs, isPdfPath, looksLikeFileOutput, parseConcurrency, parseFormat, parseMode, resolveFolderOutputPath, truncate, validateOptionCombination } from "./cliHelpers.js";
+import { defaultOutputPath, formatDurationMs, isPdfPath, looksLikeFileOutput, parseConcurrency, parseFormat, resolveFolderOutputPath, truncate, validateOptionCombination } from "./cliHelpers.js";
 const program = new Command();
 const configFilePath = getConfigFilePath();
 const OPENAI_API_KEYS_URL = "https://platform.openai.com/settings/organization/api-keys";
@@ -20,24 +20,24 @@ program
     .option("-m, --model <model>", "OpenAI model to use", "gpt-4o-mini")
     .option("--concurrency <n>", "Max parallel workers for folder input (default: 10)", parseConcurrency)
     .option("-y, --yes", "Skip confirmation prompt in folder mode")
-    .option("--mode <mode>", "Conversion mode: auto or prompt", parseMode, "auto")
     .option("--format <format>", "Output format override: md or txt", parseFormat)
-    .option("--instructions <text>", "Additional conversion instructions for auto mode")
-    .option("--prompt <text>", "Custom prompt text for prompt mode")
-    .option("--prompt-file <path>", "Path to file containing prompt text for prompt mode")
+    .option("--instructions <text>", "Additional conversion instructions (only when not using --prompt/--prompt-file)")
+    .option("--prompt <text>", "Custom prompt text (enables prompt mode)")
+    .option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
     .action(async (input, options) => {
     const inputPath = resolve(input);
     const startedAt = Date.now();
     try {
         validateOptionCombination(options);
         const promptText = await resolvePromptText(options);
+        const conversionMode = resolveConversionMode(promptText);
         const inputKind = await detectInputKind(inputPath);
         let usageTotals = emptyUsage();
         if (inputKind === "file") {
-            usageTotals = await processSingleFile(inputPath, options, promptText);
+            usageTotals = await processSingleFile(inputPath, options, conversionMode, promptText);
         }
         else {
-            const summary = await processFolder(inputPath, options, promptText);
+            const summary = await processFolder(inputPath, options, conversionMode, promptText);
             usageTotals = summary.usage;
             if (!summary.cancelled && summary.failed > 0) {
                 process.exitCode = 1;
@@ -112,26 +112,59 @@ program.parseAsync(process.argv).catch((error) => {
     console.error(`Command failed: ${message}`);
     process.exitCode = 1;
 });
-async function processSingleFile(inputPath, options, promptText) {
+async function processSingleFile(inputPath, options, mode, promptText) {
     if (!isPdfPath(inputPath)) {
         throw new Error("Input file must have a .pdf extension.");
     }
     await ensureApiKey();
-    const result = await convertPdf({
-        inputPath,
-        model: options.model,
-        mode: options.mode,
-        format: options.format,
-        instructions: options.instructions,
-        promptText
-    });
-    const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, result.format));
-    await mkdir(dirname(outputPath), { recursive: true });
-    await writeFile(outputPath, result.content, "utf8");
-    console.log(`Output (${result.format}) written to: ${outputPath}`);
-    return result.usage;
+    const startedAt = Date.now();
+    const displayInput = relative(process.cwd(), inputPath) || inputPath;
+    const workerDashboard = process.stdout.isTTY
+        ? new AsciiWorkerDashboard(1, 1)
+        : null;
+    workerDashboard?.setSummary(0, 0);
+    workerDashboard?.setWorkerRunning(0, displayInput);
+    if (!workerDashboard) {
+        console.log(`[worker-1] Running ${displayInput}`);
+    }
+    try {
+        const result = await convertPdf({
+            inputPath,
+            model: options.model,
+            mode,
+            format: options.format,
+            instructions: options.instructions,
+            promptText
+        });
+        const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, result.format));
+        await mkdir(dirname(outputPath), { recursive: true });
+        await writeFile(outputPath, result.content, "utf8");
+        if (workerDashboard) {
+            workerDashboard.setWorkerDone(0, displayInput, `${result.format} in ${formatDurationMs(Date.now() - startedAt)}`);
+            workerDashboard.setSummary(1, 0);
+        }
+        else {
+            console.log(`[worker-1] Done ${displayInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`);
+        }
+        console.log(`Output (${result.format}) written to: ${outputPath}`);
+        return result.usage;
+    }
+    catch (error) {
+        const message = error instanceof Error ? error.message : String(error);
+        if (workerDashboard) {
+            workerDashboard.setWorkerFailed(0, displayInput, `${truncate(message, 42)} (${formatDurationMs(Date.now() - startedAt)})`);
+            workerDashboard.setSummary(1, 1);
+        }
+        else {
+            console.error(`[worker-1] Failed ${displayInput}: ${message} (${formatDurationMs(Date.now() - startedAt)})`);
+        }
+        throw error;
+    }
+    finally {
+        workerDashboard?.stop();
+    }
 }
-async function processFolder(inputDir, options, promptText) {
+async function processFolder(inputDir, options, mode, promptText) {
     if (options.output && looksLikeFileOutput(options.output)) {
         throw new Error("In folder mode, --output must be a directory path (not a .md/.txt file path).");
     }
@@ -167,7 +200,7 @@ async function processFolder(inputDir, options, promptText) {
                 const result = await convertPdf({
                     inputPath: filePath,
                     model: options.model,
-                    mode: options.mode,
+                    mode,
                     format: options.format,
                     instructions: options.instructions,
                     promptText
@@ -217,9 +250,6 @@ async function processFolder(inputDir, options, promptText) {
     return { total: files.length, succeeded, failed, cancelled: false, usage };
 }
 async function resolvePromptText(options) {
-    if (options.mode !== "prompt") {
-        return undefined;
-    }
     if (options.prompt) {
         const prompt = options.prompt.trim();
         if (!prompt) {
@@ -237,6 +267,9 @@ async function resolvePromptText(options) {
     }
     return promptFromFile;
 }
+function resolveConversionMode(promptText) {
+    return promptText ? "prompt" : "auto";
+}
 async function handleConfigInit(options) {
     const existingKey = await getStoredApiKey();
     if (existingKey && !options.force) {

package/dist/cliHelpers.d.ts CHANGED Viewed

@@ -1,16 +1,14 @@
-import { type ConversionMode, type OutputFormat } from "./openaiPdfToMarkdown.js";
+import { type OutputFormat } from "./openaiPdfToMarkdown.js";
 export type CliOptions = {
     output?: string;
     model: string;
     concurrency?: number;
     yes?: boolean;
-    mode: ConversionMode;
     format?: OutputFormat;
     instructions?: string;
     prompt?: string;
     promptFile?: string;
 };
-export declare function parseMode(value: string): ConversionMode;
 export declare function parseFormat(value: string): OutputFormat;
 export declare function parseConcurrency(value: string): number;
 export declare function validateOptionCombination(options: CliOptions): void;

package/dist/cliHelpers.js CHANGED Viewed

@@ -1,11 +1,5 @@
 import { InvalidArgumentError } from "commander";
 import { basename, dirname, extname, join, relative } from "node:path";
-export function parseMode(value) {
-    if (value === "auto" || value === "prompt") {
-        return value;
-    }
-    throw new InvalidArgumentError("Mode must be either 'auto' or 'prompt'.");
-}
 export function parseFormat(value) {
     if (value === "md" || value === "txt") {
         return value;
@@ -20,18 +14,12 @@ export function parseConcurrency(value) {
     return parsed;
 }
 export function validateOptionCombination(options) {
-    if (options.mode === "prompt") {
-        const promptSourceCount = Number(Boolean(options.prompt)) + Number(Boolean(options.promptFile));
-        if (promptSourceCount !== 1) {
-            throw new Error("Prompt mode requires exactly one of --prompt or --prompt-file.");
-        }
-        if (options.instructions) {
-            throw new Error("--instructions is only supported in auto mode.");
-        }
-        return;
+    const promptSourceCount = Number(Boolean(options.prompt)) + Number(Boolean(options.promptFile));
+    if (promptSourceCount > 1) {
+        throw new Error("Use exactly one of --prompt or --prompt-file.");
     }
-    if (options.prompt || options.promptFile) {
-        throw new Error("--prompt and --prompt-file are only supported in prompt mode.");
+    if (promptSourceCount === 1 && options.instructions) {
+        throw new Error("--instructions cannot be combined with --prompt or --prompt-file.");
     }
 }
 export function defaultOutputPath(inputPath, format) {

package/dist/openaiPdfToMarkdown.js CHANGED Viewed

@@ -8,6 +8,9 @@ const AUTO_RESPONSE_SCHEMA = z.object({
     format: z.enum(["md", "txt"]),
     content: z.string().min(1)
 });
+const RATE_LIMIT_MAX_RETRIES = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_RETRIES", 8);
+const RATE_LIMIT_BASE_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_BASE_DELAY_MS", 2_000);
+const RATE_LIMIT_MAX_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_DELAY_MS", 120_000);
 export async function convertPdf(options) {
     const inputPath = resolve(options.inputPath);
     await access(inputPath);
@@ -16,17 +19,17 @@ export async function convertPdf(options) {
         throw new Error("OPENAI_API_KEY is not set.");
     }
     const client = new OpenAI({ apiKey });
-    const uploaded = await client.files.create({
+    const uploaded = await withRateLimitRetry("file upload", () => client.files.create({
         file: createReadStream(inputPath),
         purpose: "user_data"
-    });
+    }));
     const agent = new Agent({
         name: "PDF Converter",
         instructions: "You convert PDF files precisely according to the requested output format.",
         model: options.model
     });
     const promptText = buildPromptText(options);
-    const result = await run(agent, [
+    const result = await withRateLimitRetry("model run", () => run(agent, [
         {
             role: "user",
             content: [
@@ -40,7 +43,7 @@ export async function convertPdf(options) {
                 }
             ]
         }
-    ]);
+    ]));
     const rawOutput = (result.finalOutput ?? "").trim();
     if (!rawOutput) {
         throw new Error("No content returned by the API.");
@@ -142,3 +145,188 @@ function parseAutoResponse(rawOutput) {
     }
     return { format: validated.data.format, content };
 }
+async function withRateLimitRetry(operationName, operation) {
+    let attempt = 0;
+    while (true) {
+        try {
+            return await operation();
+        }
+        catch (error) {
+            if (!isRetriableRateLimitError(error) || attempt >= RATE_LIMIT_MAX_RETRIES) {
+                throw error;
+            }
+            const retryAfterMs = getRetryAfterMs(error);
+            const exponentialBackoffMs = RATE_LIMIT_BASE_DELAY_MS * (2 ** attempt);
+            const jitterMs = Math.floor(Math.random() * 750);
+            const computedDelayMs = retryAfterMs ?? (exponentialBackoffMs + jitterMs);
+            const waitMs = clampDelayMs(computedDelayMs, RATE_LIMIT_MAX_DELAY_MS);
+            const nextAttempt = attempt + 2;
+            const totalAttempts = RATE_LIMIT_MAX_RETRIES + 1;
+            const reason = extractErrorMessage(error);
+            console.warn(`[retry] ${operationName} hit OpenAI rate limits. Waiting ${formatDelay(waitMs)} before retry ${nextAttempt}/${totalAttempts}. ${reason}`);
+            await sleep(waitMs);
+            attempt += 1;
+        }
+    }
+}
+function isRetriableRateLimitError(error) {
+    if (typeof error !== "object" || error === null) {
+        return false;
+    }
+    const candidate = error;
+    if (candidate.status === 429) {
+        const code = typeof candidate.code === "string" ? candidate.code : undefined;
+        const nestedCode = typeof candidate.error?.code === "string" ? candidate.error.code : undefined;
+        if (code === "insufficient_quota" || nestedCode === "insufficient_quota") {
+            return false;
+        }
+        return true;
+    }
+    const searchableText = [
+        toLowerCaseIfString(candidate.code),
+        toLowerCaseIfString(candidate.type),
+        toLowerCaseIfString(candidate.error?.code),
+        toLowerCaseIfString(candidate.error?.type),
+        toLowerCaseIfString(candidate.message),
+        toLowerCaseIfString(candidate.error?.message)
+    ]
+        .filter(Boolean)
+        .join(" ");
+    if (searchableText.includes("insufficient_quota")) {
+        return false;
+    }
+    return (searchableText.includes("rate_limit") ||
+        searchableText.includes("rate limit") ||
+        searchableText.includes("too many requests"));
+}
+function getRetryAfterMs(error) {
+    const headerDelay = getRetryAfterMsFromHeaders(error);
+    if (typeof headerDelay === "number" && Number.isFinite(headerDelay) && headerDelay >= 0) {
+        return headerDelay;
+    }
+    const textDelay = getRetryAfterMsFromText(extractErrorMessage(error));
+    if (typeof textDelay === "number" && Number.isFinite(textDelay) && textDelay >= 0) {
+        return textDelay;
+    }
+    return undefined;
+}
+function getRetryAfterMsFromHeaders(error) {
+    if (typeof error !== "object" || error === null) {
+        return undefined;
+    }
+    const candidate = error;
+    const retryAfterMsHeader = readHeader(candidate.headers, "retry-after-ms")
+        ?? readHeader(candidate.response?.headers, "retry-after-ms");
+    if (retryAfterMsHeader) {
+        const milliseconds = Number.parseInt(retryAfterMsHeader, 10);
+        if (Number.isFinite(milliseconds) && milliseconds >= 0) {
+            return milliseconds;
+        }
+    }
+    const retryAfterHeader = readHeader(candidate.headers, "retry-after")
+        ?? readHeader(candidate.response?.headers, "retry-after");
+    if (!retryAfterHeader) {
+        return undefined;
+    }
+    const seconds = Number.parseFloat(retryAfterHeader);
+    if (Number.isFinite(seconds)) {
+        return Math.max(0, Math.round(seconds * 1_000));
+    }
+    const parsedDate = Date.parse(retryAfterHeader);
+    if (Number.isFinite(parsedDate)) {
+        return Math.max(0, parsedDate - Date.now());
+    }
+    return undefined;
+}
+function getRetryAfterMsFromText(message) {
+    const match = message.match(/(?:try again in|retry after)\s*([0-9]+(?:\.[0-9]+)?)\s*(ms|msec|millisecond|milliseconds|s|sec|second|seconds|m|min|minute|minutes)?/i);
+    if (!match) {
+        return undefined;
+    }
+    const rawValue = Number.parseFloat(match[1] ?? "");
+    if (!Number.isFinite(rawValue) || rawValue < 0) {
+        return undefined;
+    }
+    const unit = (match[2] ?? "s").toLowerCase();
+    if (unit === "ms" || unit === "msec" || unit === "millisecond" || unit === "milliseconds") {
+        return Math.round(rawValue);
+    }
+    if (unit === "m" || unit === "min" || unit === "minute" || unit === "minutes") {
+        return Math.round(rawValue * 60_000);
+    }
+    return Math.round(rawValue * 1_000);
+}
+function readHeader(headersLike, headerName) {
+    if (!headersLike) {
+        return undefined;
+    }
+    if (typeof headersLike === "object"
+        && "get" in headersLike
+        && typeof headersLike.get === "function") {
+        const value = headersLike.get(headerName);
+        return value ?? undefined;
+    }
+    if (typeof headersLike !== "object") {
+        return undefined;
+    }
+    const headersRecord = headersLike;
+    const lowerTarget = headerName.toLowerCase();
+    for (const [key, value] of Object.entries(headersRecord)) {
+        if (key.toLowerCase() !== lowerTarget) {
+            continue;
+        }
+        if (typeof value === "string") {
+            return value;
+        }
+        if (Array.isArray(value)) {
+            const first = value.find((entry) => typeof entry === "string");
+            return typeof first === "string" ? first : undefined;
+        }
+    }
+    return undefined;
+}
+function parsePositiveIntEnv(name, fallback) {
+    const raw = process.env[name];
+    if (!raw) {
+        return fallback;
+    }
+    const parsed = Number.parseInt(raw, 10);
+    if (!Number.isFinite(parsed) || parsed < 0) {
+        return fallback;
+    }
+    return parsed;
+}
+function clampDelayMs(value, max) {
+    return Math.max(250, Math.min(Math.round(value), max));
+}
+function formatDelay(milliseconds) {
+    if (milliseconds < 1_000) {
+        return `${milliseconds}ms`;
+    }
+    const seconds = milliseconds / 1_000;
+    return `${seconds.toFixed(seconds >= 10 ? 0 : 1)}s`;
+}
+function extractErrorMessage(error) {
+    if (error instanceof Error && error.message.trim().length > 0) {
+        return error.message;
+    }
+    if (typeof error === "object" && error !== null) {
+        const message = error.message;
+        if (typeof message === "string" && message.trim().length > 0) {
+            return message;
+        }
+        const nestedMessage = error.error?.message;
+        if (typeof nestedMessage === "string" && nestedMessage.trim().length > 0) {
+            return nestedMessage;
+        }
+    }
+    return String(error);
+}
+function toLowerCaseIfString(value) {
+    return typeof value === "string" ? value.toLowerCase() : "";
+}
+function sleep(milliseconds) {
+    return new Promise((resolveSleep) => {
+        setTimeout(resolveSleep, milliseconds);
+    });
+}

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@robin7331/papyrus-cli",
-  "version": "0.1.5",
+  "version": "0.1.7",
   "private": false,
   "description": "Convert PDF to markdown or text with the OpenAI Agents SDK",
   "repository": {
@@ -37,6 +37,7 @@
   },
   "dependencies": {
     "@openai/agents": "^0.5.3",
+    "@robin7331/papyrus-cli": "^0.1.4",
     "commander": "^14.0.0",
     "dotenv": "^17.3.1",
     "openai": "^6.7.0",

package/src/cli.ts CHANGED Viewed

@@ -14,6 +14,7 @@ import {
 } from "./config.js";
 import {
   convertPdf,
+  type ConversionMode,
   type ConvertUsage
 } from "./openaiPdfToMarkdown.js";
 import {
@@ -23,7 +24,6 @@ import {
   looksLikeFileOutput,
   parseConcurrency,
   parseFormat,
-  parseMode,
   resolveFolderOutputPath,
   truncate,
   type CliOptions,
@@ -52,14 +52,13 @@ program
     parseConcurrency
   )
   .option("-y, --yes", "Skip confirmation prompt in folder mode")
-  .option("--mode <mode>", "Conversion mode: auto or prompt", parseMode, "auto")
   .option("--format <format>", "Output format override: md or txt", parseFormat)
   .option(
     "--instructions <text>",
-    "Additional conversion instructions for auto mode"
+    "Additional conversion instructions (only when not using --prompt/--prompt-file)"
   )
-  .option("--prompt <text>", "Custom prompt text for prompt mode")
-  .option("--prompt-file <path>", "Path to file containing prompt text for prompt mode")
+  .option("--prompt <text>", "Custom prompt text (enables prompt mode)")
+  .option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
   .action(async (input: string, options: CliOptions) => {
     const inputPath = resolve(input);
     const startedAt = Date.now();
@@ -68,13 +67,14 @@ program
       validateOptionCombination(options);
       const promptText = await resolvePromptText(options);
+      const conversionMode = resolveConversionMode(promptText);
       const inputKind = await detectInputKind(inputPath);
       let usageTotals: ConvertUsage = emptyUsage();
       if (inputKind === "file") {
-        usageTotals = await processSingleFile(inputPath, options, promptText);
+        usageTotals = await processSingleFile(inputPath, options, conversionMode, promptText);
       } else {
-        const summary = await processFolder(inputPath, options, promptText);
+        const summary = await processFolder(inputPath, options, conversionMode, promptText);
         usageTotals = summary.usage;
         if (!summary.cancelled && summary.failed > 0) {
           process.exitCode = 1;
@@ -157,6 +157,7 @@ program.parseAsync(process.argv).catch((error: unknown) => {
 async function processSingleFile(
   inputPath: string,
   options: CliOptions,
+  mode: ConversionMode,
   promptText?: string
 ): Promise<ConvertUsage> {
   if (!isPdfPath(inputPath)) {
@@ -164,20 +165,66 @@ async function processSingleFile(
   }
   await ensureApiKey();
-  const result = await convertPdf({
-    inputPath,
-    model: options.model,
-    mode: options.mode,
-    format: options.format,
-    instructions: options.instructions,
-    promptText
-  });
+  const startedAt = Date.now();
+  const displayInput = relative(process.cwd(), inputPath) || inputPath;
+  const workerDashboard = process.stdout.isTTY
+    ? new AsciiWorkerDashboard(1, 1)
+    : null;
+  workerDashboard?.setSummary(0, 0);
+  workerDashboard?.setWorkerRunning(0, displayInput);
+  if (!workerDashboard) {
+    console.log(`[worker-1] Running ${displayInput}`);
+  }
+  try {
+    const result = await convertPdf({
+      inputPath,
+      model: options.model,
+      mode,
+      format: options.format,
+      instructions: options.instructions,
+      promptText
+    });
+    const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, result.format));
+    await mkdir(dirname(outputPath), { recursive: true });
+    await writeFile(outputPath, result.content, "utf8");
+    if (workerDashboard) {
+      workerDashboard.setWorkerDone(
+        0,
+        displayInput,
+        `${result.format} in ${formatDurationMs(Date.now() - startedAt)}`
+      );
+      workerDashboard.setSummary(1, 0);
+    } else {
+      console.log(
+        `[worker-1] Done ${displayInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`
+      );
+    }
-  const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, result.format));
-  await mkdir(dirname(outputPath), { recursive: true });
-  await writeFile(outputPath, result.content, "utf8");
-  console.log(`Output (${result.format}) written to: ${outputPath}`);
-  return result.usage;
+    console.log(`Output (${result.format}) written to: ${outputPath}`);
+    return result.usage;
+  } catch (error) {
+    const message = error instanceof Error ? error.message : String(error);
+    if (workerDashboard) {
+      workerDashboard.setWorkerFailed(
+        0,
+        displayInput,
+        `${truncate(message, 42)} (${formatDurationMs(Date.now() - startedAt)})`
+      );
+      workerDashboard.setSummary(1, 1);
+    } else {
+      console.error(
+        `[worker-1] Failed ${displayInput}: ${message} (${formatDurationMs(Date.now() - startedAt)})`
+      );
+    }
+    throw error;
+  } finally {
+    workerDashboard?.stop();
+  }
 }
 type FolderSummary = {
@@ -191,6 +238,7 @@ type FolderSummary = {
 async function processFolder(
   inputDir: string,
   options: CliOptions,
+  mode: ConversionMode,
   promptText?: string
 ): Promise<FolderSummary> {
   if (options.output && looksLikeFileOutput(options.output)) {
@@ -236,7 +284,7 @@ async function processFolder(
         const result = await convertPdf({
           inputPath: filePath,
           model: options.model,
-          mode: options.mode,
+          mode,
           format: options.format,
           instructions: options.instructions,
           promptText
@@ -301,10 +349,6 @@ async function processFolder(
 }
 async function resolvePromptText(options: CliOptions): Promise<string | undefined> {
-  if (options.mode !== "prompt") {
-    return undefined;
-  }
   if (options.prompt) {
     const prompt = options.prompt.trim();
     if (!prompt) {
@@ -327,6 +371,10 @@ async function resolvePromptText(options: CliOptions): Promise<string | undefine
   return promptFromFile;
 }
+function resolveConversionMode(promptText: string | undefined): ConversionMode {
+  return promptText ? "prompt" : "auto";
+}
 async function handleConfigInit(options: ConfigInitOptions): Promise<void> {
   const existingKey = await getStoredApiKey();
   if (existingKey && !options.force) {

package/src/cliHelpers.ts CHANGED Viewed

@@ -1,27 +1,18 @@
 import { InvalidArgumentError } from "commander";
 import { basename, dirname, extname, join, relative } from "node:path";
-import { type ConversionMode, type OutputFormat } from "./openaiPdfToMarkdown.js";
+import { type OutputFormat } from "./openaiPdfToMarkdown.js";
 export type CliOptions = {
   output?: string;
   model: string;
   concurrency?: number;
   yes?: boolean;
-  mode: ConversionMode;
   format?: OutputFormat;
   instructions?: string;
   prompt?: string;
   promptFile?: string;
 };
-export function parseMode(value: string): ConversionMode {
-  if (value === "auto" || value === "prompt") {
-    return value;
-  }
-  throw new InvalidArgumentError("Mode must be either 'auto' or 'prompt'.");
-}
 export function parseFormat(value: string): OutputFormat {
   if (value === "md" || value === "txt") {
     return value;
@@ -40,21 +31,13 @@ export function parseConcurrency(value: string): number {
 }
 export function validateOptionCombination(options: CliOptions): void {
-  if (options.mode === "prompt") {
-    const promptSourceCount = Number(Boolean(options.prompt)) + Number(Boolean(options.promptFile));
-    if (promptSourceCount !== 1) {
-      throw new Error("Prompt mode requires exactly one of --prompt or --prompt-file.");
-    }
-    if (options.instructions) {
-      throw new Error("--instructions is only supported in auto mode.");
-    }
-    return;
+  const promptSourceCount = Number(Boolean(options.prompt)) + Number(Boolean(options.promptFile));
+  if (promptSourceCount > 1) {
+    throw new Error("Use exactly one of --prompt or --prompt-file.");
   }
-  if (options.prompt || options.promptFile) {
-    throw new Error("--prompt and --prompt-file are only supported in prompt mode.");
+  if (promptSourceCount === 1 && options.instructions) {
+    throw new Error("--instructions cannot be combined with --prompt or --prompt-file.");
   }
 }

package/src/openaiPdfToMarkdown.ts CHANGED Viewed

@@ -35,6 +35,10 @@ const AUTO_RESPONSE_SCHEMA = z.object({
   content: z.string().min(1)
 });
+// Retry tuning for rate-limited calls. Each value is read once, at module load, from the
+// named environment variable; the second argument is the fallback used when the variable
+// is unset or does not parse as a non-negative integer.
+// Maximum number of retries after the initial call.
+const RATE_LIMIT_MAX_RETRIES = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_RETRIES", 8);
+// Base of the exponential backoff, in milliseconds (doubled on every retry).
+const RATE_LIMIT_BASE_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_BASE_DELAY_MS", 2_000);
+// Upper bound applied to any single wait, in milliseconds.
+const RATE_LIMIT_MAX_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_DELAY_MS", 120_000);
 export async function convertPdf(options: ConvertOptions): Promise<ConvertResult> {
   const inputPath = resolve(options.inputPath);
   await access(inputPath);
@@ -46,10 +50,12 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
   const client = new OpenAI({ apiKey });
-  const uploaded = await client.files.create({
-    file: createReadStream(inputPath),
-    purpose: "user_data"
-  });
+  const uploaded = await withRateLimitRetry("file upload", () =>
+    client.files.create({
+      file: createReadStream(inputPath),
+      purpose: "user_data"
+    })
+  );
   const agent = new Agent({
     name: "PDF Converter",
@@ -58,21 +64,23 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
   });
   const promptText = buildPromptText(options);
-  const result = await run(agent, [
-    {
-      role: "user",
-      content: [
-        {
-          type: "input_text",
-          text: promptText
-        },
-        {
-          type: "input_file",
-          file: { id: uploaded.id }
-        }
-      ]
-    }
-  ]);
+  const result = await withRateLimitRetry("model run", () =>
+    run(agent, [
+      {
+        role: "user",
+        content: [
+          {
+            type: "input_text",
+            text: promptText
+          },
+          {
+            type: "input_file",
+            file: { id: uploaded.id }
+          }
+        ]
+      }
+    ])
+  );
   const rawOutput = (result.finalOutput ?? "").trim();
   if (!rawOutput) {
@@ -201,3 +209,249 @@ function parseAutoResponse(rawOutput: string): Omit<ConvertResult, "usage"> {
   return { format: validated.data.format, content };
 }
+/**
+ * Runs `operation`, re-invoking it while it keeps failing with a retriable rate-limit error.
+ *
+ * The wait before each new attempt is the server-provided hint from `getRetryAfterMs` when
+ * one is available, otherwise exponential backoff (`RATE_LIMIT_BASE_DELAY_MS * 2 ** attempt`)
+ * plus up to 749 ms of random jitter. Either value is then bounded by `clampDelayMs`.
+ *
+ * @param operationName Label used in the warning logged before each wait.
+ * @param operation Factory invoked once per attempt, so each attempt gets a fresh call.
+ * @returns The value resolved by the first successful attempt.
+ * @throws The caught error, unchanged, when it is not a retriable rate-limit error or when
+ *   `RATE_LIMIT_MAX_RETRIES` retries have already been made.
+ */
+async function withRateLimitRetry<T>(operationName: string, operation: () => Promise<T>): Promise<T> {
+  let attempt = 0;
+  while (true) {
+    try {
+      return await operation();
+    } catch (error) {
+      if (!isRetriableRateLimitError(error) || attempt >= RATE_LIMIT_MAX_RETRIES) {
+        throw error;
+      }
+      const retryAfterMs = getRetryAfterMs(error);
+      const exponentialBackoffMs = RATE_LIMIT_BASE_DELAY_MS * (2 ** attempt);
+      const jitterMs = Math.floor(Math.random() * 750);
+      // A server hint wins over the locally computed backoff; jitter only applies to the latter.
+      const computedDelayMs = retryAfterMs ?? (exponentialBackoffMs + jitterMs);
+      const waitMs = clampDelayMs(computedDelayMs, RATE_LIMIT_MAX_DELAY_MS);
+      // Ordinal of the upcoming call among all permitted calls (initial call + retries).
+      // It is reported as an "attempt": labelling it a retry made the first retry read "2/9".
+      const nextAttempt = attempt + 2;
+      const totalAttempts = RATE_LIMIT_MAX_RETRIES + 1;
+      const reason = extractErrorMessage(error);
+      console.warn(
+        `[retry] ${operationName} hit OpenAI rate limits. Waiting ${formatDelay(waitMs)} before attempt ${nextAttempt}/${totalAttempts}. ${reason}`
+      );
+      await sleep(waitMs);
+      attempt += 1;
+    }
+  }
+}
+/**
+ * Decides whether a thrown value looks like a rate limit that is worth retrying.
+ *
+ * A `status` of 429 is retriable unless its `code` (top-level or nested under `error`) is
+ * "insufficient_quota". Without a 429 status, the lower-cased code/type/message fields are
+ * searched for rate-limit wording, again treating "insufficient_quota" as non-retriable.
+ *
+ * @param error Value caught from a failed call; anything that is not a non-null object
+ *   is never retriable.
+ * @returns `true` when the failure matches one of the rate-limit signals above.
+ */
+function isRetriableRateLimitError(error: unknown): boolean {
+  if (typeof error !== "object" || error === null) {
+    return false;
+  }
+  const candidate = error as {
+    status?: unknown;
+    code?: unknown;
+    type?: unknown;
+    error?: { code?: unknown; type?: unknown; message?: unknown };
+    message?: unknown;
+  };
+  if (candidate.status === 429) {
+    const code = typeof candidate.code === "string" ? candidate.code : undefined;
+    const nestedCode = typeof candidate.error?.code === "string" ? candidate.error.code : undefined;
+    // A 429 caused by exhausted quota is excluded from retries.
+    if (code === "insufficient_quota" || nestedCode === "insufficient_quota") {
+      return false;
+    }
+    return true;
+  }
+  // No 429 status: fall back to scanning every textual field for rate-limit markers.
+  const searchableText = [
+    toLowerCaseIfString(candidate.code),
+    toLowerCaseIfString(candidate.type),
+    toLowerCaseIfString(candidate.error?.code),
+    toLowerCaseIfString(candidate.error?.type),
+    toLowerCaseIfString(candidate.message),
+    toLowerCaseIfString(candidate.error?.message)
+  ]
+    .filter(Boolean)
+    .join(" ");
+  if (searchableText.includes("insufficient_quota")) {
+    return false;
+  }
+  return (
+    searchableText.includes("rate_limit") ||
+    searchableText.includes("rate limit") ||
+    searchableText.includes("too many requests")
+  );
+}
+/**
+ * Extracts a suggested retry delay from an error.
+ *
+ * Response headers are consulted first; the error message text is the fallback.
+ *
+ * @param error Value caught from a failed call.
+ * @returns The delay in milliseconds (finite, non-negative), or `undefined` when neither
+ *   source yields one.
+ */
+function getRetryAfterMs(error: unknown): number | undefined {
+  const headerDelay = getRetryAfterMsFromHeaders(error);
+  if (typeof headerDelay === "number" && Number.isFinite(headerDelay) && headerDelay >= 0) {
+    return headerDelay;
+  }
+  const textDelay = getRetryAfterMsFromText(extractErrorMessage(error));
+  if (typeof textDelay === "number" && Number.isFinite(textDelay) && textDelay >= 0) {
+    return textDelay;
+  }
+  return undefined;
+}
+/**
+ * Reads a retry delay from the headers attached to an error.
+ *
+ * Headers are looked up on `error.headers` first, then on `error.response.headers`.
+ * A usable `retry-after-ms` value (integer milliseconds) takes precedence; otherwise
+ * `retry-after` is interpreted as a number of seconds or, failing that, as a date.
+ *
+ * @param error Value caught from a failed call.
+ * @returns The delay in milliseconds, or `undefined` when no usable header is present.
+ */
+function getRetryAfterMsFromHeaders(error: unknown): number | undefined {
+  if (typeof error !== "object" || error === null) {
+    return undefined;
+  }
+  const candidate = error as {
+    headers?: unknown;
+    response?: { headers?: unknown };
+  };
+  const retryAfterMsHeader = readHeader(candidate.headers, "retry-after-ms")
+    ?? readHeader(candidate.response?.headers, "retry-after-ms");
+  if (retryAfterMsHeader) {
+    const milliseconds = Number.parseInt(retryAfterMsHeader, 10);
+    if (Number.isFinite(milliseconds) && milliseconds >= 0) {
+      return milliseconds;
+    }
+    // An unusable value falls through to the `retry-after` header below.
+  }
+  const retryAfterHeader = readHeader(candidate.headers, "retry-after")
+    ?? readHeader(candidate.response?.headers, "retry-after");
+  if (!retryAfterHeader) {
+    return undefined;
+  }
+  // Numeric form: a delay in seconds (fractions allowed), never negative.
+  const seconds = Number.parseFloat(retryAfterHeader);
+  if (Number.isFinite(seconds)) {
+    return Math.max(0, Math.round(seconds * 1_000));
+  }
+  // Date form: time remaining until that instant, or 0 if it has already passed.
+  const parsedDate = Date.parse(retryAfterHeader);
+  if (Number.isFinite(parsedDate)) {
+    return Math.max(0, parsedDate - Date.now());
+  }
+  return undefined;
+}
+// Number and unit fragments shared by the retry-hint patterns below. Longer unit
+// spellings are listed before their prefixes so the whole word is captured.
+const RETRY_HINT_NUMBER = "[0-9]+(?:\\.[0-9]+)?";
+const RETRY_HINT_UNIT = "milliseconds?|msec|ms|seconds?|sec|s|minutes?|min|m|hours?|hrs?|h";
+// Trigger phrase, then "<number> [unit]". When a unit is present it may be followed by
+// further "<number><unit>" segments written without spaces, e.g. "1m26.4s".
+const RETRY_HINT_PATTERN = new RegExp(
+  `(?:try again in|retry after)\\s*(${RETRY_HINT_NUMBER})\\s*(?:(${RETRY_HINT_UNIT})((?:${RETRY_HINT_NUMBER}(?:${RETRY_HINT_UNIT}))*))?`,
+  "i"
+);
+const RETRY_HINT_SEGMENT_PATTERN = new RegExp(`(${RETRY_HINT_NUMBER})(${RETRY_HINT_UNIT})`, "gi");
+
+/** Returns the number of milliseconds in one of the unit spellings accepted by `RETRY_HINT_UNIT`. */
+function retryHintUnitToMs(unit: string): number {
+  const normalized = unit.toLowerCase();
+  if (normalized.startsWith("h")) {
+    return 3_600_000;
+  }
+  if (normalized === "ms" || normalized === "msec" || normalized.startsWith("millisecond")) {
+    return 1;
+  }
+  if (normalized.startsWith("m")) {
+    return 60_000;
+  }
+  return 1_000;
+}
+
+/**
+ * Parses a "try again in …" / "retry after …" hint out of an error message.
+ *
+ * Accepts a single value with an optional unit ("20s", "350ms", "2 minutes"; a bare number
+ * means seconds) as well as compound durations whose segments are written back to back
+ * ("1m26.4s", "1h0m0s"). Compound hints are summed; previously only the first segment was
+ * used and hour units were read as seconds, which made the wait far too short.
+ *
+ * @param message Error message text to search (case-insensitive).
+ * @returns The hinted delay in whole milliseconds, or `undefined` when no hint is found.
+ */
+function getRetryAfterMsFromText(message: string): number | undefined {
+  const match = RETRY_HINT_PATTERN.exec(message);
+  if (!match) {
+    return undefined;
+  }
+  const leadingValue = Number.parseFloat(match[1] ?? "");
+  if (!Number.isFinite(leadingValue) || leadingValue < 0) {
+    return undefined;
+  }
+  let totalMs = leadingValue * retryHintUnitToMs(match[2] ?? "s");
+  // Add any trailing "<number><unit>" segments of a compound duration.
+  for (const segment of (match[3] ?? "").matchAll(RETRY_HINT_SEGMENT_PATTERN)) {
+    totalMs += Number.parseFloat(segment[1] ?? "") * retryHintUnitToMs(segment[2] ?? "s");
+  }
+  return Math.round(totalMs);
+}
+/**
+ * Looks up a header value on a headers container of unknown shape.
+ *
+ * Two shapes are supported: an object exposing a `get(name)` function, and a plain record
+ * whose keys are compared case-insensitively.
+ *
+ * @param headersLike Candidate headers container; falsy and non-object values yield `undefined`.
+ * @param headerName Header to look up.
+ * @returns The header value as a string, or `undefined` when it is missing or not a string.
+ */
+function readHeader(headersLike: unknown, headerName: string): string | undefined {
+  if (!headersLike) {
+    return undefined;
+  }
+  if (
+    typeof headersLike === "object"
+    && "get" in headersLike
+    && typeof (headersLike as { get?: unknown }).get === "function"
+  ) {
+    // `get` is called with the name exactly as given; a null result maps to undefined.
+    const value = (headersLike as { get: (name: string) => string | null }).get(headerName);
+    return value ?? undefined;
+  }
+  if (typeof headersLike !== "object") {
+    return undefined;
+  }
+  const headersRecord = headersLike as Record<string, unknown>;
+  const lowerTarget = headerName.toLowerCase();
+  for (const [key, value] of Object.entries(headersRecord)) {
+    if (key.toLowerCase() !== lowerTarget) {
+      continue;
+    }
+    if (typeof value === "string") {
+      return value;
+    }
+    // For array values, the first string entry is used.
+    if (Array.isArray(value)) {
+      const first = value.find((entry) => typeof entry === "string");
+      return typeof first === "string" ? first : undefined;
+    }
+    // Any other value type is skipped and the scan continues with the remaining keys.
+  }
+  return undefined;
+}
+/**
+ * Reads an integer setting from `process.env`.
+ *
+ * @param name Environment variable to read.
+ * @param fallback Value returned when the variable is unset, empty, not parseable as a
+ *   base-10 integer, or negative.
+ * @returns The parsed integer, or `fallback`.
+ */
+function parsePositiveIntEnv(name: string, fallback: number): number {
+  const raw = process.env[name];
+  if (!raw) {
+    return fallback;
+  }
+  const parsed = Number.parseInt(raw, 10);
+  // Despite the function name, zero passes this check; only negative values are rejected.
+  if (!Number.isFinite(parsed) || parsed < 0) {
+    return fallback;
+  }
+  return parsed;
+}
+/**
+ * Rounds a delay to whole milliseconds and bounds it.
+ *
+ * The 250 ms floor is applied after the cap, so the result is at least 250 even when
+ * `max` is smaller than that.
+ *
+ * @param value Requested delay in milliseconds.
+ * @param max Upper bound in milliseconds.
+ * @returns The rounded delay, capped at `max` and then raised to at least 250.
+ */
+function clampDelayMs(value: number, max: number): number {
+  return Math.max(250, Math.min(Math.round(value), max));
+}
+/**
+ * Formats a delay for log output.
+ *
+ * @param milliseconds Delay in milliseconds.
+ * @returns `"<n>ms"` below one second; otherwise seconds with one decimal place below
+ *   10 s and no decimals from 10 s upwards, suffixed with `s`.
+ */
+function formatDelay(milliseconds: number): string {
+  if (milliseconds < 1_000) {
+    return `${milliseconds}ms`;
+  }
+  const seconds = milliseconds / 1_000;
+  return `${seconds.toFixed(seconds >= 10 ? 0 : 1)}s`;
+}
+/**
+ * Produces a human-readable message for an arbitrary thrown value.
+ *
+ * Preference order: a non-blank `Error#message`, a non-blank string `message` property,
+ * a non-blank string `error.message` property, and finally `String(error)`.
+ *
+ * @param error Value caught from a failed call.
+ * @returns The best available message text.
+ */
+function extractErrorMessage(error: unknown): string {
+  if (error instanceof Error && error.message.trim().length > 0) {
+    return error.message;
+  }
+  if (typeof error === "object" && error !== null) {
+    const message = (error as { message?: unknown; error?: { message?: unknown } }).message;
+    if (typeof message === "string" && message.trim().length > 0) {
+      return message;
+    }
+    const nestedMessage = (error as { error?: { message?: unknown } }).error?.message;
+    if (typeof nestedMessage === "string" && nestedMessage.trim().length > 0) {
+      return nestedMessage;
+    }
+  }
+  return String(error);
+}
+/**
+ * Lower-cases a value when it is a string.
+ *
+ * @param value Any value.
+ * @returns The lower-cased string, or an empty string for non-string input.
+ */
+function toLowerCaseIfString(value: unknown): string {
+  return typeof value === "string" ? value.toLowerCase() : "";
+}
+/**
+ * Returns a promise that resolves after a `setTimeout` of the given duration.
+ *
+ * @param milliseconds Timeout passed to `setTimeout`.
+ * @returns A promise that resolves with no value once the timer fires.
+ */
+function sleep(milliseconds: number): Promise<void> {
+  return new Promise((resolveSleep) => {
+    setTimeout(resolveSleep, milliseconds);
+  });
+}

package/test/cliHelpers.test.ts CHANGED Viewed

@@ -8,22 +8,12 @@ import {
   looksLikeFileOutput,
   parseConcurrency,
   parseFormat,
-  parseMode,
   resolveFolderOutputPath,
   truncate,
   validateOptionCombination,
   type CliOptions
 } from "../src/cliHelpers.js";
-test("parseMode accepts valid values", () => {
-  assert.equal(parseMode("auto"), "auto");
-  assert.equal(parseMode("prompt"), "prompt");
-});
-test("parseMode rejects invalid values", () => {
-  assert.throws(() => parseMode("invalid"), InvalidArgumentError);
-});
 test("parseFormat accepts valid values", () => {
   assert.equal(parseFormat("md"), "md");
   assert.equal(parseFormat("txt"), "txt");
@@ -45,42 +35,40 @@ test("parseConcurrency rejects invalid values", () => {
   assert.throws(() => parseConcurrency("abc"), InvalidArgumentError);
 });
-test("validateOptionCombination enforces prompt mode requirements", () => {
+test("validateOptionCombination allows default auto behavior without prompt flags", () => {
   const base: CliOptions = {
-    model: "gpt-4o-mini",
-    mode: "prompt"
+    model: "gpt-4o-mini"
   };
-  assert.throws(
-    () => validateOptionCombination(base),
-    /Prompt mode requires exactly one of --prompt or --prompt-file\./
-  );
-  assert.doesNotThrow(() => validateOptionCombination({ ...base, prompt: "Convert this" }));
+  assert.doesNotThrow(() => validateOptionCombination(base));
+  assert.doesNotThrow(() => validateOptionCombination({ ...base, instructions: "Extra formatting rules" }));
+});
+test("validateOptionCombination treats --prompt and --prompt-file as mutually exclusive", () => {
+  const base: CliOptions = {
+    model: "gpt-4o-mini"
+  };
+  assert.doesNotThrow(() => validateOptionCombination({ ...base, prompt: "Convert" }));
   assert.doesNotThrow(() => validateOptionCombination({ ...base, promptFile: "./prompt.txt" }));
   assert.throws(
     () => validateOptionCombination({ ...base, prompt: "x", promptFile: "./prompt.txt" }),
-    /Prompt mode requires exactly one of --prompt or --prompt-file\./
-  );
-  assert.throws(
-    () => validateOptionCombination({ ...base, prompt: "x", instructions: "Extra" }),
-    /--instructions is only supported in auto mode\./
+    /Use exactly one of --prompt or --prompt-file\./
   );
 });
-test("validateOptionCombination rejects prompt flags in auto mode", () => {
+test("validateOptionCombination rejects --instructions with prompt flags", () => {
   const base: CliOptions = {
-    model: "gpt-4o-mini",
-    mode: "auto"
+    model: "gpt-4o-mini"
   };
-  assert.doesNotThrow(() => validateOptionCombination(base));
   assert.throws(
-    () => validateOptionCombination({ ...base, prompt: "Convert" }),
-    /--prompt and --prompt-file are only supported in prompt mode\./
+    () => validateOptionCombination({ ...base, prompt: "x", instructions: "Extra" }),
+    /--instructions cannot be combined with --prompt or --prompt-file\./
   );
   assert.throws(
-    () => validateOptionCombination({ ...base, promptFile: "./prompt.txt" }),
-    /--prompt and --prompt-file are only supported in prompt mode\./
+    () => validateOptionCombination({ ...base, promptFile: "./prompt.txt", instructions: "Extra" }),
+    /--instructions cannot be combined with --prompt or --prompt-file\./
   );
 });