@robin7331/papyrus-cli 0.1.7 → 0.1.9

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -30,7 +30,7 @@ papyrus --version
30
30
  # Single file (default behavior; if no API key is found, Papyrus prompts you to paste one)
31
31
  papyrus ./path/to/input.pdf
32
32
 
33
- # Single file with explicit format/output/model
33
+ # Single file with explicit output extension, output path, and model
34
34
  papyrus ./path/to/input.pdf --format md --output ./out/result.md --model gpt-4o-mini
35
35
 
36
36
  # Default conversion with extra instructions
@@ -110,14 +110,14 @@ papyrus --version
110
110
 
111
111
  ### `--format <format>`
112
112
 
113
- Output format override:
114
- - `md` for GitHub-flavored Markdown
115
- - `txt` for plain text
113
+ Output file extension override. Any extension is allowed (for example `md`, `txt`, `csv`, `json`).
114
+ This flag sets the output filename extension; it does not validate or convert the content to that format.
115
+ When provided, Papyrus also passes the extension as a guidance hint to the model.
116
116
 
117
117
  Example:
118
118
 
119
119
  ```bash
120
- papyrus ./docs/invoice.pdf --format md
120
+ papyrus ./docs/invoice.pdf --format csv
121
121
  ```
122
122
 
123
123
  ### `-o, --output <path>`
@@ -194,8 +194,10 @@ papyrus ./docs --yes
194
194
 
195
195
  ## Notes
196
196
 
197
- - In default conversion (without `--prompt`/`--prompt-file`) and without `--format`, the model returns structured JSON with `format` + `content`.
198
- - Single-file input now also shows a live worker lane (spinner in TTY) while conversion is running.
197
+ - In default conversion (without `--prompt`/`--prompt-file`), the model returns structured JSON with `format` + `content`.
198
+ - Without `--format`, the output extension follows the model-selected content format (`.md` or `.txt`).
199
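+ - With `--format`, the output extension is overridden and the extension is passed to the model as a guidance hint; the content is not otherwise converted.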
200
+ - Single-file input now also shows a live worker lane in TTY while conversion is running.
199
201
  - Folder input is scanned recursively for `.pdf` files and processed in parallel.
200
202
  - In folder mode, `--output` must be a directory path and mirrored subfolders are preserved.
201
203
  - OpenAI rate-limit (`429`) responses are retried automatically using `Retry-After` (when present) plus exponential backoff.
package/dist/cli.js CHANGED
@@ -20,7 +20,7 @@ program
20
20
  .option("-m, --model <model>", "OpenAI model to use", "gpt-4o-mini")
21
21
  .option("--concurrency <n>", "Max parallel workers for folder input (default: 10)", parseConcurrency)
22
22
  .option("-y, --yes", "Skip confirmation prompt in folder mode")
23
- .option("--format <format>", "Output format override: md or txt", parseFormat)
23
+ .option("--format <format>", "Output file extension override (for example: md, txt, csv, json)", parseFormat)
24
24
  .option("--instructions <text>", "Additional conversion instructions (only when not using --prompt/--prompt-file)")
25
25
  .option("--prompt <text>", "Custom prompt text (enables prompt mode)")
26
26
  .option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
@@ -132,21 +132,22 @@ async function processSingleFile(inputPath, options, mode, promptText) {
132
132
  inputPath,
133
133
  model: options.model,
134
134
  mode,
135
- format: options.format,
136
135
  instructions: options.instructions,
137
- promptText
136
+ promptText,
137
+ outputExtensionHint: options.format
138
138
  });
139
- const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, result.format));
139
+ const outputExtension = options.format ?? result.format;
140
+ const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, outputExtension));
140
141
  await mkdir(dirname(outputPath), { recursive: true });
141
142
  await writeFile(outputPath, result.content, "utf8");
142
143
  if (workerDashboard) {
143
- workerDashboard.setWorkerDone(0, displayInput, `${result.format} in ${formatDurationMs(Date.now() - startedAt)}`);
144
+ workerDashboard.setWorkerDone(0, displayInput, `${outputExtension} in ${formatDurationMs(Date.now() - startedAt)}`);
144
145
  workerDashboard.setSummary(1, 0);
145
146
  }
146
147
  else {
147
- console.log(`[worker-1] Done ${displayInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`);
148
+ console.log(`[worker-1] Done ${displayInput} -> ${outputPath} (${outputExtension}, ${formatDurationMs(Date.now() - startedAt)})`);
148
149
  }
149
- console.log(`Output (${result.format}) written to: ${outputPath}`);
150
+ console.log(`Output (.${outputExtension}) written to: ${outputPath}`);
150
151
  return result.usage;
151
152
  }
152
153
  catch (error) {
@@ -166,7 +167,7 @@ async function processSingleFile(inputPath, options, mode, promptText) {
166
167
  }
167
168
  async function processFolder(inputDir, options, mode, promptText) {
168
169
  if (options.output && looksLikeFileOutput(options.output)) {
169
- throw new Error("In folder mode, --output must be a directory path (not a .md/.txt file path).");
170
+ throw new Error("In folder mode, --output must be a directory path.");
170
171
  }
171
172
  const files = await collectPdfFiles(inputDir);
172
173
  if (files.length === 0) {
@@ -201,20 +202,21 @@ async function processFolder(inputDir, options, mode, promptText) {
201
202
  inputPath: filePath,
202
203
  model: options.model,
203
204
  mode,
204
- format: options.format,
205
205
  instructions: options.instructions,
206
- promptText
206
+ promptText,
207
+ outputExtensionHint: options.format
207
208
  });
208
- const outputPath = resolveFolderOutputPath(filePath, inputDir, outputRoot, result.format);
209
+ const outputExtension = options.format ?? result.format;
210
+ const outputPath = resolveFolderOutputPath(filePath, inputDir, outputRoot, outputExtension);
209
211
  await mkdir(dirname(outputPath), { recursive: true });
210
212
  await writeFile(outputPath, result.content, "utf8");
211
213
  succeeded += 1;
212
214
  mergeUsage(usage, result.usage);
213
215
  if (workerDashboard) {
214
- workerDashboard.setWorkerDone(workerId, relativeInput, `${result.format} in ${formatDurationMs(Date.now() - startedAt)}`);
216
+ workerDashboard.setWorkerDone(workerId, relativeInput, `${outputExtension} in ${formatDurationMs(Date.now() - startedAt)}`);
215
217
  }
216
218
  else {
217
- console.log(`[worker-${workerId + 1}] Done ${relativeInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`);
219
+ console.log(`[worker-${workerId + 1}] Done ${relativeInput} -> ${outputPath} (${outputExtension}, ${formatDurationMs(Date.now() - startedAt)})`);
218
220
  }
219
221
  }
220
222
  catch (error) {
@@ -431,12 +433,10 @@ async function runWithConcurrency(items, concurrency, worker) {
431
433
  });
432
434
  await Promise.all(workers);
433
435
  }
434
- const SPINNER_FRAMES = ["-", "\\", "|", "/"];
435
436
  class AsciiWorkerDashboard {
436
437
  lanes;
437
438
  total;
438
439
  workerCount;
439
- spinnerTimer;
440
440
  completed = 0;
441
441
  failed = 0;
442
442
  renderedLineCount = 0;
@@ -444,15 +444,10 @@ class AsciiWorkerDashboard {
444
444
  this.total = total;
445
445
  this.workerCount = workerCount;
446
446
  this.lanes = Array.from({ length: workerCount }, () => ({
447
- state: "idle",
448
- spinnerFrame: 0
447
+ state: "idle"
449
448
  }));
450
449
  process.stdout.write("\x1b[?25l");
451
450
  this.render();
452
- this.spinnerTimer = setInterval(() => {
453
- this.tickSpinners();
454
- this.render();
455
- }, 100);
456
451
  }
457
452
  setSummary(completed, failed) {
458
453
  this.completed = completed;
@@ -466,7 +461,7 @@ class AsciiWorkerDashboard {
466
461
  }
467
462
  lane.state = "running";
468
463
  lane.file = file;
469
- lane.message = "processing";
464
+ lane.message = "processing...";
470
465
  this.render();
471
466
  }
472
467
  setWorkerDone(workerId, file, message) {
@@ -490,7 +485,6 @@ class AsciiWorkerDashboard {
490
485
  this.render();
491
486
  }
492
487
  stop() {
493
- clearInterval(this.spinnerTimer);
494
488
  this.render();
495
489
  process.stdout.write("\x1b[?25h");
496
490
  }
@@ -519,17 +513,9 @@ class AsciiWorkerDashboard {
519
513
  }
520
514
  return lines;
521
515
  }
522
- tickSpinners() {
523
- for (const lane of this.lanes) {
524
- if (lane.state !== "running") {
525
- continue;
526
- }
527
- lane.spinnerFrame = (lane.spinnerFrame + 1) % SPINNER_FRAMES.length;
528
- }
529
- }
530
516
  renderIcon(lane) {
531
517
  if (lane.state === "running") {
532
- return SPINNER_FRAMES[lane.spinnerFrame];
518
+ return ">>";
533
519
  }
534
520
  if (lane.state === "done") {
535
521
  return "OK";
@@ -1,19 +1,18 @@
1
- import { type OutputFormat } from "./openaiPdfToMarkdown.js";
2
1
  export type CliOptions = {
3
2
  output?: string;
4
3
  model: string;
5
4
  concurrency?: number;
6
5
  yes?: boolean;
7
- format?: OutputFormat;
6
+ format?: string;
8
7
  instructions?: string;
9
8
  prompt?: string;
10
9
  promptFile?: string;
11
10
  };
12
- export declare function parseFormat(value: string): OutputFormat;
11
+ export declare function parseFormat(value: string): string;
13
12
  export declare function parseConcurrency(value: string): number;
14
13
  export declare function validateOptionCombination(options: CliOptions): void;
15
- export declare function defaultOutputPath(inputPath: string, format: OutputFormat): string;
16
- export declare function resolveFolderOutputPath(inputPath: string, inputRoot: string, outputRoot: string | undefined, format: OutputFormat): string;
14
+ export declare function defaultOutputPath(inputPath: string, extension: string): string;
15
+ export declare function resolveFolderOutputPath(inputPath: string, inputRoot: string, outputRoot: string | undefined, extension: string): string;
17
16
  export declare function isPdfPath(inputPath: string): boolean;
18
17
  export declare function looksLikeFileOutput(outputPath: string): boolean;
19
18
  export declare function truncate(value: string, maxLength: number): string;
@@ -1,10 +1,14 @@
1
1
  import { InvalidArgumentError } from "commander";
2
2
  import { basename, dirname, extname, join, relative } from "node:path";
3
3
  export function parseFormat(value) {
4
- if (value === "md" || value === "txt") {
5
- return value;
4
+ const normalized = value.trim().replace(/^\.+/, "");
5
+ if (!normalized) {
6
+ throw new InvalidArgumentError("Format must be a non-empty file extension.");
7
+ }
8
+ if (normalized.includes("/") || normalized.includes("\\")) {
9
+ throw new InvalidArgumentError("Format must be a file extension, not a path.");
6
10
  }
7
- throw new InvalidArgumentError("Format must be either 'md' or 'txt'.");
11
+ return normalized;
8
12
  }
9
13
  export function parseConcurrency(value) {
10
14
  const parsed = Number(value);
@@ -22,21 +26,22 @@ export function validateOptionCombination(options) {
22
26
  throw new Error("--instructions cannot be combined with --prompt or --prompt-file.");
23
27
  }
24
28
  }
25
- export function defaultOutputPath(inputPath, format) {
26
- const extension = format === "md" ? ".md" : ".txt";
29
+ export function defaultOutputPath(inputPath, extension) {
30
+ const normalizedExtension = extension.startsWith(".") ? extension : `.${extension}`;
27
31
  if (extname(inputPath).toLowerCase() === ".pdf") {
28
- return inputPath.slice(0, -4) + extension;
32
+ return inputPath.slice(0, -4) + normalizedExtension;
29
33
  }
30
- return inputPath + extension;
34
+ return inputPath + normalizedExtension;
31
35
  }
32
- export function resolveFolderOutputPath(inputPath, inputRoot, outputRoot, format) {
36
+ export function resolveFolderOutputPath(inputPath, inputRoot, outputRoot, extension) {
33
37
  if (!outputRoot) {
34
- return defaultOutputPath(inputPath, format);
38
+ return defaultOutputPath(inputPath, extension);
35
39
  }
36
40
  const relativePath = relative(inputRoot, inputPath);
37
41
  const relativeDir = dirname(relativePath);
38
42
  const base = basename(relativePath, extname(relativePath));
39
- const filename = `${base}.${format}`;
43
+ const normalizedExtension = extension.startsWith(".") ? extension.slice(1) : extension;
44
+ const filename = `${base}.${normalizedExtension}`;
40
45
  if (relativeDir === ".") {
41
46
  return join(outputRoot, filename);
42
47
  }
@@ -2,9 +2,9 @@ export type ConvertOptions = {
2
2
  inputPath: string;
3
3
  model: string;
4
4
  mode: ConversionMode;
5
- format?: OutputFormat;
6
5
  instructions?: string;
7
6
  promptText?: string;
7
+ outputExtensionHint?: string;
8
8
  };
9
9
  export type ConversionMode = "auto" | "prompt";
10
10
  export type OutputFormat = "md" | "txt";
@@ -54,13 +54,13 @@ export async function convertPdf(options) {
54
54
  outputTokens: result.state.usage.outputTokens,
55
55
  totalTokens: result.state.usage.totalTokens
56
56
  };
57
- if (options.mode === "auto" && !options.format) {
57
+ if (options.mode === "auto") {
58
58
  return { ...parseAutoResponse(rawOutput), usage };
59
59
  }
60
- const format = options.format ?? "txt";
61
- return { format, content: rawOutput, usage };
60
+ return { format: "txt", content: rawOutput, usage };
62
61
  }
63
62
  function buildPromptText(options) {
63
+ const outputExtensionHint = normalizeExtensionHint(options.outputExtensionHint);
64
64
  if (options.mode === "prompt") {
65
65
  if (!options.promptText) {
66
66
  throw new Error("promptText is required when mode is 'prompt'.");
@@ -70,35 +70,16 @@ function buildPromptText(options) {
70
70
  "Return only the final converted content.",
71
71
  `User prompt:\n${options.promptText}`
72
72
  ];
73
- if (options.format === "md") {
74
- promptModeParts.push("Output format requirement: Return only GitHub-flavored Markdown.");
75
- }
76
- else if (options.format === "txt") {
77
- promptModeParts.push("Output format requirement: Return plain text only and do not use Markdown syntax.");
78
- }
79
- else {
80
- promptModeParts.push("If the prompt does not enforce a format, prefer plain text without Markdown syntax.");
73
+ if (outputExtensionHint) {
74
+ promptModeParts.push([
75
+ `Output file extension hint: .${outputExtensionHint}.`,
76
+ "Prefer content that is practical for saving under this extension.",
77
+ "Treat this as guidance and still follow the user prompt exactly."
78
+ ].join(" "));
81
79
  }
82
80
  return promptModeParts.join("\n\n");
83
81
  }
84
- if (options.format === "md") {
85
- return withAdditionalInstructions([
86
- "Convert this PDF into clean GitHub-flavored Markdown.",
87
- "Preserve headings, paragraphs, lists, and tables.",
88
- "Render tables as Markdown pipe tables with header separators.",
89
- "If cells are empty due to merged cells, keep the table readable and consistent.",
90
- "Return only Markdown without code fences."
91
- ].join(" "), options.instructions);
92
- }
93
- if (options.format === "txt") {
94
- return withAdditionalInstructions([
95
- "Convert this PDF into clean plain text.",
96
- "Preserve reading order and paragraph boundaries.",
97
- "Represent tables in readable plain text (no Markdown syntax).",
98
- "Return plain text only and do not use Markdown syntax or code fences."
99
- ].join(" "), options.instructions);
100
- }
101
- return withAdditionalInstructions([
82
+ let autoPrompt = withAdditionalInstructions([
102
83
  "Decide the best output format for this PDF: Markdown ('md') or plain text ('txt').",
103
84
  "Choose 'md' for documents with meaningful headings, lists, and tables that benefit from Markdown.",
104
85
  "Choose 'txt' for mostly linear text where Markdown adds little value.",
@@ -108,6 +89,14 @@ function buildPromptText(options) {
108
89
  "If format is 'txt', output plain text only and do not use Markdown syntax.",
109
90
  "Do not wrap the JSON in code fences."
110
91
  ].join("\n"), options.instructions);
92
+ if (outputExtensionHint) {
93
+ autoPrompt = `${autoPrompt}\n\n${[
94
+ `Output file extension hint: .${outputExtensionHint}.`,
95
+ "Prefer content that is practical for that extension while still returning JSON with format='md' or 'txt'.",
96
+ "This is guidance only and should not break the required JSON schema."
97
+ ].join(" ")}`;
98
+ }
99
+ return autoPrompt;
111
100
  }
112
101
  function withAdditionalInstructions(base, additional) {
113
102
  if (!additional) {
@@ -115,6 +104,13 @@ function withAdditionalInstructions(base, additional) {
115
104
  }
116
105
  return `${base}\n\nAdditional user instructions:\n${additional}`;
117
106
  }
107
+ function normalizeExtensionHint(extension) {
108
+ if (!extension) {
109
+ return undefined;
110
+ }
111
+ const normalized = extension.trim().replace(/^\.+/, "");
112
+ return normalized || undefined;
113
+ }
118
114
  function parseAutoResponse(rawOutput) {
119
115
  let candidate = rawOutput.trim();
120
116
  const fencedMatch = candidate.match(/```(?:json)?\s*([\s\S]*?)```/i);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@robin7331/papyrus-cli",
3
- "version": "0.1.7",
3
+ "version": "0.1.9",
4
4
  "private": false,
5
5
  "description": "Convert PDF to markdown or text with the OpenAI Agents SDK",
6
6
  "repository": {
package/src/cli.ts CHANGED
@@ -52,7 +52,7 @@ program
52
52
  parseConcurrency
53
53
  )
54
54
  .option("-y, --yes", "Skip confirmation prompt in folder mode")
55
- .option("--format <format>", "Output format override: md or txt", parseFormat)
55
+ .option("--format <format>", "Output file extension override (for example: md, txt, csv, json)", parseFormat)
56
56
  .option(
57
57
  "--instructions <text>",
58
58
  "Additional conversion instructions (only when not using --prompt/--prompt-file)"
@@ -182,12 +182,13 @@ async function processSingleFile(
182
182
  inputPath,
183
183
  model: options.model,
184
184
  mode,
185
- format: options.format,
186
185
  instructions: options.instructions,
187
- promptText
186
+ promptText,
187
+ outputExtensionHint: options.format
188
188
  });
189
189
 
190
- const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, result.format));
190
+ const outputExtension = options.format ?? result.format;
191
+ const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, outputExtension));
191
192
  await mkdir(dirname(outputPath), { recursive: true });
192
193
  await writeFile(outputPath, result.content, "utf8");
193
194
 
@@ -195,16 +196,16 @@ async function processSingleFile(
195
196
  workerDashboard.setWorkerDone(
196
197
  0,
197
198
  displayInput,
198
- `${result.format} in ${formatDurationMs(Date.now() - startedAt)}`
199
+ `${outputExtension} in ${formatDurationMs(Date.now() - startedAt)}`
199
200
  );
200
201
  workerDashboard.setSummary(1, 0);
201
202
  } else {
202
203
  console.log(
203
- `[worker-1] Done ${displayInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`
204
+ `[worker-1] Done ${displayInput} -> ${outputPath} (${outputExtension}, ${formatDurationMs(Date.now() - startedAt)})`
204
205
  );
205
206
  }
206
207
 
207
- console.log(`Output (${result.format}) written to: ${outputPath}`);
208
+ console.log(`Output (.${outputExtension}) written to: ${outputPath}`);
208
209
  return result.usage;
209
210
  } catch (error) {
210
211
  const message = error instanceof Error ? error.message : String(error);
@@ -242,9 +243,7 @@ async function processFolder(
242
243
  promptText?: string
243
244
  ): Promise<FolderSummary> {
244
245
  if (options.output && looksLikeFileOutput(options.output)) {
245
- throw new Error(
246
- "In folder mode, --output must be a directory path (not a .md/.txt file path)."
247
- );
246
+ throw new Error("In folder mode, --output must be a directory path.");
248
247
  }
249
248
 
250
249
  const files = await collectPdfFiles(inputDir);
@@ -285,12 +284,13 @@ async function processFolder(
285
284
  inputPath: filePath,
286
285
  model: options.model,
287
286
  mode,
288
- format: options.format,
289
287
  instructions: options.instructions,
290
- promptText
288
+ promptText,
289
+ outputExtensionHint: options.format
291
290
  });
292
291
 
293
- const outputPath = resolveFolderOutputPath(filePath, inputDir, outputRoot, result.format);
292
+ const outputExtension = options.format ?? result.format;
293
+ const outputPath = resolveFolderOutputPath(filePath, inputDir, outputRoot, outputExtension);
294
294
  await mkdir(dirname(outputPath), { recursive: true });
295
295
  await writeFile(outputPath, result.content, "utf8");
296
296
  succeeded += 1;
@@ -300,11 +300,11 @@ async function processFolder(
300
300
  workerDashboard.setWorkerDone(
301
301
  workerId,
302
302
  relativeInput,
303
- `${result.format} in ${formatDurationMs(Date.now() - startedAt)}`
303
+ `${outputExtension} in ${formatDurationMs(Date.now() - startedAt)}`
304
304
  );
305
305
  } else {
306
306
  console.log(
307
- `[worker-${workerId + 1}] Done ${relativeInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`
307
+ `[worker-${workerId + 1}] Done ${relativeInput} -> ${outputPath} (${outputExtension}, ${formatDurationMs(Date.now() - startedAt)})`
308
308
  );
309
309
  }
310
310
  } catch (error) {
@@ -583,20 +583,16 @@ async function runWithConcurrency<T>(
583
583
  await Promise.all(workers);
584
584
  }
585
585
 
586
- const SPINNER_FRAMES = ["-", "\\", "|", "/"];
587
-
588
586
  type WorkerLane = {
589
587
  state: "idle" | "running" | "done" | "failed";
590
588
  file?: string;
591
589
  message?: string;
592
- spinnerFrame: number;
593
590
  };
594
591
 
595
592
  class AsciiWorkerDashboard {
596
593
  private readonly lanes: WorkerLane[];
597
594
  private readonly total: number;
598
595
  private readonly workerCount: number;
599
- private readonly spinnerTimer: NodeJS.Timeout;
600
596
  private completed = 0;
601
597
  private failed = 0;
602
598
  private renderedLineCount = 0;
@@ -605,16 +601,11 @@ class AsciiWorkerDashboard {
605
601
  this.total = total;
606
602
  this.workerCount = workerCount;
607
603
  this.lanes = Array.from({ length: workerCount }, () => ({
608
- state: "idle",
609
- spinnerFrame: 0
604
+ state: "idle"
610
605
  }));
611
606
 
612
607
  process.stdout.write("\x1b[?25l");
613
608
  this.render();
614
- this.spinnerTimer = setInterval(() => {
615
- this.tickSpinners();
616
- this.render();
617
- }, 100);
618
609
  }
619
610
 
620
611
  setSummary(completed: number, failed: number): void {
@@ -631,7 +622,7 @@ class AsciiWorkerDashboard {
631
622
 
632
623
  lane.state = "running";
633
624
  lane.file = file;
634
- lane.message = "processing";
625
+ lane.message = "processing...";
635
626
  this.render();
636
627
  }
637
628
 
@@ -660,7 +651,6 @@ class AsciiWorkerDashboard {
660
651
  }
661
652
 
662
653
  stop(): void {
663
- clearInterval(this.spinnerTimer);
664
654
  this.render();
665
655
  process.stdout.write("\x1b[?25h");
666
656
  }
@@ -696,19 +686,9 @@ class AsciiWorkerDashboard {
696
686
  return lines;
697
687
  }
698
688
 
699
- private tickSpinners(): void {
700
- for (const lane of this.lanes) {
701
- if (lane.state !== "running") {
702
- continue;
703
- }
704
-
705
- lane.spinnerFrame = (lane.spinnerFrame + 1) % SPINNER_FRAMES.length;
706
- }
707
- }
708
-
709
689
  private renderIcon(lane: WorkerLane): string {
710
690
  if (lane.state === "running") {
711
- return SPINNER_FRAMES[lane.spinnerFrame];
691
+ return ">>";
712
692
  }
713
693
 
714
694
  if (lane.state === "done") {
package/src/cliHelpers.ts CHANGED
@@ -1,24 +1,28 @@
1
1
  import { InvalidArgumentError } from "commander";
2
2
  import { basename, dirname, extname, join, relative } from "node:path";
3
- import { type OutputFormat } from "./openaiPdfToMarkdown.js";
4
3
 
5
4
  export type CliOptions = {
6
5
  output?: string;
7
6
  model: string;
8
7
  concurrency?: number;
9
8
  yes?: boolean;
10
- format?: OutputFormat;
9
+ format?: string;
11
10
  instructions?: string;
12
11
  prompt?: string;
13
12
  promptFile?: string;
14
13
  };
15
14
 
16
- export function parseFormat(value: string): OutputFormat {
17
- if (value === "md" || value === "txt") {
18
- return value;
15
+ export function parseFormat(value: string): string {
16
+ const normalized = value.trim().replace(/^\.+/, "");
17
+ if (!normalized) {
18
+ throw new InvalidArgumentError("Format must be a non-empty file extension.");
19
+ }
20
+
21
+ if (normalized.includes("/") || normalized.includes("\\")) {
22
+ throw new InvalidArgumentError("Format must be a file extension, not a path.");
19
23
  }
20
24
 
21
- throw new InvalidArgumentError("Format must be either 'md' or 'txt'.");
25
+ return normalized;
22
26
  }
23
27
 
24
28
  export function parseConcurrency(value: string): number {
@@ -41,30 +45,31 @@ export function validateOptionCombination(options: CliOptions): void {
41
45
  }
42
46
  }
43
47
 
44
- export function defaultOutputPath(inputPath: string, format: OutputFormat): string {
45
- const extension = format === "md" ? ".md" : ".txt";
48
+ export function defaultOutputPath(inputPath: string, extension: string): string {
49
+ const normalizedExtension = extension.startsWith(".") ? extension : `.${extension}`;
46
50
 
47
51
  if (extname(inputPath).toLowerCase() === ".pdf") {
48
- return inputPath.slice(0, -4) + extension;
52
+ return inputPath.slice(0, -4) + normalizedExtension;
49
53
  }
50
54
 
51
- return inputPath + extension;
55
+ return inputPath + normalizedExtension;
52
56
  }
53
57
 
54
58
  export function resolveFolderOutputPath(
55
59
  inputPath: string,
56
60
  inputRoot: string,
57
61
  outputRoot: string | undefined,
58
- format: OutputFormat
62
+ extension: string
59
63
  ): string {
60
64
  if (!outputRoot) {
61
- return defaultOutputPath(inputPath, format);
65
+ return defaultOutputPath(inputPath, extension);
62
66
  }
63
67
 
64
68
  const relativePath = relative(inputRoot, inputPath);
65
69
  const relativeDir = dirname(relativePath);
66
70
  const base = basename(relativePath, extname(relativePath));
67
- const filename = `${base}.${format}`;
71
+ const normalizedExtension = extension.startsWith(".") ? extension.slice(1) : extension;
72
+ const filename = `${base}.${normalizedExtension}`;
68
73
 
69
74
  if (relativeDir === ".") {
70
75
  return join(outputRoot, filename);
@@ -9,9 +9,9 @@ export type ConvertOptions = {
9
9
  inputPath: string;
10
10
  model: string;
11
11
  mode: ConversionMode;
12
- format?: OutputFormat;
13
12
  instructions?: string;
14
13
  promptText?: string;
14
+ outputExtensionHint?: string;
15
15
  };
16
16
 
17
17
  export type ConversionMode = "auto" | "prompt";
@@ -94,63 +94,40 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
94
94
  totalTokens: result.state.usage.totalTokens
95
95
  };
96
96
 
97
- if (options.mode === "auto" && !options.format) {
97
+ if (options.mode === "auto") {
98
98
  return { ...parseAutoResponse(rawOutput), usage };
99
99
  }
100
100
 
101
- const format = options.format ?? "txt";
102
- return { format, content: rawOutput, usage };
101
+ return { format: "txt", content: rawOutput, usage };
103
102
  }
104
103
 
105
104
  function buildPromptText(options: ConvertOptions): string {
105
+ const outputExtensionHint = normalizeExtensionHint(options.outputExtensionHint);
106
106
  if (options.mode === "prompt") {
107
107
  if (!options.promptText) {
108
108
  throw new Error("promptText is required when mode is 'prompt'.");
109
109
  }
110
110
 
111
- const promptModeParts = [
111
+ const promptModeParts: string[] = [
112
112
  "Apply the following user prompt to the PDF.",
113
113
  "Return only the final converted content.",
114
114
  `User prompt:\n${options.promptText}`
115
115
  ];
116
116
 
117
- if (options.format === "md") {
118
- promptModeParts.push("Output format requirement: Return only GitHub-flavored Markdown.");
119
- } else if (options.format === "txt") {
120
- promptModeParts.push("Output format requirement: Return plain text only and do not use Markdown syntax.");
121
- } else {
122
- promptModeParts.push("If the prompt does not enforce a format, prefer plain text without Markdown syntax.");
117
+ if (outputExtensionHint) {
118
+ promptModeParts.push(
119
+ [
120
+ `Output file extension hint: .${outputExtensionHint}.`,
121
+ "Prefer content that is practical for saving under this extension.",
122
+ "Treat this as guidance and still follow the user prompt exactly."
123
+ ].join(" ")
124
+ );
123
125
  }
124
126
 
125
127
  return promptModeParts.join("\n\n");
126
128
  }
127
129
 
128
- if (options.format === "md") {
129
- return withAdditionalInstructions(
130
- [
131
- "Convert this PDF into clean GitHub-flavored Markdown.",
132
- "Preserve headings, paragraphs, lists, and tables.",
133
- "Render tables as Markdown pipe tables with header separators.",
134
- "If cells are empty due to merged cells, keep the table readable and consistent.",
135
- "Return only Markdown without code fences."
136
- ].join(" "),
137
- options.instructions
138
- );
139
- }
140
-
141
- if (options.format === "txt") {
142
- return withAdditionalInstructions(
143
- [
144
- "Convert this PDF into clean plain text.",
145
- "Preserve reading order and paragraph boundaries.",
146
- "Represent tables in readable plain text (no Markdown syntax).",
147
- "Return plain text only and do not use Markdown syntax or code fences."
148
- ].join(" "),
149
- options.instructions
150
- );
151
- }
152
-
153
- return withAdditionalInstructions(
130
+ let autoPrompt = withAdditionalInstructions(
154
131
  [
155
132
  "Decide the best output format for this PDF: Markdown ('md') or plain text ('txt').",
156
133
  "Choose 'md' for documents with meaningful headings, lists, and tables that benefit from Markdown.",
@@ -163,6 +140,18 @@ function buildPromptText(options: ConvertOptions): string {
163
140
  ].join("\n"),
164
141
  options.instructions
165
142
  );
143
+
144
+ if (outputExtensionHint) {
145
+ autoPrompt = `${autoPrompt}\n\n${
146
+ [
147
+ `Output file extension hint: .${outputExtensionHint}.`,
148
+ "Prefer content that is practical for that extension while still returning JSON with format='md' or 'txt'.",
149
+ "This is guidance only and should not break the required JSON schema."
150
+ ].join(" ")
151
+ }`;
152
+ }
153
+
154
+ return autoPrompt;
166
155
  }
167
156
 
168
157
  function withAdditionalInstructions(base: string, additional?: string): string {
@@ -173,6 +162,15 @@ function withAdditionalInstructions(base: string, additional?: string): string {
173
162
  return `${base}\n\nAdditional user instructions:\n${additional}`;
174
163
  }
175
164
 
165
+ function normalizeExtensionHint(extension: string | undefined): string | undefined {
166
+ if (!extension) {
167
+ return undefined;
168
+ }
169
+
170
+ const normalized = extension.trim().replace(/^\.+/, "");
171
+ return normalized || undefined;
172
+ }
173
+
176
174
  function parseAutoResponse(rawOutput: string): Omit<ConvertResult, "usage"> {
177
175
  let candidate = rawOutput.trim();
178
176
 
@@ -17,10 +17,16 @@ import {
17
17
  test("parseFormat accepts valid values", () => {
18
18
  assert.equal(parseFormat("md"), "md");
19
19
  assert.equal(parseFormat("txt"), "txt");
20
+ assert.equal(parseFormat("csv"), "csv");
21
+ assert.equal(parseFormat(".json"), "json");
22
+ assert.equal(parseFormat("tar.gz"), "tar.gz");
20
23
  });
21
24
 
22
25
  test("parseFormat rejects invalid values", () => {
23
- assert.throws(() => parseFormat("json"), InvalidArgumentError);
26
+ assert.throws(() => parseFormat(""), InvalidArgumentError);
27
+ assert.throws(() => parseFormat(" "), InvalidArgumentError);
28
+ assert.throws(() => parseFormat("../json"), InvalidArgumentError);
29
+ assert.throws(() => parseFormat("a/b"), InvalidArgumentError);
24
30
  });
25
31
 
26
32
  test("parseConcurrency accepts in-range integers", () => {
@@ -75,6 +81,7 @@ test("validateOptionCombination rejects --instructions with prompt flags", () =>
75
81
  test("defaultOutputPath replaces .pdf extension and appends for other files", () => {
76
82
  assert.equal(defaultOutputPath("/tmp/input.pdf", "md"), "/tmp/input.md");
77
83
  assert.equal(defaultOutputPath("/tmp/input.PDF", "txt"), "/tmp/input.txt");
84
+ assert.equal(defaultOutputPath("/tmp/input.pdf", ".csv"), "/tmp/input.csv");
78
85
  assert.equal(defaultOutputPath("/tmp/input", "md"), "/tmp/input.md");
79
86
  });
80
87
 
@@ -93,6 +100,11 @@ test("resolveFolderOutputPath preserves nested structure when output root is set
93
100
  resolveFolderOutputPath("/data/invoices/file.pdf", "/data/invoices", "/exports", "txt"),
94
101
  "/exports/file.txt"
95
102
  );
103
+
104
+ assert.equal(
105
+ resolveFolderOutputPath("/data/invoices/file.pdf", "/data/invoices", "/exports", ".csv"),
106
+ "/exports/file.csv"
107
+ );
96
108
  });
97
109
 
98
110
  test("resolveFolderOutputPath falls back to default path when no output root", () => {