@robin7331/papyrus-cli 0.1.9 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -27,6 +27,9 @@ papyrus --help
27
27
  # Show installed CLI version
28
28
  papyrus --version
29
29
 
30
+ # List available models for the current API key
31
+ papyrus --models
32
+
30
33
  # Single file (default behavior; if no API key is found, Papyrus prompts you to paste one)
31
34
  papyrus ./path/to/input.pdf
32
35
 
@@ -88,9 +91,10 @@ papyrus config clear
88
91
 
89
92
  ## Arguments Reference
90
93
 
91
- ### `<input>`
94
+ ### `[input]`
92
95
 
93
96
  Path to a single PDF file or a folder containing PDFs (processed recursively).
97
+ Required unless you use `--models`.
94
98
 
95
99
  Example:
96
100
 
@@ -165,6 +169,7 @@ papyrus ./docs/invoice.pdf --prompt-file ./my-prompt.txt
165
169
  ### `-m, --model <model>`
166
170
 
167
171
  OpenAI model name used for conversion. Default is `gpt-4o-mini`.
172
+ If the selected model is not available, Papyrus prints the available model IDs before exiting.
168
173
 
169
174
  Example:
170
175
 
@@ -172,6 +177,16 @@ Example:
172
177
  papyrus ./docs/invoice.pdf --model gpt-4.1-mini
173
178
  ```
174
179
 
180
+ ### `--models`
181
+
182
+ Lists the available OpenAI model IDs for the current API key and exits.
183
+
184
+ Example:
185
+
186
+ ```bash
187
+ papyrus --models
188
+ ```
189
+
175
190
  ### `--concurrency <n>`
176
191
 
177
192
  Maximum parallel workers for folder input. Must be an integer between `1` and `100`. Default is `10`.
package/dist/cli.js CHANGED
@@ -5,7 +5,7 @@ import { mkdir, readFile, readdir, stat, writeFile } from "node:fs/promises";
5
5
  import { dirname, join, relative, resolve } from "node:path";
6
6
  import { Command } from "commander";
7
7
  import { clearStoredApiKey, getConfigFilePath, getStoredApiKey, maskApiKey, setStoredApiKey } from "./config.js";
8
- import { convertPdf } from "./openaiPdfToMarkdown.js";
8
+ import { assertModelAvailable, convertPdf, listAvailableModels, UnknownModelError } from "./openaiPdfToMarkdown.js";
9
9
  import { defaultOutputPath, formatDurationMs, isPdfPath, looksLikeFileOutput, parseConcurrency, parseFormat, resolveFolderOutputPath, truncate, validateOptionCombination } from "./cliHelpers.js";
10
10
  const program = new Command();
11
11
  const configFilePath = getConfigFilePath();
@@ -15,9 +15,10 @@ program
15
15
  .name("papyrus")
16
16
  .version(cliVersion, "-v, --version", "display version number")
17
17
  .description("Convert PDF files to Markdown or text using the OpenAI Agents SDK")
18
- .argument("<input>", "Path to input PDF file or folder")
18
+ .argument("[input]", "Path to input PDF file or folder")
19
19
  .option("-o, --output <path>", "Path to output file (single input) or output directory (folder input)")
20
20
  .option("-m, --model <model>", "OpenAI model to use", "gpt-4o-mini")
21
+ .option("--models", "List available OpenAI models for the current API key and exit")
21
22
  .option("--concurrency <n>", "Max parallel workers for folder input (default: 10)", parseConcurrency)
22
23
  .option("-y, --yes", "Skip confirmation prompt in folder mode")
23
24
  .option("--format <format>", "Output file extension override (for example: md, txt, csv, json)", parseFormat)
@@ -25,13 +26,26 @@ program
25
26
  .option("--prompt <text>", "Custom prompt text (enables prompt mode)")
26
27
  .option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
27
28
  .action(async (input, options) => {
28
- const inputPath = resolve(input);
29
29
  const startedAt = Date.now();
30
30
  try {
31
+ if (options.models) {
32
+ await ensureApiKey();
33
+ printAvailableModels(await listAvailableModels());
34
+ return;
35
+ }
36
+ if (!input) {
37
+ throw new Error('Input path is required unless "--models" is used.');
38
+ }
39
+ const inputPath = resolve(input);
31
40
  validateOptionCombination(options);
32
41
  const promptText = await resolvePromptText(options);
33
42
  const conversionMode = resolveConversionMode(promptText);
34
43
  const inputKind = await detectInputKind(inputPath);
44
+ if (inputKind === "file" && !isPdfPath(inputPath)) {
45
+ throw new Error("Input file must have a .pdf extension.");
46
+ }
47
+ await ensureApiKey();
48
+ await assertModelAvailable(options.model);
35
49
  let usageTotals = emptyUsage();
36
50
  if (inputKind === "file") {
37
51
  usageTotals = await processSingleFile(inputPath, options, conversionMode, promptText);
@@ -47,6 +61,9 @@ program
47
61
  console.log(`Duration: ${((Date.now() - startedAt) / 1000).toFixed(2)}s`);
48
62
  }
49
63
  catch (error) {
64
+ if (error instanceof UnknownModelError) {
65
+ printAvailableModels(error.availableModels);
66
+ }
50
67
  const message = error instanceof Error ? error.message : String(error);
51
68
  console.error(`Conversion failed: ${message}`);
52
69
  console.error(`Duration: ${((Date.now() - startedAt) / 1000).toFixed(2)}s`);
@@ -113,10 +130,6 @@ program.parseAsync(process.argv).catch((error) => {
113
130
  process.exitCode = 1;
114
131
  });
115
132
  async function processSingleFile(inputPath, options, mode, promptText) {
116
- if (!isPdfPath(inputPath)) {
117
- throw new Error("Input file must have a .pdf extension.");
118
- }
119
- await ensureApiKey();
120
133
  const startedAt = Date.now();
121
134
  const displayInput = relative(process.cwd(), inputPath) || inputPath;
122
135
  const workerDashboard = process.stdout.isTTY
@@ -179,7 +192,6 @@ async function processFolder(inputDir, options, mode, promptText) {
179
192
  console.log("Cancelled. No files were processed.");
180
193
  return { total: files.length, succeeded: 0, failed: 0, cancelled: true, usage: emptyUsage() };
181
194
  }
182
- await ensureApiKey();
183
195
  const outputRoot = options.output ? resolve(options.output) : undefined;
184
196
  let succeeded = 0;
185
197
  let failed = 0;
@@ -569,6 +581,12 @@ function mergeUsage(target, delta) {
569
581
  function printUsageTotals(usage) {
570
582
  console.log(`Token usage: input=${usage.inputTokens}, output=${usage.outputTokens}, total=${usage.totalTokens}, requests=${usage.requests}`);
571
583
  }
584
/**
 * Prints a model listing to stdout: one header line carrying the number of
 * entries, followed by each model ID on a line of its own.
 *
 * @param {string[]} models - Model IDs to print, in the order given.
 */
function printAvailableModels(models) {
    const header = `Available models (${models.length}):`;
    console.log(header);
    models.forEach((id) => {
        console.log(id);
    });
}
572
590
  function getCliVersion() {
573
591
  try {
574
592
  const packageJsonPath = new URL("../package.json", import.meta.url);
@@ -1,6 +1,7 @@
1
1
  export type CliOptions = {
2
2
  output?: string;
3
3
  model: string;
4
+ models?: boolean;
4
5
  concurrency?: number;
5
6
  yes?: boolean;
6
7
  format?: string;
@@ -1,3 +1,4 @@
1
+ import OpenAI from "openai";
1
2
  export type ConvertOptions = {
2
3
  inputPath: string;
3
4
  model: string;
@@ -19,4 +20,11 @@ export type ConvertUsage = {
19
20
  outputTokens: number;
20
21
  totalTokens: number;
21
22
  };
23
+ export declare class UnknownModelError extends Error {
24
+ readonly model: string;
25
+ readonly availableModels: string[];
26
+ constructor(model: string, availableModels: string[]);
27
+ }
22
28
  export declare function convertPdf(options: ConvertOptions): Promise<ConvertResult>;
29
+ export declare function assertModelAvailable(model: string): Promise<void>;
30
+ export declare function listAvailableModels(client?: OpenAI): Promise<string[]>;
@@ -4,6 +4,16 @@ import { resolve } from "node:path";
4
4
  import { Agent, run } from "@openai/agents";
5
5
  import OpenAI from "openai";
6
6
  import { z } from "zod";
7
/**
 * Error raised when the requested model is not available for the API key in
 * use. Besides the message it carries the rejected model ID and the list of
 * model IDs that were available, so a caller can show alternatives.
 */
export class UnknownModelError extends Error {
    /** Model ID that was requested. */
    model;
    /** Model IDs that were available when the error was created. */
    availableModels;
    /**
     * @param {string} model - Model ID that was rejected.
     * @param {string[]} availableModels - Model IDs to offer as alternatives.
     */
    constructor(model, availableModels) {
        super(`Model "${model}" is not available for this API key.`);
        this.name = "UnknownModelError";
        this.model = model;
        // Snapshot the list so later mutation of the caller's array cannot
        // change what this error reports.
        this.availableModels = [...availableModels];
    }
}
7
17
  const AUTO_RESPONSE_SCHEMA = z.object({
8
18
  format: z.enum(["md", "txt"]),
9
19
  content: z.string().min(1)
@@ -14,11 +24,7 @@ const RATE_LIMIT_MAX_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_DELA
14
24
  export async function convertPdf(options) {
15
25
  const inputPath = resolve(options.inputPath);
16
26
  await access(inputPath);
17
- const apiKey = process.env.OPENAI_API_KEY;
18
- if (!apiKey) {
19
- throw new Error("OPENAI_API_KEY is not set.");
20
- }
21
- const client = new OpenAI({ apiKey });
27
+ const client = createOpenAiClient();
22
28
  const uploaded = await withRateLimitRetry("file upload", () => client.files.create({
23
29
  file: createReadStream(inputPath),
24
30
  purpose: "user_data"
@@ -29,21 +35,30 @@ export async function convertPdf(options) {
29
35
  model: options.model
30
36
  });
31
37
  const promptText = buildPromptText(options);
32
- const result = await withRateLimitRetry("model run", () => run(agent, [
33
- {
34
- role: "user",
35
- content: [
36
- {
37
- type: "input_text",
38
- text: promptText
39
- },
40
- {
41
- type: "input_file",
42
- file: { id: uploaded.id }
43
- }
44
- ]
38
+ let result;
39
+ try {
40
+ result = await withRateLimitRetry("model run", () => run(agent, [
41
+ {
42
+ role: "user",
43
+ content: [
44
+ {
45
+ type: "input_text",
46
+ text: promptText
47
+ },
48
+ {
49
+ type: "input_file",
50
+ file: { id: uploaded.id }
51
+ }
52
+ ]
53
+ }
54
+ ]));
55
+ }
56
+ catch (error) {
57
+ if (isUnknownModelError(error, options.model)) {
58
+ throw new UnknownModelError(options.model, await listAvailableModels(client));
45
59
  }
46
- ]));
60
+ throw error;
61
+ }
47
62
  const rawOutput = (result.finalOutput ?? "").trim();
48
63
  if (!rawOutput) {
49
64
  throw new Error("No content returned by the API.");
@@ -59,6 +74,32 @@ export async function convertPdf(options) {
59
74
  }
60
75
  return { format: "txt", content: rawOutput, usage };
61
76
  }
77
/**
 * Verifies that `model` can be retrieved with the current API key.
 *
 * Resolves with no value when the lookup succeeds. When the lookup fails and
 * the failure is classified as an unknown model, the available model IDs are
 * fetched with the same client and an `UnknownModelError` is thrown. Any other
 * failure is rethrown unchanged.
 *
 * @param {string} model - Model ID to look up.
 * @returns {Promise<void>}
 */
export async function assertModelAvailable(model) {
    const openai = createOpenAiClient();
    try {
        await openai.models.retrieve(model);
    }
    catch (cause) {
        if (isUnknownModelError(cause, model)) {
            const available = await listAvailableModels(openai);
            throw new UnknownModelError(model, available);
        }
        throw cause;
    }
}
89
/**
 * Collects the ID of every model yielded by `client.models.list()` and returns
 * the IDs sorted with `localeCompare` under the "en" locale.
 *
 * @param client - Client whose `models.list()` result is iterated
 *   asynchronously; a new client is created when the argument is omitted.
 * @returns {Promise<string[]>} Sorted model IDs.
 */
export async function listAvailableModels(client = createOpenAiClient()) {
    const ids = [];
    const listing = client.models.list();
    for await (const entry of listing) {
        ids.push(entry.id);
    }
    const byEnglishOrder = (a, b) => a.localeCompare(b, "en");
    ids.sort(byEnglishOrder);
    return ids;
}
96
/**
 * Builds an OpenAI client from the `OPENAI_API_KEY` environment variable.
 *
 * The value is trimmed before use, so a key that is blank or carries stray
 * surrounding whitespace (for example a trailing CR from an env file) is not
 * passed to the SDK as-is.
 *
 * @returns A new `OpenAI` client configured with the key.
 * @throws {Error} When `OPENAI_API_KEY` is unset, empty or whitespace-only.
 */
function createOpenAiClient() {
    const apiKey = process.env.OPENAI_API_KEY?.trim();
    if (!apiKey) {
        throw new Error("OPENAI_API_KEY is not set.");
    }
    return new OpenAI({ apiKey });
}
62
103
  function buildPromptText(options) {
63
104
  const outputExtensionHint = normalizeExtensionHint(options.outputExtensionHint);
64
105
  if (options.mode === "prompt") {
@@ -111,6 +152,20 @@ function normalizeExtensionHint(extension) {
111
152
  const normalized = extension.trim().replace(/^\.+/, "");
112
153
  return normalized || undefined;
113
154
  }
155
/**
 * Reports whether `error` means that `model` is unknown to the API.
 *
 * Signals accepted, strongest first:
 *  1. `error.code === "model_not_found"`;
 *  2. `error.status === 404` together with a message that names the model;
 *  3. a message that names the model and contains "does not exist",
 *     "not found" or "unknown model".
 *
 * A bare 404 is not sufficient: a 404 about some other resource must not be
 * reported to the user as a wrong model name.
 *
 * @param {unknown} error - Any thrown value.
 * @param {string} model - Requested model ID; matched case-insensitively.
 * @returns {boolean} `true` when the failure should be treated as an unknown model.
 */
function isUnknownModelError(error, model) {
    const errorStatus = typeof error === "object" && error !== null && "status" in error ? error.status : undefined;
    const errorCode = typeof error === "object" && error !== null && "code" in error ? error.code : undefined;
    // An explicit error code is authoritative regardless of the message text.
    if (errorCode === "model_not_found") {
        return true;
    }
    const normalizedModel = model.trim().toLowerCase();
    // "".includes-style matching succeeds for every message, so an empty
    // model ID can never be used as evidence.
    if (normalizedModel === "") {
        return false;
    }
    const message = error instanceof Error ? error.message : String(error);
    const normalizedMessage = message.toLowerCase();
    if (!normalizedMessage.includes(normalizedModel)) {
        return false;
    }
    if (errorStatus === 404) {
        return true;
    }
    return ["does not exist", "not found", "unknown model"].some((phrase) => normalizedMessage.includes(phrase));
}
114
169
  function parseAutoResponse(rawOutput) {
115
170
  let candidate = rawOutput.trim();
116
171
  const fencedMatch = candidate.match(/```(?:json)?\s*([\s\S]*?)```/i);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@robin7331/papyrus-cli",
3
- "version": "0.1.9",
3
+ "version": "0.1.10",
4
4
  "private": false,
5
5
  "description": "Convert PDF to markdown or text with the OpenAI Agents SDK",
6
6
  "repository": {
package/src/cli.ts CHANGED
@@ -13,9 +13,12 @@ import {
13
13
  setStoredApiKey
14
14
  } from "./config.js";
15
15
  import {
16
+ assertModelAvailable,
16
17
  convertPdf,
17
18
  type ConversionMode,
18
- type ConvertUsage
19
+ type ConvertUsage,
20
+ listAvailableModels,
21
+ UnknownModelError
19
22
  } from "./openaiPdfToMarkdown.js";
20
23
  import {
21
24
  defaultOutputPath,
@@ -43,9 +46,10 @@ program
43
46
  .name("papyrus")
44
47
  .version(cliVersion, "-v, --version", "display version number")
45
48
  .description("Convert PDF files to Markdown or text using the OpenAI Agents SDK")
46
- .argument("<input>", "Path to input PDF file or folder")
49
+ .argument("[input]", "Path to input PDF file or folder")
47
50
  .option("-o, --output <path>", "Path to output file (single input) or output directory (folder input)")
48
51
  .option("-m, --model <model>", "OpenAI model to use", "gpt-4o-mini")
52
+ .option("--models", "List available OpenAI models for the current API key and exit")
49
53
  .option(
50
54
  "--concurrency <n>",
51
55
  "Max parallel workers for folder input (default: 10)",
@@ -59,16 +63,32 @@ program
59
63
  )
60
64
  .option("--prompt <text>", "Custom prompt text (enables prompt mode)")
61
65
  .option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
62
- .action(async (input: string, options: CliOptions) => {
63
- const inputPath = resolve(input);
66
+ .action(async (input: string | undefined, options: CliOptions) => {
64
67
  const startedAt = Date.now();
65
68
 
66
69
  try {
70
+ if (options.models) {
71
+ await ensureApiKey();
72
+ printAvailableModels(await listAvailableModels());
73
+ return;
74
+ }
75
+
76
+ if (!input) {
77
+ throw new Error('Input path is required unless "--models" is used.');
78
+ }
79
+
80
+ const inputPath = resolve(input);
67
81
  validateOptionCombination(options);
68
82
 
69
83
  const promptText = await resolvePromptText(options);
70
84
  const conversionMode = resolveConversionMode(promptText);
71
85
  const inputKind = await detectInputKind(inputPath);
86
+ if (inputKind === "file" && !isPdfPath(inputPath)) {
87
+ throw new Error("Input file must have a .pdf extension.");
88
+ }
89
+
90
+ await ensureApiKey();
91
+ await assertModelAvailable(options.model);
72
92
  let usageTotals: ConvertUsage = emptyUsage();
73
93
 
74
94
  if (inputKind === "file") {
@@ -84,6 +104,10 @@ program
84
104
  printUsageTotals(usageTotals);
85
105
  console.log(`Duration: ${((Date.now() - startedAt) / 1000).toFixed(2)}s`);
86
106
  } catch (error) {
107
+ if (error instanceof UnknownModelError) {
108
+ printAvailableModels(error.availableModels);
109
+ }
110
+
87
111
  const message = error instanceof Error ? error.message : String(error);
88
112
  console.error(`Conversion failed: ${message}`);
89
113
  console.error(`Duration: ${((Date.now() - startedAt) / 1000).toFixed(2)}s`);
@@ -160,11 +184,6 @@ async function processSingleFile(
160
184
  mode: ConversionMode,
161
185
  promptText?: string
162
186
  ): Promise<ConvertUsage> {
163
- if (!isPdfPath(inputPath)) {
164
- throw new Error("Input file must have a .pdf extension.");
165
- }
166
-
167
- await ensureApiKey();
168
187
  const startedAt = Date.now();
169
188
  const displayInput = relative(process.cwd(), inputPath) || inputPath;
170
189
  const workerDashboard = process.stdout.isTTY
@@ -258,7 +277,6 @@ async function processFolder(
258
277
  return { total: files.length, succeeded: 0, failed: 0, cancelled: true, usage: emptyUsage() };
259
278
  }
260
279
 
261
- await ensureApiKey();
262
280
  const outputRoot = options.output ? resolve(options.output) : undefined;
263
281
  let succeeded = 0;
264
282
  let failed = 0;
@@ -765,6 +783,13 @@ function printUsageTotals(usage: ConvertUsage): void {
765
783
  );
766
784
  }
767
785
 
786
/**
 * Prints a model listing to stdout: one header line carrying the number of
 * entries, followed by each model ID on a line of its own.
 *
 * @param models - Model IDs to print, in the order given.
 */
function printAvailableModels(models: string[]): void {
  const header = `Available models (${models.length}):`;
  console.log(header);
  models.forEach((id) => {
    console.log(id);
  });
}
792
+
768
793
  function getCliVersion(): string {
769
794
  try {
770
795
  const packageJsonPath = new URL("../package.json", import.meta.url);
package/src/cliHelpers.ts CHANGED
@@ -4,6 +4,7 @@ import { basename, dirname, extname, join, relative } from "node:path";
4
4
  export type CliOptions = {
5
5
  output?: string;
6
6
  model: string;
7
+ models?: boolean;
7
8
  concurrency?: number;
8
9
  yes?: boolean;
9
10
  format?: string;
@@ -30,6 +30,18 @@ export type ConvertUsage = {
30
30
  totalTokens: number;
31
31
  };
32
32
 
33
/**
 * Error raised when the requested model is not available for the API key in
 * use. Besides the message it carries the rejected model ID and the list of
 * model IDs that were available, so a caller can show alternatives.
 */
export class UnknownModelError extends Error {
  /** Model ID that was requested. */
  readonly model: string;
  /** Model IDs that were available when the error was created. */
  readonly availableModels: string[];

  /**
   * @param model - Model ID that was rejected.
   * @param availableModels - Model IDs to offer as alternatives.
   */
  constructor(model: string, availableModels: string[]) {
    super(`Model "${model}" is not available for this API key.`);
    this.name = "UnknownModelError";
    this.model = model;
    // Snapshot the list so later mutation of the caller's array cannot
    // change what this error reports.
    this.availableModels = [...availableModels];
  }
}
44
+
33
45
  const AUTO_RESPONSE_SCHEMA = z.object({
34
46
  format: z.enum(["md", "txt"]),
35
47
  content: z.string().min(1)
@@ -43,12 +55,7 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
43
55
  const inputPath = resolve(options.inputPath);
44
56
  await access(inputPath);
45
57
 
46
- const apiKey = process.env.OPENAI_API_KEY;
47
- if (!apiKey) {
48
- throw new Error("OPENAI_API_KEY is not set.");
49
- }
50
-
51
- const client = new OpenAI({ apiKey });
58
+ const client = createOpenAiClient();
52
59
 
53
60
  const uploaded = await withRateLimitRetry("file upload", () =>
54
61
  client.files.create({
@@ -64,23 +71,32 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
64
71
  });
65
72
 
66
73
  const promptText = buildPromptText(options);
67
- const result = await withRateLimitRetry("model run", () =>
68
- run(agent, [
69
- {
70
- role: "user",
71
- content: [
72
- {
73
- type: "input_text",
74
- text: promptText
75
- },
76
- {
77
- type: "input_file",
78
- file: { id: uploaded.id }
79
- }
80
- ]
81
- }
82
- ])
83
- );
74
+ let result;
75
+ try {
76
+ result = await withRateLimitRetry("model run", () =>
77
+ run(agent, [
78
+ {
79
+ role: "user",
80
+ content: [
81
+ {
82
+ type: "input_text",
83
+ text: promptText
84
+ },
85
+ {
86
+ type: "input_file",
87
+ file: { id: uploaded.id }
88
+ }
89
+ ]
90
+ }
91
+ ])
92
+ );
93
+ } catch (error) {
94
+ if (isUnknownModelError(error, options.model)) {
95
+ throw new UnknownModelError(options.model, await listAvailableModels(client));
96
+ }
97
+
98
+ throw error;
99
+ }
84
100
 
85
101
  const rawOutput = (result.finalOutput ?? "").trim();
86
102
  if (!rawOutput) {
@@ -101,6 +117,39 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
101
117
  return { format: "txt", content: rawOutput, usage };
102
118
  }
103
119
 
120
/**
 * Verifies that `model` can be retrieved with the current API key.
 *
 * Resolves with no value when the lookup succeeds. When the lookup fails and
 * the failure is classified as an unknown model, the available model IDs are
 * fetched with the same client and an `UnknownModelError` is thrown. Any other
 * failure is rethrown unchanged.
 *
 * @param model - Model ID to look up.
 * @throws UnknownModelError when the model lookup is rejected as unknown.
 */
export async function assertModelAvailable(model: string): Promise<void> {
  const openai = createOpenAiClient();

  try {
    await openai.models.retrieve(model);
  } catch (cause) {
    if (isUnknownModelError(cause, model)) {
      const available = await listAvailableModels(openai);
      throw new UnknownModelError(model, available);
    }

    throw cause;
  }
}
133
+
134
/**
 * Collects the ID of every model yielded by `client.models.list()` and returns
 * the IDs sorted with `localeCompare` under the "en" locale.
 *
 * @param client - Client whose `models.list()` result is iterated
 *   asynchronously; a new client is created when the argument is omitted.
 * @returns Sorted model IDs.
 */
export async function listAvailableModels(client = createOpenAiClient()): Promise<string[]> {
  const ids: string[] = [];
  const listing = client.models.list();

  for await (const entry of listing) {
    ids.push(entry.id);
  }

  const byEnglishOrder = (a: string, b: string): number => a.localeCompare(b, "en");
  ids.sort(byEnglishOrder);
  return ids;
}
143
+
144
/**
 * Builds an OpenAI client from the `OPENAI_API_KEY` environment variable.
 *
 * The value is trimmed before use, so a key that is blank or carries stray
 * surrounding whitespace (for example a trailing CR from an env file) is not
 * passed to the SDK as-is.
 *
 * @returns A new `OpenAI` client configured with the key.
 * @throws Error when `OPENAI_API_KEY` is unset, empty or whitespace-only.
 */
function createOpenAiClient(): OpenAI {
  const apiKey = process.env.OPENAI_API_KEY?.trim();
  if (!apiKey) {
    throw new Error("OPENAI_API_KEY is not set.");
  }

  return new OpenAI({ apiKey });
}
152
+
104
153
  function buildPromptText(options: ConvertOptions): string {
105
154
  const outputExtensionHint = normalizeExtensionHint(options.outputExtensionHint);
106
155
  if (options.mode === "prompt") {
@@ -171,6 +220,27 @@ function normalizeExtensionHint(extension: string | undefined): string | undefin
171
220
  return normalized || undefined;
172
221
  }
173
222
 
223
/**
 * Reports whether `error` means that `model` is unknown to the API.
 *
 * Signals accepted, strongest first:
 *  1. `error.code === "model_not_found"`;
 *  2. `error.status === 404` together with a message that names the model;
 *  3. a message that names the model and contains "does not exist",
 *     "not found" or "unknown model".
 *
 * A bare 404 is not sufficient: a 404 about some other resource must not be
 * reported to the user as a wrong model name.
 *
 * @param error - Any thrown value.
 * @param model - Requested model ID; matched case-insensitively.
 * @returns `true` when the failure should be treated as an unknown model.
 */
function isUnknownModelError(error: unknown, model: string): boolean {
  const errorStatus =
    typeof error === "object" && error !== null && "status" in error ? error.status : undefined;
  const errorCode =
    typeof error === "object" && error !== null && "code" in error ? error.code : undefined;

  // An explicit error code is authoritative regardless of the message text.
  if (errorCode === "model_not_found") {
    return true;
  }

  const normalizedModel = model.trim().toLowerCase();
  // An empty needle matches every message, so an empty model ID can never be
  // used as evidence.
  if (normalizedModel === "") {
    return false;
  }

  const message = error instanceof Error ? error.message : String(error);
  const normalizedMessage = message.toLowerCase();
  if (!normalizedMessage.includes(normalizedModel)) {
    return false;
  }

  if (errorStatus === 404) {
    return true;
  }

  return ["does not exist", "not found", "unknown model"].some((phrase) =>
    normalizedMessage.includes(phrase)
  );
}
243
+
174
244
  function parseAutoResponse(rawOutput: string): Omit<ConvertResult, "usage"> {
175
245
  let candidate = rawOutput.trim();
176
246