@robin7331/papyrus-cli 0.1.9 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -1
- package/dist/cli.js +26 -8
- package/dist/cliHelpers.d.ts +1 -0
- package/dist/openaiPdfToMarkdown.d.ts +8 -0
- package/dist/openaiPdfToMarkdown.js +74 -19
- package/package.json +1 -1
- package/src/cli.ts +35 -10
- package/src/cliHelpers.ts +1 -0
- package/src/openaiPdfToMarkdown.ts +93 -23
package/README.md
CHANGED
|
@@ -27,6 +27,9 @@ papyrus --help
|
|
|
27
27
|
# Show installed CLI version
|
|
28
28
|
papyrus --version
|
|
29
29
|
|
|
30
|
+
# List available models for the current API key
|
|
31
|
+
papyrus --models
|
|
32
|
+
|
|
30
33
|
# Single file (default behavior; if no API key is found, Papyrus prompts you to paste one)
|
|
31
34
|
papyrus ./path/to/input.pdf
|
|
32
35
|
|
|
@@ -88,9 +91,10 @@ papyrus config clear
|
|
|
88
91
|
|
|
89
92
|
## Arguments Reference
|
|
90
93
|
|
|
91
|
-
### `<input>`
|
|
94
|
+
### `[input]`
|
|
92
95
|
|
|
93
96
|
Path to a single PDF file or a folder containing PDFs (processed recursively).
|
|
97
|
+
Required unless you use `--models`.
|
|
94
98
|
|
|
95
99
|
Example:
|
|
96
100
|
|
|
@@ -165,6 +169,7 @@ papyrus ./docs/invoice.pdf --prompt-file ./my-prompt.txt
|
|
|
165
169
|
### `-m, --model <model>`
|
|
166
170
|
|
|
167
171
|
OpenAI model name used for conversion. Default is `gpt-4o-mini`.
|
|
172
|
+
If the selected model is not available, Papyrus prints the available model IDs before exiting.
|
|
168
173
|
|
|
169
174
|
Example:
|
|
170
175
|
|
|
@@ -172,6 +177,16 @@ Example:
|
|
|
172
177
|
papyrus ./docs/invoice.pdf --model gpt-4.1-mini
|
|
173
178
|
```
|
|
174
179
|
|
|
180
|
+
### `--models`
|
|
181
|
+
|
|
182
|
+
Lists the available OpenAI model IDs for the current API key and exits.
|
|
183
|
+
|
|
184
|
+
Example:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
papyrus --models
|
|
188
|
+
```
|
|
189
|
+
|
|
175
190
|
### `--concurrency <n>`
|
|
176
191
|
|
|
177
192
|
Maximum parallel workers for folder input. Must be an integer between `1` and `100`. Default is `10`.
|
package/dist/cli.js
CHANGED
|
@@ -5,7 +5,7 @@ import { mkdir, readFile, readdir, stat, writeFile } from "node:fs/promises";
|
|
|
5
5
|
import { dirname, join, relative, resolve } from "node:path";
|
|
6
6
|
import { Command } from "commander";
|
|
7
7
|
import { clearStoredApiKey, getConfigFilePath, getStoredApiKey, maskApiKey, setStoredApiKey } from "./config.js";
|
|
8
|
-
import { convertPdf } from "./openaiPdfToMarkdown.js";
|
|
8
|
+
import { assertModelAvailable, convertPdf, listAvailableModels, UnknownModelError } from "./openaiPdfToMarkdown.js";
|
|
9
9
|
import { defaultOutputPath, formatDurationMs, isPdfPath, looksLikeFileOutput, parseConcurrency, parseFormat, resolveFolderOutputPath, truncate, validateOptionCombination } from "./cliHelpers.js";
|
|
10
10
|
const program = new Command();
|
|
11
11
|
const configFilePath = getConfigFilePath();
|
|
@@ -15,9 +15,10 @@ program
|
|
|
15
15
|
.name("papyrus")
|
|
16
16
|
.version(cliVersion, "-v, --version", "display version number")
|
|
17
17
|
.description("Convert PDF files to Markdown or text using the OpenAI Agents SDK")
|
|
18
|
-
.argument("<input>", "Path to input PDF file or folder")
|
|
18
|
+
.argument("[input]", "Path to input PDF file or folder")
|
|
19
19
|
.option("-o, --output <path>", "Path to output file (single input) or output directory (folder input)")
|
|
20
20
|
.option("-m, --model <model>", "OpenAI model to use", "gpt-4o-mini")
|
|
21
|
+
.option("--models", "List available OpenAI models for the current API key and exit")
|
|
21
22
|
.option("--concurrency <n>", "Max parallel workers for folder input (default: 10)", parseConcurrency)
|
|
22
23
|
.option("-y, --yes", "Skip confirmation prompt in folder mode")
|
|
23
24
|
.option("--format <format>", "Output file extension override (for example: md, txt, csv, json)", parseFormat)
|
|
@@ -25,13 +26,26 @@ program
|
|
|
25
26
|
.option("--prompt <text>", "Custom prompt text (enables prompt mode)")
|
|
26
27
|
.option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
|
|
27
28
|
.action(async (input, options) => {
|
|
28
|
-
const inputPath = resolve(input);
|
|
29
29
|
const startedAt = Date.now();
|
|
30
30
|
try {
|
|
31
|
+
if (options.models) {
|
|
32
|
+
await ensureApiKey();
|
|
33
|
+
printAvailableModels(await listAvailableModels());
|
|
34
|
+
return;
|
|
35
|
+
}
|
|
36
|
+
if (!input) {
|
|
37
|
+
throw new Error('Input path is required unless "--models" is used.');
|
|
38
|
+
}
|
|
39
|
+
const inputPath = resolve(input);
|
|
31
40
|
validateOptionCombination(options);
|
|
32
41
|
const promptText = await resolvePromptText(options);
|
|
33
42
|
const conversionMode = resolveConversionMode(promptText);
|
|
34
43
|
const inputKind = await detectInputKind(inputPath);
|
|
44
|
+
if (inputKind === "file" && !isPdfPath(inputPath)) {
|
|
45
|
+
throw new Error("Input file must have a .pdf extension.");
|
|
46
|
+
}
|
|
47
|
+
await ensureApiKey();
|
|
48
|
+
await assertModelAvailable(options.model);
|
|
35
49
|
let usageTotals = emptyUsage();
|
|
36
50
|
if (inputKind === "file") {
|
|
37
51
|
usageTotals = await processSingleFile(inputPath, options, conversionMode, promptText);
|
|
@@ -47,6 +61,9 @@ program
|
|
|
47
61
|
console.log(`Duration: ${((Date.now() - startedAt) / 1000).toFixed(2)}s`);
|
|
48
62
|
}
|
|
49
63
|
catch (error) {
|
|
64
|
+
if (error instanceof UnknownModelError) {
|
|
65
|
+
printAvailableModels(error.availableModels);
|
|
66
|
+
}
|
|
50
67
|
const message = error instanceof Error ? error.message : String(error);
|
|
51
68
|
console.error(`Conversion failed: ${message}`);
|
|
52
69
|
console.error(`Duration: ${((Date.now() - startedAt) / 1000).toFixed(2)}s`);
|
|
@@ -113,10 +130,6 @@ program.parseAsync(process.argv).catch((error) => {
|
|
|
113
130
|
process.exitCode = 1;
|
|
114
131
|
});
|
|
115
132
|
async function processSingleFile(inputPath, options, mode, promptText) {
|
|
116
|
-
if (!isPdfPath(inputPath)) {
|
|
117
|
-
throw new Error("Input file must have a .pdf extension.");
|
|
118
|
-
}
|
|
119
|
-
await ensureApiKey();
|
|
120
133
|
const startedAt = Date.now();
|
|
121
134
|
const displayInput = relative(process.cwd(), inputPath) || inputPath;
|
|
122
135
|
const workerDashboard = process.stdout.isTTY
|
|
@@ -179,7 +192,6 @@ async function processFolder(inputDir, options, mode, promptText) {
|
|
|
179
192
|
console.log("Cancelled. No files were processed.");
|
|
180
193
|
return { total: files.length, succeeded: 0, failed: 0, cancelled: true, usage: emptyUsage() };
|
|
181
194
|
}
|
|
182
|
-
await ensureApiKey();
|
|
183
195
|
const outputRoot = options.output ? resolve(options.output) : undefined;
|
|
184
196
|
let succeeded = 0;
|
|
185
197
|
let failed = 0;
|
|
@@ -569,6 +581,12 @@ function mergeUsage(target, delta) {
|
|
|
569
581
|
function printUsageTotals(usage) {
|
|
570
582
|
console.log(`Token usage: input=${usage.inputTokens}, output=${usage.outputTokens}, total=${usage.totalTokens}, requests=${usage.requests}`);
|
|
571
583
|
}
|
|
584
|
+
function printAvailableModels(models) {
|
|
585
|
+
console.log(`Available models (${models.length}):`);
|
|
586
|
+
for (const model of models) {
|
|
587
|
+
console.log(model);
|
|
588
|
+
}
|
|
589
|
+
}
|
|
572
590
|
function getCliVersion() {
|
|
573
591
|
try {
|
|
574
592
|
const packageJsonPath = new URL("../package.json", import.meta.url);
|
package/dist/cliHelpers.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import OpenAI from "openai";
|
|
1
2
|
export type ConvertOptions = {
|
|
2
3
|
inputPath: string;
|
|
3
4
|
model: string;
|
|
@@ -19,4 +20,11 @@ export type ConvertUsage = {
|
|
|
19
20
|
outputTokens: number;
|
|
20
21
|
totalTokens: number;
|
|
21
22
|
};
|
|
23
|
+
export declare class UnknownModelError extends Error {
|
|
24
|
+
readonly model: string;
|
|
25
|
+
readonly availableModels: string[];
|
|
26
|
+
constructor(model: string, availableModels: string[]);
|
|
27
|
+
}
|
|
22
28
|
export declare function convertPdf(options: ConvertOptions): Promise<ConvertResult>;
|
|
29
|
+
export declare function assertModelAvailable(model: string): Promise<void>;
|
|
30
|
+
export declare function listAvailableModels(client?: OpenAI): Promise<string[]>;
|
|
@@ -4,6 +4,16 @@ import { resolve } from "node:path";
|
|
|
4
4
|
import { Agent, run } from "@openai/agents";
|
|
5
5
|
import OpenAI from "openai";
|
|
6
6
|
import { z } from "zod";
|
|
7
|
+
export class UnknownModelError extends Error {
|
|
8
|
+
model;
|
|
9
|
+
availableModels;
|
|
10
|
+
constructor(model, availableModels) {
|
|
11
|
+
super(`Model "${model}" is not available for this API key.`);
|
|
12
|
+
this.name = "UnknownModelError";
|
|
13
|
+
this.model = model;
|
|
14
|
+
this.availableModels = availableModels;
|
|
15
|
+
}
|
|
16
|
+
}
|
|
7
17
|
const AUTO_RESPONSE_SCHEMA = z.object({
|
|
8
18
|
format: z.enum(["md", "txt"]),
|
|
9
19
|
content: z.string().min(1)
|
|
@@ -14,11 +24,7 @@ const RATE_LIMIT_MAX_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_DELA
|
|
|
14
24
|
export async function convertPdf(options) {
|
|
15
25
|
const inputPath = resolve(options.inputPath);
|
|
16
26
|
await access(inputPath);
|
|
17
|
-
const apiKey = process.env.OPENAI_API_KEY;
|
|
18
|
-
if (!apiKey) {
|
|
19
|
-
throw new Error("OPENAI_API_KEY is not set.");
|
|
20
|
-
}
|
|
21
|
-
const client = new OpenAI({ apiKey });
|
|
27
|
+
const client = createOpenAiClient();
|
|
22
28
|
const uploaded = await withRateLimitRetry("file upload", () => client.files.create({
|
|
23
29
|
file: createReadStream(inputPath),
|
|
24
30
|
purpose: "user_data"
|
|
@@ -29,21 +35,30 @@ export async function convertPdf(options) {
|
|
|
29
35
|
model: options.model
|
|
30
36
|
});
|
|
31
37
|
const promptText = buildPromptText(options);
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
38
|
+
let result;
|
|
39
|
+
try {
|
|
40
|
+
result = await withRateLimitRetry("model run", () => run(agent, [
|
|
41
|
+
{
|
|
42
|
+
role: "user",
|
|
43
|
+
content: [
|
|
44
|
+
{
|
|
45
|
+
type: "input_text",
|
|
46
|
+
text: promptText
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
type: "input_file",
|
|
50
|
+
file: { id: uploaded.id }
|
|
51
|
+
}
|
|
52
|
+
]
|
|
53
|
+
}
|
|
54
|
+
]));
|
|
55
|
+
}
|
|
56
|
+
catch (error) {
|
|
57
|
+
if (isUnknownModelError(error, options.model)) {
|
|
58
|
+
throw new UnknownModelError(options.model, await listAvailableModels(client));
|
|
45
59
|
}
|
|
46
|
-
|
|
60
|
+
throw error;
|
|
61
|
+
}
|
|
47
62
|
const rawOutput = (result.finalOutput ?? "").trim();
|
|
48
63
|
if (!rawOutput) {
|
|
49
64
|
throw new Error("No content returned by the API.");
|
|
@@ -59,6 +74,32 @@ export async function convertPdf(options) {
|
|
|
59
74
|
}
|
|
60
75
|
return { format: "txt", content: rawOutput, usage };
|
|
61
76
|
}
|
|
77
|
+
export async function assertModelAvailable(model) {
|
|
78
|
+
const client = createOpenAiClient();
|
|
79
|
+
try {
|
|
80
|
+
await client.models.retrieve(model);
|
|
81
|
+
}
|
|
82
|
+
catch (error) {
|
|
83
|
+
if (!isUnknownModelError(error, model)) {
|
|
84
|
+
throw error;
|
|
85
|
+
}
|
|
86
|
+
throw new UnknownModelError(model, await listAvailableModels(client));
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
export async function listAvailableModels(client = createOpenAiClient()) {
|
|
90
|
+
const modelIds = [];
|
|
91
|
+
for await (const model of client.models.list()) {
|
|
92
|
+
modelIds.push(model.id);
|
|
93
|
+
}
|
|
94
|
+
return modelIds.sort((left, right) => left.localeCompare(right, "en"));
|
|
95
|
+
}
|
|
96
|
+
function createOpenAiClient() {
|
|
97
|
+
const apiKey = process.env.OPENAI_API_KEY;
|
|
98
|
+
if (!apiKey) {
|
|
99
|
+
throw new Error("OPENAI_API_KEY is not set.");
|
|
100
|
+
}
|
|
101
|
+
return new OpenAI({ apiKey });
|
|
102
|
+
}
|
|
62
103
|
function buildPromptText(options) {
|
|
63
104
|
const outputExtensionHint = normalizeExtensionHint(options.outputExtensionHint);
|
|
64
105
|
if (options.mode === "prompt") {
|
|
@@ -111,6 +152,20 @@ function normalizeExtensionHint(extension) {
|
|
|
111
152
|
const normalized = extension.trim().replace(/^\.+/, "");
|
|
112
153
|
return normalized || undefined;
|
|
113
154
|
}
|
|
155
|
+
function isUnknownModelError(error, model) {
|
|
156
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
157
|
+
const normalizedMessage = message.toLowerCase();
|
|
158
|
+
const errorStatus = typeof error === "object" && error !== null && "status" in error ? error.status : undefined;
|
|
159
|
+
const errorCode = typeof error === "object" && error !== null && "code" in error ? error.code : undefined;
|
|
160
|
+
const quotedModel = model.toLowerCase();
|
|
161
|
+
if (errorStatus === 404 || errorCode === "model_not_found") {
|
|
162
|
+
return true;
|
|
163
|
+
}
|
|
164
|
+
return (normalizedMessage.includes(quotedModel) &&
|
|
165
|
+
(normalizedMessage.includes("does not exist") ||
|
|
166
|
+
normalizedMessage.includes("not found") ||
|
|
167
|
+
normalizedMessage.includes("unknown model")));
|
|
168
|
+
}
|
|
114
169
|
function parseAutoResponse(rawOutput) {
|
|
115
170
|
let candidate = rawOutput.trim();
|
|
116
171
|
const fencedMatch = candidate.match(/```(?:json)?\s*([\s\S]*?)```/i);
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -13,9 +13,12 @@ import {
|
|
|
13
13
|
setStoredApiKey
|
|
14
14
|
} from "./config.js";
|
|
15
15
|
import {
|
|
16
|
+
assertModelAvailable,
|
|
16
17
|
convertPdf,
|
|
17
18
|
type ConversionMode,
|
|
18
|
-
type ConvertUsage
|
|
19
|
+
type ConvertUsage,
|
|
20
|
+
listAvailableModels,
|
|
21
|
+
UnknownModelError
|
|
19
22
|
} from "./openaiPdfToMarkdown.js";
|
|
20
23
|
import {
|
|
21
24
|
defaultOutputPath,
|
|
@@ -43,9 +46,10 @@ program
|
|
|
43
46
|
.name("papyrus")
|
|
44
47
|
.version(cliVersion, "-v, --version", "display version number")
|
|
45
48
|
.description("Convert PDF files to Markdown or text using the OpenAI Agents SDK")
|
|
46
|
-
.argument("<input>", "Path to input PDF file or folder")
|
|
49
|
+
.argument("[input]", "Path to input PDF file or folder")
|
|
47
50
|
.option("-o, --output <path>", "Path to output file (single input) or output directory (folder input)")
|
|
48
51
|
.option("-m, --model <model>", "OpenAI model to use", "gpt-4o-mini")
|
|
52
|
+
.option("--models", "List available OpenAI models for the current API key and exit")
|
|
49
53
|
.option(
|
|
50
54
|
"--concurrency <n>",
|
|
51
55
|
"Max parallel workers for folder input (default: 10)",
|
|
@@ -59,16 +63,32 @@ program
|
|
|
59
63
|
)
|
|
60
64
|
.option("--prompt <text>", "Custom prompt text (enables prompt mode)")
|
|
61
65
|
.option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
|
|
62
|
-
.action(async (input: string, options: CliOptions) => {
|
|
63
|
-
const inputPath = resolve(input);
|
|
66
|
+
.action(async (input: string | undefined, options: CliOptions) => {
|
|
64
67
|
const startedAt = Date.now();
|
|
65
68
|
|
|
66
69
|
try {
|
|
70
|
+
if (options.models) {
|
|
71
|
+
await ensureApiKey();
|
|
72
|
+
printAvailableModels(await listAvailableModels());
|
|
73
|
+
return;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
if (!input) {
|
|
77
|
+
throw new Error('Input path is required unless "--models" is used.');
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const inputPath = resolve(input);
|
|
67
81
|
validateOptionCombination(options);
|
|
68
82
|
|
|
69
83
|
const promptText = await resolvePromptText(options);
|
|
70
84
|
const conversionMode = resolveConversionMode(promptText);
|
|
71
85
|
const inputKind = await detectInputKind(inputPath);
|
|
86
|
+
if (inputKind === "file" && !isPdfPath(inputPath)) {
|
|
87
|
+
throw new Error("Input file must have a .pdf extension.");
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
await ensureApiKey();
|
|
91
|
+
await assertModelAvailable(options.model);
|
|
72
92
|
let usageTotals: ConvertUsage = emptyUsage();
|
|
73
93
|
|
|
74
94
|
if (inputKind === "file") {
|
|
@@ -84,6 +104,10 @@ program
|
|
|
84
104
|
printUsageTotals(usageTotals);
|
|
85
105
|
console.log(`Duration: ${((Date.now() - startedAt) / 1000).toFixed(2)}s`);
|
|
86
106
|
} catch (error) {
|
|
107
|
+
if (error instanceof UnknownModelError) {
|
|
108
|
+
printAvailableModels(error.availableModels);
|
|
109
|
+
}
|
|
110
|
+
|
|
87
111
|
const message = error instanceof Error ? error.message : String(error);
|
|
88
112
|
console.error(`Conversion failed: ${message}`);
|
|
89
113
|
console.error(`Duration: ${((Date.now() - startedAt) / 1000).toFixed(2)}s`);
|
|
@@ -160,11 +184,6 @@ async function processSingleFile(
|
|
|
160
184
|
mode: ConversionMode,
|
|
161
185
|
promptText?: string
|
|
162
186
|
): Promise<ConvertUsage> {
|
|
163
|
-
if (!isPdfPath(inputPath)) {
|
|
164
|
-
throw new Error("Input file must have a .pdf extension.");
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
await ensureApiKey();
|
|
168
187
|
const startedAt = Date.now();
|
|
169
188
|
const displayInput = relative(process.cwd(), inputPath) || inputPath;
|
|
170
189
|
const workerDashboard = process.stdout.isTTY
|
|
@@ -258,7 +277,6 @@ async function processFolder(
|
|
|
258
277
|
return { total: files.length, succeeded: 0, failed: 0, cancelled: true, usage: emptyUsage() };
|
|
259
278
|
}
|
|
260
279
|
|
|
261
|
-
await ensureApiKey();
|
|
262
280
|
const outputRoot = options.output ? resolve(options.output) : undefined;
|
|
263
281
|
let succeeded = 0;
|
|
264
282
|
let failed = 0;
|
|
@@ -765,6 +783,13 @@ function printUsageTotals(usage: ConvertUsage): void {
|
|
|
765
783
|
);
|
|
766
784
|
}
|
|
767
785
|
|
|
786
|
+
function printAvailableModels(models: string[]): void {
|
|
787
|
+
console.log(`Available models (${models.length}):`);
|
|
788
|
+
for (const model of models) {
|
|
789
|
+
console.log(model);
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
|
|
768
793
|
function getCliVersion(): string {
|
|
769
794
|
try {
|
|
770
795
|
const packageJsonPath = new URL("../package.json", import.meta.url);
|
package/src/cliHelpers.ts
CHANGED
|
@@ -30,6 +30,18 @@ export type ConvertUsage = {
|
|
|
30
30
|
totalTokens: number;
|
|
31
31
|
};
|
|
32
32
|
|
|
33
|
+
export class UnknownModelError extends Error {
|
|
34
|
+
readonly model: string;
|
|
35
|
+
readonly availableModels: string[];
|
|
36
|
+
|
|
37
|
+
constructor(model: string, availableModels: string[]) {
|
|
38
|
+
super(`Model "${model}" is not available for this API key.`);
|
|
39
|
+
this.name = "UnknownModelError";
|
|
40
|
+
this.model = model;
|
|
41
|
+
this.availableModels = availableModels;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
33
45
|
const AUTO_RESPONSE_SCHEMA = z.object({
|
|
34
46
|
format: z.enum(["md", "txt"]),
|
|
35
47
|
content: z.string().min(1)
|
|
@@ -43,12 +55,7 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
|
|
|
43
55
|
const inputPath = resolve(options.inputPath);
|
|
44
56
|
await access(inputPath);
|
|
45
57
|
|
|
46
|
-
const apiKey = process.env.OPENAI_API_KEY;
|
|
47
|
-
if (!apiKey) {
|
|
48
|
-
throw new Error("OPENAI_API_KEY is not set.");
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
const client = new OpenAI({ apiKey });
|
|
58
|
+
const client = createOpenAiClient();
|
|
52
59
|
|
|
53
60
|
const uploaded = await withRateLimitRetry("file upload", () =>
|
|
54
61
|
client.files.create({
|
|
@@ -64,23 +71,32 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
|
|
|
64
71
|
});
|
|
65
72
|
|
|
66
73
|
const promptText = buildPromptText(options);
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
74
|
+
let result;
|
|
75
|
+
try {
|
|
76
|
+
result = await withRateLimitRetry("model run", () =>
|
|
77
|
+
run(agent, [
|
|
78
|
+
{
|
|
79
|
+
role: "user",
|
|
80
|
+
content: [
|
|
81
|
+
{
|
|
82
|
+
type: "input_text",
|
|
83
|
+
text: promptText
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
type: "input_file",
|
|
87
|
+
file: { id: uploaded.id }
|
|
88
|
+
}
|
|
89
|
+
]
|
|
90
|
+
}
|
|
91
|
+
])
|
|
92
|
+
);
|
|
93
|
+
} catch (error) {
|
|
94
|
+
if (isUnknownModelError(error, options.model)) {
|
|
95
|
+
throw new UnknownModelError(options.model, await listAvailableModels(client));
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
throw error;
|
|
99
|
+
}
|
|
84
100
|
|
|
85
101
|
const rawOutput = (result.finalOutput ?? "").trim();
|
|
86
102
|
if (!rawOutput) {
|
|
@@ -101,6 +117,39 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
|
|
|
101
117
|
return { format: "txt", content: rawOutput, usage };
|
|
102
118
|
}
|
|
103
119
|
|
|
120
|
+
export async function assertModelAvailable(model: string): Promise<void> {
|
|
121
|
+
const client = createOpenAiClient();
|
|
122
|
+
|
|
123
|
+
try {
|
|
124
|
+
await client.models.retrieve(model);
|
|
125
|
+
} catch (error) {
|
|
126
|
+
if (!isUnknownModelError(error, model)) {
|
|
127
|
+
throw error;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
throw new UnknownModelError(model, await listAvailableModels(client));
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
export async function listAvailableModels(client = createOpenAiClient()): Promise<string[]> {
|
|
135
|
+
const modelIds: string[] = [];
|
|
136
|
+
|
|
137
|
+
for await (const model of client.models.list()) {
|
|
138
|
+
modelIds.push(model.id);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
return modelIds.sort((left, right) => left.localeCompare(right, "en"));
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
function createOpenAiClient(): OpenAI {
|
|
145
|
+
const apiKey = process.env.OPENAI_API_KEY;
|
|
146
|
+
if (!apiKey) {
|
|
147
|
+
throw new Error("OPENAI_API_KEY is not set.");
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
return new OpenAI({ apiKey });
|
|
151
|
+
}
|
|
152
|
+
|
|
104
153
|
function buildPromptText(options: ConvertOptions): string {
|
|
105
154
|
const outputExtensionHint = normalizeExtensionHint(options.outputExtensionHint);
|
|
106
155
|
if (options.mode === "prompt") {
|
|
@@ -171,6 +220,27 @@ function normalizeExtensionHint(extension: string | undefined): string | undefin
|
|
|
171
220
|
return normalized || undefined;
|
|
172
221
|
}
|
|
173
222
|
|
|
223
|
+
function isUnknownModelError(error: unknown, model: string): boolean {
|
|
224
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
225
|
+
const normalizedMessage = message.toLowerCase();
|
|
226
|
+
const errorStatus =
|
|
227
|
+
typeof error === "object" && error !== null && "status" in error ? error.status : undefined;
|
|
228
|
+
const errorCode =
|
|
229
|
+
typeof error === "object" && error !== null && "code" in error ? error.code : undefined;
|
|
230
|
+
const quotedModel = model.toLowerCase();
|
|
231
|
+
|
|
232
|
+
if (errorStatus === 404 || errorCode === "model_not_found") {
|
|
233
|
+
return true;
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
return (
|
|
237
|
+
normalizedMessage.includes(quotedModel) &&
|
|
238
|
+
(normalizedMessage.includes("does not exist") ||
|
|
239
|
+
normalizedMessage.includes("not found") ||
|
|
240
|
+
normalizedMessage.includes("unknown model"))
|
|
241
|
+
);
|
|
242
|
+
}
|
|
243
|
+
|
|
174
244
|
function parseAutoResponse(rawOutput: string): Omit<ConvertResult, "usage"> {
|
|
175
245
|
let candidate = rawOutput.trim();
|
|
176
246
|
|