@robin7331/papyrus-cli 0.1.8 → 0.1.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +17 -2
- package/dist/cli.js +29 -27
- package/dist/cliHelpers.d.ts +1 -0
- package/dist/openaiPdfToMarkdown.d.ts +8 -0
- package/dist/openaiPdfToMarkdown.js +74 -19
- package/package.json +1 -1
- package/src/cli.ts +38 -33
- package/src/cliHelpers.ts +1 -0
- package/src/openaiPdfToMarkdown.ts +93 -23
package/README.md
CHANGED
|
@@ -27,6 +27,9 @@ papyrus --help
|
|
|
27
27
|
# Show installed CLI version
|
|
28
28
|
papyrus --version
|
|
29
29
|
|
|
30
|
+
# List available models for the current API key
|
|
31
|
+
papyrus --models
|
|
32
|
+
|
|
30
33
|
# Single file (default behavior; if no API key is found, Papyrus prompts you to paste one)
|
|
31
34
|
papyrus ./path/to/input.pdf
|
|
32
35
|
|
|
@@ -88,9 +91,10 @@ papyrus config clear
|
|
|
88
91
|
|
|
89
92
|
## Arguments Reference
|
|
90
93
|
|
|
91
|
-
### `<input>`
|
|
94
|
+
### `[input]`
|
|
92
95
|
|
|
93
96
|
Path to a single PDF file or a folder containing PDFs (processed recursively).
|
|
97
|
+
Required unless you use `--models`.
|
|
94
98
|
|
|
95
99
|
Example:
|
|
96
100
|
|
|
@@ -165,6 +169,7 @@ papyrus ./docs/invoice.pdf --prompt-file ./my-prompt.txt
|
|
|
165
169
|
### `-m, --model <model>`
|
|
166
170
|
|
|
167
171
|
OpenAI model name used for conversion. Default is `gpt-4o-mini`.
|
|
172
|
+
If the selected model is not available, Papyrus prints the available model IDs before exiting.
|
|
168
173
|
|
|
169
174
|
Example:
|
|
170
175
|
|
|
@@ -172,6 +177,16 @@ Example:
|
|
|
172
177
|
papyrus ./docs/invoice.pdf --model gpt-4.1-mini
|
|
173
178
|
```
|
|
174
179
|
|
|
180
|
+
### `--models`
|
|
181
|
+
|
|
182
|
+
Lists the available OpenAI model IDs for the current API key and exits.
|
|
183
|
+
|
|
184
|
+
Example:
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
papyrus --models
|
|
188
|
+
```
|
|
189
|
+
|
|
175
190
|
### `--concurrency <n>`
|
|
176
191
|
|
|
177
192
|
Maximum parallel workers for folder input. Must be an integer between `1` and `100`. Default is `10`.
|
|
@@ -197,7 +212,7 @@ papyrus ./docs --yes
|
|
|
197
212
|
- In default conversion (without `--prompt`/`--prompt-file`), the model returns structured JSON with `format` + `content`.
|
|
198
213
|
- Without `--format`, output extension follows model-selected content format (`.md` or `.txt`).
|
|
199
214
|
- With `--format`, only the output extension changes.
|
|
200
|
-
- Single-file input now also shows a live worker lane
|
|
215
|
+
- Single-file input now also shows a live worker lane in TTY while conversion is running.
|
|
201
216
|
- Folder input is scanned recursively for `.pdf` files and processed in parallel.
|
|
202
217
|
- In folder mode, `--output` must be a directory path and mirrored subfolders are preserved.
|
|
203
218
|
- OpenAI rate-limit (`429`) responses are retried automatically using `Retry-After` (when present) plus exponential backoff.
|
package/dist/cli.js
CHANGED
|
@@ -5,7 +5,7 @@ import { mkdir, readFile, readdir, stat, writeFile } from "node:fs/promises";
|
|
|
5
5
|
import { dirname, join, relative, resolve } from "node:path";
|
|
6
6
|
import { Command } from "commander";
|
|
7
7
|
import { clearStoredApiKey, getConfigFilePath, getStoredApiKey, maskApiKey, setStoredApiKey } from "./config.js";
|
|
8
|
-
import { convertPdf } from "./openaiPdfToMarkdown.js";
|
|
8
|
+
import { assertModelAvailable, convertPdf, listAvailableModels, UnknownModelError } from "./openaiPdfToMarkdown.js";
|
|
9
9
|
import { defaultOutputPath, formatDurationMs, isPdfPath, looksLikeFileOutput, parseConcurrency, parseFormat, resolveFolderOutputPath, truncate, validateOptionCombination } from "./cliHelpers.js";
|
|
10
10
|
const program = new Command();
|
|
11
11
|
const configFilePath = getConfigFilePath();
|
|
@@ -15,9 +15,10 @@ program
|
|
|
15
15
|
.name("papyrus")
|
|
16
16
|
.version(cliVersion, "-v, --version", "display version number")
|
|
17
17
|
.description("Convert PDF files to Markdown or text using the OpenAI Agents SDK")
|
|
18
|
-
.argument("<input>", "Path to input PDF file or folder")
|
|
18
|
+
.argument("[input]", "Path to input PDF file or folder")
|
|
19
19
|
.option("-o, --output <path>", "Path to output file (single input) or output directory (folder input)")
|
|
20
20
|
.option("-m, --model <model>", "OpenAI model to use", "gpt-4o-mini")
|
|
21
|
+
.option("--models", "List available OpenAI models for the current API key and exit")
|
|
21
22
|
.option("--concurrency <n>", "Max parallel workers for folder input (default: 10)", parseConcurrency)
|
|
22
23
|
.option("-y, --yes", "Skip confirmation prompt in folder mode")
|
|
23
24
|
.option("--format <format>", "Output file extension override (for example: md, txt, csv, json)", parseFormat)
|
|
@@ -25,13 +26,26 @@ program
|
|
|
25
26
|
.option("--prompt <text>", "Custom prompt text (enables prompt mode)")
|
|
26
27
|
.option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
|
|
27
28
|
.action(async (input, options) => {
|
|
28
|
-
const inputPath = resolve(input);
|
|
29
29
|
const startedAt = Date.now();
|
|
30
30
|
try {
|
|
31
|
+
if (options.models) {
|
|
32
|
+
await ensureApiKey();
|
|
33
|
+
printAvailableModels(await listAvailableModels());
|
|
34
|
+
return;
|
|
35
|
+
}
|
|
36
|
+
if (!input) {
|
|
37
|
+
throw new Error('Input path is required unless "--models" is used.');
|
|
38
|
+
}
|
|
39
|
+
const inputPath = resolve(input);
|
|
31
40
|
validateOptionCombination(options);
|
|
32
41
|
const promptText = await resolvePromptText(options);
|
|
33
42
|
const conversionMode = resolveConversionMode(promptText);
|
|
34
43
|
const inputKind = await detectInputKind(inputPath);
|
|
44
|
+
if (inputKind === "file" && !isPdfPath(inputPath)) {
|
|
45
|
+
throw new Error("Input file must have a .pdf extension.");
|
|
46
|
+
}
|
|
47
|
+
await ensureApiKey();
|
|
48
|
+
await assertModelAvailable(options.model);
|
|
35
49
|
let usageTotals = emptyUsage();
|
|
36
50
|
if (inputKind === "file") {
|
|
37
51
|
usageTotals = await processSingleFile(inputPath, options, conversionMode, promptText);
|
|
@@ -47,6 +61,9 @@ program
|
|
|
47
61
|
console.log(`Duration: ${((Date.now() - startedAt) / 1000).toFixed(2)}s`);
|
|
48
62
|
}
|
|
49
63
|
catch (error) {
|
|
64
|
+
if (error instanceof UnknownModelError) {
|
|
65
|
+
printAvailableModels(error.availableModels);
|
|
66
|
+
}
|
|
50
67
|
const message = error instanceof Error ? error.message : String(error);
|
|
51
68
|
console.error(`Conversion failed: ${message}`);
|
|
52
69
|
console.error(`Duration: ${((Date.now() - startedAt) / 1000).toFixed(2)}s`);
|
|
@@ -113,10 +130,6 @@ program.parseAsync(process.argv).catch((error) => {
|
|
|
113
130
|
process.exitCode = 1;
|
|
114
131
|
});
|
|
115
132
|
async function processSingleFile(inputPath, options, mode, promptText) {
|
|
116
|
-
if (!isPdfPath(inputPath)) {
|
|
117
|
-
throw new Error("Input file must have a .pdf extension.");
|
|
118
|
-
}
|
|
119
|
-
await ensureApiKey();
|
|
120
133
|
const startedAt = Date.now();
|
|
121
134
|
const displayInput = relative(process.cwd(), inputPath) || inputPath;
|
|
122
135
|
const workerDashboard = process.stdout.isTTY
|
|
@@ -179,7 +192,6 @@ async function processFolder(inputDir, options, mode, promptText) {
|
|
|
179
192
|
console.log("Cancelled. No files were processed.");
|
|
180
193
|
return { total: files.length, succeeded: 0, failed: 0, cancelled: true, usage: emptyUsage() };
|
|
181
194
|
}
|
|
182
|
-
await ensureApiKey();
|
|
183
195
|
const outputRoot = options.output ? resolve(options.output) : undefined;
|
|
184
196
|
let succeeded = 0;
|
|
185
197
|
let failed = 0;
|
|
@@ -433,12 +445,10 @@ async function runWithConcurrency(items, concurrency, worker) {
|
|
|
433
445
|
});
|
|
434
446
|
await Promise.all(workers);
|
|
435
447
|
}
|
|
436
|
-
const SPINNER_FRAMES = ["-", "\\", "|", "/"];
|
|
437
448
|
class AsciiWorkerDashboard {
|
|
438
449
|
lanes;
|
|
439
450
|
total;
|
|
440
451
|
workerCount;
|
|
441
|
-
spinnerTimer;
|
|
442
452
|
completed = 0;
|
|
443
453
|
failed = 0;
|
|
444
454
|
renderedLineCount = 0;
|
|
@@ -446,15 +456,10 @@ class AsciiWorkerDashboard {
|
|
|
446
456
|
this.total = total;
|
|
447
457
|
this.workerCount = workerCount;
|
|
448
458
|
this.lanes = Array.from({ length: workerCount }, () => ({
|
|
449
|
-
state: "idle",
|
|
450
|
-
spinnerFrame: 0
|
|
459
|
+
state: "idle"
|
|
451
460
|
}));
|
|
452
461
|
process.stdout.write("\x1b[?25l");
|
|
453
462
|
this.render();
|
|
454
|
-
this.spinnerTimer = setInterval(() => {
|
|
455
|
-
this.tickSpinners();
|
|
456
|
-
this.render();
|
|
457
|
-
}, 100);
|
|
458
463
|
}
|
|
459
464
|
setSummary(completed, failed) {
|
|
460
465
|
this.completed = completed;
|
|
@@ -468,7 +473,7 @@ class AsciiWorkerDashboard {
|
|
|
468
473
|
}
|
|
469
474
|
lane.state = "running";
|
|
470
475
|
lane.file = file;
|
|
471
|
-
lane.message = "processing";
|
|
476
|
+
lane.message = "processing...";
|
|
472
477
|
this.render();
|
|
473
478
|
}
|
|
474
479
|
setWorkerDone(workerId, file, message) {
|
|
@@ -492,7 +497,6 @@ class AsciiWorkerDashboard {
|
|
|
492
497
|
this.render();
|
|
493
498
|
}
|
|
494
499
|
stop() {
|
|
495
|
-
clearInterval(this.spinnerTimer);
|
|
496
500
|
this.render();
|
|
497
501
|
process.stdout.write("\x1b[?25h");
|
|
498
502
|
}
|
|
@@ -521,17 +525,9 @@ class AsciiWorkerDashboard {
|
|
|
521
525
|
}
|
|
522
526
|
return lines;
|
|
523
527
|
}
|
|
524
|
-
tickSpinners() {
|
|
525
|
-
for (const lane of this.lanes) {
|
|
526
|
-
if (lane.state !== "running") {
|
|
527
|
-
continue;
|
|
528
|
-
}
|
|
529
|
-
lane.spinnerFrame = (lane.spinnerFrame + 1) % SPINNER_FRAMES.length;
|
|
530
|
-
}
|
|
531
|
-
}
|
|
532
528
|
renderIcon(lane) {
|
|
533
529
|
if (lane.state === "running") {
|
|
534
|
-
return
|
|
530
|
+
return ">>";
|
|
535
531
|
}
|
|
536
532
|
if (lane.state === "done") {
|
|
537
533
|
return "OK";
|
|
@@ -585,6 +581,12 @@ function mergeUsage(target, delta) {
|
|
|
585
581
|
function printUsageTotals(usage) {
|
|
586
582
|
console.log(`Token usage: input=${usage.inputTokens}, output=${usage.outputTokens}, total=${usage.totalTokens}, requests=${usage.requests}`);
|
|
587
583
|
}
|
|
584
|
+
function printAvailableModels(models) {
|
|
585
|
+
console.log(`Available models (${models.length}):`);
|
|
586
|
+
for (const model of models) {
|
|
587
|
+
console.log(model);
|
|
588
|
+
}
|
|
589
|
+
}
|
|
588
590
|
function getCliVersion() {
|
|
589
591
|
try {
|
|
590
592
|
const packageJsonPath = new URL("../package.json", import.meta.url);
|
package/dist/cliHelpers.d.ts
CHANGED
package/dist/openaiPdfToMarkdown.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import OpenAI from "openai";
|
|
1
2
|
export type ConvertOptions = {
|
|
2
3
|
inputPath: string;
|
|
3
4
|
model: string;
|
|
@@ -19,4 +20,11 @@ export type ConvertUsage = {
|
|
|
19
20
|
outputTokens: number;
|
|
20
21
|
totalTokens: number;
|
|
21
22
|
};
|
|
23
|
+
export declare class UnknownModelError extends Error {
|
|
24
|
+
readonly model: string;
|
|
25
|
+
readonly availableModels: string[];
|
|
26
|
+
constructor(model: string, availableModels: string[]);
|
|
27
|
+
}
|
|
22
28
|
export declare function convertPdf(options: ConvertOptions): Promise<ConvertResult>;
|
|
29
|
+
export declare function assertModelAvailable(model: string): Promise<void>;
|
|
30
|
+
export declare function listAvailableModels(client?: OpenAI): Promise<string[]>;
|
package/dist/openaiPdfToMarkdown.js
CHANGED
|
@@ -4,6 +4,16 @@ import { resolve } from "node:path";
|
|
|
4
4
|
import { Agent, run } from "@openai/agents";
|
|
5
5
|
import OpenAI from "openai";
|
|
6
6
|
import { z } from "zod";
|
|
7
|
+
export class UnknownModelError extends Error {
|
|
8
|
+
model;
|
|
9
|
+
availableModels;
|
|
10
|
+
constructor(model, availableModels) {
|
|
11
|
+
super(`Model "${model}" is not available for this API key.`);
|
|
12
|
+
this.name = "UnknownModelError";
|
|
13
|
+
this.model = model;
|
|
14
|
+
this.availableModels = availableModels;
|
|
15
|
+
}
|
|
16
|
+
}
|
|
7
17
|
const AUTO_RESPONSE_SCHEMA = z.object({
|
|
8
18
|
format: z.enum(["md", "txt"]),
|
|
9
19
|
content: z.string().min(1)
|
|
@@ -14,11 +24,7 @@ const RATE_LIMIT_MAX_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_DELA
|
|
|
14
24
|
export async function convertPdf(options) {
|
|
15
25
|
const inputPath = resolve(options.inputPath);
|
|
16
26
|
await access(inputPath);
|
|
17
|
-
const apiKey = process.env.OPENAI_API_KEY;
|
|
18
|
-
if (!apiKey) {
|
|
19
|
-
throw new Error("OPENAI_API_KEY is not set.");
|
|
20
|
-
}
|
|
21
|
-
const client = new OpenAI({ apiKey });
|
|
27
|
+
const client = createOpenAiClient();
|
|
22
28
|
const uploaded = await withRateLimitRetry("file upload", () => client.files.create({
|
|
23
29
|
file: createReadStream(inputPath),
|
|
24
30
|
purpose: "user_data"
|
|
@@ -29,21 +35,30 @@ export async function convertPdf(options) {
|
|
|
29
35
|
model: options.model
|
|
30
36
|
});
|
|
31
37
|
const promptText = buildPromptText(options);
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
38
|
+
let result;
|
|
39
|
+
try {
|
|
40
|
+
result = await withRateLimitRetry("model run", () => run(agent, [
|
|
41
|
+
{
|
|
42
|
+
role: "user",
|
|
43
|
+
content: [
|
|
44
|
+
{
|
|
45
|
+
type: "input_text",
|
|
46
|
+
text: promptText
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
type: "input_file",
|
|
50
|
+
file: { id: uploaded.id }
|
|
51
|
+
}
|
|
52
|
+
]
|
|
53
|
+
}
|
|
54
|
+
]));
|
|
55
|
+
}
|
|
56
|
+
catch (error) {
|
|
57
|
+
if (isUnknownModelError(error, options.model)) {
|
|
58
|
+
throw new UnknownModelError(options.model, await listAvailableModels(client));
|
|
45
59
|
}
|
|
46
|
-
|
|
60
|
+
throw error;
|
|
61
|
+
}
|
|
47
62
|
const rawOutput = (result.finalOutput ?? "").trim();
|
|
48
63
|
if (!rawOutput) {
|
|
49
64
|
throw new Error("No content returned by the API.");
|
|
@@ -59,6 +74,32 @@ export async function convertPdf(options) {
|
|
|
59
74
|
}
|
|
60
75
|
return { format: "txt", content: rawOutput, usage };
|
|
61
76
|
}
|
|
77
|
+
export async function assertModelAvailable(model) {
|
|
78
|
+
const client = createOpenAiClient();
|
|
79
|
+
try {
|
|
80
|
+
await client.models.retrieve(model);
|
|
81
|
+
}
|
|
82
|
+
catch (error) {
|
|
83
|
+
if (!isUnknownModelError(error, model)) {
|
|
84
|
+
throw error;
|
|
85
|
+
}
|
|
86
|
+
throw new UnknownModelError(model, await listAvailableModels(client));
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
export async function listAvailableModels(client = createOpenAiClient()) {
|
|
90
|
+
const modelIds = [];
|
|
91
|
+
for await (const model of client.models.list()) {
|
|
92
|
+
modelIds.push(model.id);
|
|
93
|
+
}
|
|
94
|
+
return modelIds.sort((left, right) => left.localeCompare(right, "en"));
|
|
95
|
+
}
|
|
96
|
+
function createOpenAiClient() {
|
|
97
|
+
const apiKey = process.env.OPENAI_API_KEY;
|
|
98
|
+
if (!apiKey) {
|
|
99
|
+
throw new Error("OPENAI_API_KEY is not set.");
|
|
100
|
+
}
|
|
101
|
+
return new OpenAI({ apiKey });
|
|
102
|
+
}
|
|
62
103
|
function buildPromptText(options) {
|
|
63
104
|
const outputExtensionHint = normalizeExtensionHint(options.outputExtensionHint);
|
|
64
105
|
if (options.mode === "prompt") {
|
|
@@ -111,6 +152,20 @@ function normalizeExtensionHint(extension) {
|
|
|
111
152
|
const normalized = extension.trim().replace(/^\.+/, "");
|
|
112
153
|
return normalized || undefined;
|
|
113
154
|
}
|
|
155
|
+
function isUnknownModelError(error, model) {
|
|
156
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
157
|
+
const normalizedMessage = message.toLowerCase();
|
|
158
|
+
const errorStatus = typeof error === "object" && error !== null && "status" in error ? error.status : undefined;
|
|
159
|
+
const errorCode = typeof error === "object" && error !== null && "code" in error ? error.code : undefined;
|
|
160
|
+
const quotedModel = model.toLowerCase();
|
|
161
|
+
if (errorStatus === 404 || errorCode === "model_not_found") {
|
|
162
|
+
return true;
|
|
163
|
+
}
|
|
164
|
+
return (normalizedMessage.includes(quotedModel) &&
|
|
165
|
+
(normalizedMessage.includes("does not exist") ||
|
|
166
|
+
normalizedMessage.includes("not found") ||
|
|
167
|
+
normalizedMessage.includes("unknown model")));
|
|
168
|
+
}
|
|
114
169
|
function parseAutoResponse(rawOutput) {
|
|
115
170
|
let candidate = rawOutput.trim();
|
|
116
171
|
const fencedMatch = candidate.match(/```(?:json)?\s*([\s\S]*?)```/i);
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -13,9 +13,12 @@ import {
|
|
|
13
13
|
setStoredApiKey
|
|
14
14
|
} from "./config.js";
|
|
15
15
|
import {
|
|
16
|
+
assertModelAvailable,
|
|
16
17
|
convertPdf,
|
|
17
18
|
type ConversionMode,
|
|
18
|
-
type ConvertUsage
|
|
19
|
+
type ConvertUsage,
|
|
20
|
+
listAvailableModels,
|
|
21
|
+
UnknownModelError
|
|
19
22
|
} from "./openaiPdfToMarkdown.js";
|
|
20
23
|
import {
|
|
21
24
|
defaultOutputPath,
|
|
@@ -43,9 +46,10 @@ program
|
|
|
43
46
|
.name("papyrus")
|
|
44
47
|
.version(cliVersion, "-v, --version", "display version number")
|
|
45
48
|
.description("Convert PDF files to Markdown or text using the OpenAI Agents SDK")
|
|
46
|
-
.argument("<input>", "Path to input PDF file or folder")
|
|
49
|
+
.argument("[input]", "Path to input PDF file or folder")
|
|
47
50
|
.option("-o, --output <path>", "Path to output file (single input) or output directory (folder input)")
|
|
48
51
|
.option("-m, --model <model>", "OpenAI model to use", "gpt-4o-mini")
|
|
52
|
+
.option("--models", "List available OpenAI models for the current API key and exit")
|
|
49
53
|
.option(
|
|
50
54
|
"--concurrency <n>",
|
|
51
55
|
"Max parallel workers for folder input (default: 10)",
|
|
@@ -59,16 +63,32 @@ program
|
|
|
59
63
|
)
|
|
60
64
|
.option("--prompt <text>", "Custom prompt text (enables prompt mode)")
|
|
61
65
|
.option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
|
|
62
|
-
.action(async (input: string, options: CliOptions) => {
|
|
63
|
-
const inputPath = resolve(input);
|
|
66
|
+
.action(async (input: string | undefined, options: CliOptions) => {
|
|
64
67
|
const startedAt = Date.now();
|
|
65
68
|
|
|
66
69
|
try {
|
|
70
|
+
if (options.models) {
|
|
71
|
+
await ensureApiKey();
|
|
72
|
+
printAvailableModels(await listAvailableModels());
|
|
73
|
+
return;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
if (!input) {
|
|
77
|
+
throw new Error('Input path is required unless "--models" is used.');
|
|
78
|
+
}
|
|
79
|
+
|
|
80
|
+
const inputPath = resolve(input);
|
|
67
81
|
validateOptionCombination(options);
|
|
68
82
|
|
|
69
83
|
const promptText = await resolvePromptText(options);
|
|
70
84
|
const conversionMode = resolveConversionMode(promptText);
|
|
71
85
|
const inputKind = await detectInputKind(inputPath);
|
|
86
|
+
if (inputKind === "file" && !isPdfPath(inputPath)) {
|
|
87
|
+
throw new Error("Input file must have a .pdf extension.");
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
await ensureApiKey();
|
|
91
|
+
await assertModelAvailable(options.model);
|
|
72
92
|
let usageTotals: ConvertUsage = emptyUsage();
|
|
73
93
|
|
|
74
94
|
if (inputKind === "file") {
|
|
@@ -84,6 +104,10 @@ program
|
|
|
84
104
|
printUsageTotals(usageTotals);
|
|
85
105
|
console.log(`Duration: ${((Date.now() - startedAt) / 1000).toFixed(2)}s`);
|
|
86
106
|
} catch (error) {
|
|
107
|
+
if (error instanceof UnknownModelError) {
|
|
108
|
+
printAvailableModels(error.availableModels);
|
|
109
|
+
}
|
|
110
|
+
|
|
87
111
|
const message = error instanceof Error ? error.message : String(error);
|
|
88
112
|
console.error(`Conversion failed: ${message}`);
|
|
89
113
|
console.error(`Duration: ${((Date.now() - startedAt) / 1000).toFixed(2)}s`);
|
|
@@ -160,11 +184,6 @@ async function processSingleFile(
|
|
|
160
184
|
mode: ConversionMode,
|
|
161
185
|
promptText?: string
|
|
162
186
|
): Promise<ConvertUsage> {
|
|
163
|
-
if (!isPdfPath(inputPath)) {
|
|
164
|
-
throw new Error("Input file must have a .pdf extension.");
|
|
165
|
-
}
|
|
166
|
-
|
|
167
|
-
await ensureApiKey();
|
|
168
187
|
const startedAt = Date.now();
|
|
169
188
|
const displayInput = relative(process.cwd(), inputPath) || inputPath;
|
|
170
189
|
const workerDashboard = process.stdout.isTTY
|
|
@@ -258,7 +277,6 @@ async function processFolder(
|
|
|
258
277
|
return { total: files.length, succeeded: 0, failed: 0, cancelled: true, usage: emptyUsage() };
|
|
259
278
|
}
|
|
260
279
|
|
|
261
|
-
await ensureApiKey();
|
|
262
280
|
const outputRoot = options.output ? resolve(options.output) : undefined;
|
|
263
281
|
let succeeded = 0;
|
|
264
282
|
let failed = 0;
|
|
@@ -583,20 +601,16 @@ async function runWithConcurrency<T>(
|
|
|
583
601
|
await Promise.all(workers);
|
|
584
602
|
}
|
|
585
603
|
|
|
586
|
-
const SPINNER_FRAMES = ["-", "\\", "|", "/"];
|
|
587
|
-
|
|
588
604
|
type WorkerLane = {
|
|
589
605
|
state: "idle" | "running" | "done" | "failed";
|
|
590
606
|
file?: string;
|
|
591
607
|
message?: string;
|
|
592
|
-
spinnerFrame: number;
|
|
593
608
|
};
|
|
594
609
|
|
|
595
610
|
class AsciiWorkerDashboard {
|
|
596
611
|
private readonly lanes: WorkerLane[];
|
|
597
612
|
private readonly total: number;
|
|
598
613
|
private readonly workerCount: number;
|
|
599
|
-
private readonly spinnerTimer: NodeJS.Timeout;
|
|
600
614
|
private completed = 0;
|
|
601
615
|
private failed = 0;
|
|
602
616
|
private renderedLineCount = 0;
|
|
@@ -605,16 +619,11 @@ class AsciiWorkerDashboard {
|
|
|
605
619
|
this.total = total;
|
|
606
620
|
this.workerCount = workerCount;
|
|
607
621
|
this.lanes = Array.from({ length: workerCount }, () => ({
|
|
608
|
-
state: "idle",
|
|
609
|
-
spinnerFrame: 0
|
|
622
|
+
state: "idle"
|
|
610
623
|
}));
|
|
611
624
|
|
|
612
625
|
process.stdout.write("\x1b[?25l");
|
|
613
626
|
this.render();
|
|
614
|
-
this.spinnerTimer = setInterval(() => {
|
|
615
|
-
this.tickSpinners();
|
|
616
|
-
this.render();
|
|
617
|
-
}, 100);
|
|
618
627
|
}
|
|
619
628
|
|
|
620
629
|
setSummary(completed: number, failed: number): void {
|
|
@@ -631,7 +640,7 @@ class AsciiWorkerDashboard {
|
|
|
631
640
|
|
|
632
641
|
lane.state = "running";
|
|
633
642
|
lane.file = file;
|
|
634
|
-
lane.message = "processing";
|
|
643
|
+
lane.message = "processing...";
|
|
635
644
|
this.render();
|
|
636
645
|
}
|
|
637
646
|
|
|
@@ -660,7 +669,6 @@ class AsciiWorkerDashboard {
|
|
|
660
669
|
}
|
|
661
670
|
|
|
662
671
|
stop(): void {
|
|
663
|
-
clearInterval(this.spinnerTimer);
|
|
664
672
|
this.render();
|
|
665
673
|
process.stdout.write("\x1b[?25h");
|
|
666
674
|
}
|
|
@@ -696,19 +704,9 @@ class AsciiWorkerDashboard {
|
|
|
696
704
|
return lines;
|
|
697
705
|
}
|
|
698
706
|
|
|
699
|
-
private tickSpinners(): void {
|
|
700
|
-
for (const lane of this.lanes) {
|
|
701
|
-
if (lane.state !== "running") {
|
|
702
|
-
continue;
|
|
703
|
-
}
|
|
704
|
-
|
|
705
|
-
lane.spinnerFrame = (lane.spinnerFrame + 1) % SPINNER_FRAMES.length;
|
|
706
|
-
}
|
|
707
|
-
}
|
|
708
|
-
|
|
709
707
|
private renderIcon(lane: WorkerLane): string {
|
|
710
708
|
if (lane.state === "running") {
|
|
711
|
-
return
|
|
709
|
+
return ">>";
|
|
712
710
|
}
|
|
713
711
|
|
|
714
712
|
if (lane.state === "done") {
|
|
@@ -785,6 +783,13 @@ function printUsageTotals(usage: ConvertUsage): void {
|
|
|
785
783
|
);
|
|
786
784
|
}
|
|
787
785
|
|
|
786
|
+
function printAvailableModels(models: string[]): void {
|
|
787
|
+
console.log(`Available models (${models.length}):`);
|
|
788
|
+
for (const model of models) {
|
|
789
|
+
console.log(model);
|
|
790
|
+
}
|
|
791
|
+
}
|
|
792
|
+
|
|
788
793
|
function getCliVersion(): string {
|
|
789
794
|
try {
|
|
790
795
|
const packageJsonPath = new URL("../package.json", import.meta.url);
|
package/src/cliHelpers.ts
CHANGED
package/src/openaiPdfToMarkdown.ts
CHANGED
|
@@ -30,6 +30,18 @@ export type ConvertUsage = {
|
|
|
30
30
|
totalTokens: number;
|
|
31
31
|
};
|
|
32
32
|
|
|
33
|
+
export class UnknownModelError extends Error {
|
|
34
|
+
readonly model: string;
|
|
35
|
+
readonly availableModels: string[];
|
|
36
|
+
|
|
37
|
+
constructor(model: string, availableModels: string[]) {
|
|
38
|
+
super(`Model "${model}" is not available for this API key.`);
|
|
39
|
+
this.name = "UnknownModelError";
|
|
40
|
+
this.model = model;
|
|
41
|
+
this.availableModels = availableModels;
|
|
42
|
+
}
|
|
43
|
+
}
|
|
44
|
+
|
|
33
45
|
const AUTO_RESPONSE_SCHEMA = z.object({
|
|
34
46
|
format: z.enum(["md", "txt"]),
|
|
35
47
|
content: z.string().min(1)
|
|
@@ -43,12 +55,7 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
|
|
|
43
55
|
const inputPath = resolve(options.inputPath);
|
|
44
56
|
await access(inputPath);
|
|
45
57
|
|
|
46
|
-
const apiKey = process.env.OPENAI_API_KEY;
|
|
47
|
-
if (!apiKey) {
|
|
48
|
-
throw new Error("OPENAI_API_KEY is not set.");
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
const client = new OpenAI({ apiKey });
|
|
58
|
+
const client = createOpenAiClient();
|
|
52
59
|
|
|
53
60
|
const uploaded = await withRateLimitRetry("file upload", () =>
|
|
54
61
|
client.files.create({
|
|
@@ -64,23 +71,32 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
|
|
|
64
71
|
});
|
|
65
72
|
|
|
66
73
|
const promptText = buildPromptText(options);
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
74
|
+
let result;
|
|
75
|
+
try {
|
|
76
|
+
result = await withRateLimitRetry("model run", () =>
|
|
77
|
+
run(agent, [
|
|
78
|
+
{
|
|
79
|
+
role: "user",
|
|
80
|
+
content: [
|
|
81
|
+
{
|
|
82
|
+
type: "input_text",
|
|
83
|
+
text: promptText
|
|
84
|
+
},
|
|
85
|
+
{
|
|
86
|
+
type: "input_file",
|
|
87
|
+
file: { id: uploaded.id }
|
|
88
|
+
}
|
|
89
|
+
]
|
|
90
|
+
}
|
|
91
|
+
])
|
|
92
|
+
);
|
|
93
|
+
} catch (error) {
|
|
94
|
+
if (isUnknownModelError(error, options.model)) {
|
|
95
|
+
throw new UnknownModelError(options.model, await listAvailableModels(client));
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
throw error;
|
|
99
|
+
}
|
|
84
100
|
|
|
85
101
|
const rawOutput = (result.finalOutput ?? "").trim();
|
|
86
102
|
if (!rawOutput) {
|
|
@@ -101,6 +117,39 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
|
|
|
101
117
|
return { format: "txt", content: rawOutput, usage };
|
|
102
118
|
}
|
|
103
119
|
|
|
120
|
+
export async function assertModelAvailable(model: string): Promise<void> {
|
|
121
|
+
const client = createOpenAiClient();
|
|
122
|
+
|
|
123
|
+
try {
|
|
124
|
+
await client.models.retrieve(model);
|
|
125
|
+
} catch (error) {
|
|
126
|
+
if (!isUnknownModelError(error, model)) {
|
|
127
|
+
throw error;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
throw new UnknownModelError(model, await listAvailableModels(client));
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
export async function listAvailableModels(client = createOpenAiClient()): Promise<string[]> {
|
|
135
|
+
const modelIds: string[] = [];
|
|
136
|
+
|
|
137
|
+
for await (const model of client.models.list()) {
|
|
138
|
+
modelIds.push(model.id);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
return modelIds.sort((left, right) => left.localeCompare(right, "en"));
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
function createOpenAiClient(): OpenAI {
|
|
145
|
+
const apiKey = process.env.OPENAI_API_KEY;
|
|
146
|
+
if (!apiKey) {
|
|
147
|
+
throw new Error("OPENAI_API_KEY is not set.");
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
return new OpenAI({ apiKey });
|
|
151
|
+
}
|
|
152
|
+
|
|
104
153
|
function buildPromptText(options: ConvertOptions): string {
|
|
105
154
|
const outputExtensionHint = normalizeExtensionHint(options.outputExtensionHint);
|
|
106
155
|
if (options.mode === "prompt") {
|
|
@@ -171,6 +220,27 @@ function normalizeExtensionHint(extension: string | undefined): string | undefin
|
|
|
171
220
|
return normalized || undefined;
|
|
172
221
|
}
|
|
173
222
|
|
|
223
|
+
function isUnknownModelError(error: unknown, model: string): boolean {
|
|
224
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
225
|
+
const normalizedMessage = message.toLowerCase();
|
|
226
|
+
const errorStatus =
|
|
227
|
+
typeof error === "object" && error !== null && "status" in error ? error.status : undefined;
|
|
228
|
+
const errorCode =
|
|
229
|
+
typeof error === "object" && error !== null && "code" in error ? error.code : undefined;
|
|
230
|
+
const quotedModel = model.toLowerCase();
|
|
231
|
+
|
|
232
|
+
if (errorStatus === 404 || errorCode === "model_not_found") {
|
|
233
|
+
return true;
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
return (
|
|
237
|
+
normalizedMessage.includes(quotedModel) &&
|
|
238
|
+
(normalizedMessage.includes("does not exist") ||
|
|
239
|
+
normalizedMessage.includes("not found") ||
|
|
240
|
+
normalizedMessage.includes("unknown model"))
|
|
241
|
+
);
|
|
242
|
+
}
|
|
243
|
+
|
|
174
244
|
function parseAutoResponse(rawOutput: string): Omit<ConvertResult, "usage"> {
|
|
175
245
|
let candidate = rawOutput.trim();
|
|
176
246
|
|