@robin7331/papyrus-cli 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -25
- package/dist/cli.js +59 -26
- package/dist/cliHelpers.d.ts +1 -3
- package/dist/cliHelpers.js +5 -17
- package/dist/openaiPdfToMarkdown.js +192 -4
- package/package.json +2 -1
- package/src/cli.ts +73 -25
- package/src/cliHelpers.ts +6 -23
- package/src/openaiPdfToMarkdown.ts +273 -19
- package/test/cliHelpers.test.ts +19 -31
package/README.md
CHANGED
|
@@ -27,20 +27,20 @@ papyrus --help
|
|
|
27
27
|
# Show installed CLI version
|
|
28
28
|
papyrus --version
|
|
29
29
|
|
|
30
|
-
# Single file (
|
|
30
|
+
# Single file (default behavior; if no API key is found, Papyrus prompts you to paste one)
|
|
31
31
|
papyrus ./path/to/input.pdf
|
|
32
32
|
|
|
33
33
|
# Single file with explicit format/output/model
|
|
34
34
|
papyrus ./path/to/input.pdf --format md --output ./out/result.md --model gpt-4o-mini
|
|
35
35
|
|
|
36
|
-
#
|
|
36
|
+
# Default conversion with extra instructions
|
|
37
37
|
papyrus ./path/to/input.pdf --instructions "Prioritize table accuracy." --format txt
|
|
38
38
|
|
|
39
|
-
# Prompt
|
|
40
|
-
papyrus ./path/to/input.pdf --
|
|
39
|
+
# Prompt conversion (inline prompt)
|
|
40
|
+
papyrus ./path/to/input.pdf --prompt "Extract all invoice line items as bullet points." --format md
|
|
41
41
|
|
|
42
|
-
# Prompt
|
|
43
|
-
papyrus ./path/to/input.pdf --
|
|
42
|
+
# Prompt conversion (prompt file)
|
|
43
|
+
papyrus ./path/to/input.pdf --prompt-file ./my-prompt.txt --format txt
|
|
44
44
|
|
|
45
45
|
# Folder mode (recursive scan, asks for confirmation)
|
|
46
46
|
papyrus ./path/to/folder
|
|
@@ -132,46 +132,34 @@ Example:
|
|
|
132
132
|
papyrus ./docs --output ./converted
|
|
133
133
|
```
|
|
134
134
|
|
|
135
|
-
### `--mode <mode>`
|
|
136
|
-
|
|
137
|
-
Conversion mode:
|
|
138
|
-
- `auto` (default): built-in conversion behavior.
|
|
139
|
-
- `prompt`: use your own prompt via `--prompt` or `--prompt-file`.
|
|
140
|
-
|
|
141
|
-
Example:
|
|
142
|
-
|
|
143
|
-
```bash
|
|
144
|
-
papyrus ./docs/invoice.pdf --mode prompt --prompt "Extract all line items."
|
|
145
|
-
```
|
|
146
|
-
|
|
147
135
|
### `--instructions <text>`
|
|
148
136
|
|
|
149
|
-
Additional conversion instructions
|
|
137
|
+
Additional conversion instructions for default conversion behavior. Cannot be combined with `--prompt` or `--prompt-file`.
|
|
150
138
|
|
|
151
139
|
Example:
|
|
152
140
|
|
|
153
141
|
```bash
|
|
154
|
-
papyrus ./docs/invoice.pdf --
|
|
142
|
+
papyrus ./docs/invoice.pdf --instructions "Keep table columns aligned."
|
|
155
143
|
```
|
|
156
144
|
|
|
157
145
|
### `--prompt <text>`
|
|
158
146
|
|
|
159
|
-
Inline prompt text for
|
|
147
|
+
Inline prompt text for prompt-based conversion. Must be non-empty. Use exactly one of `--prompt` or `--prompt-file`.
|
|
160
148
|
|
|
161
149
|
Example:
|
|
162
150
|
|
|
163
151
|
```bash
|
|
164
|
-
papyrus ./docs/invoice.pdf --
|
|
152
|
+
papyrus ./docs/invoice.pdf --prompt "Summarize payment terms."
|
|
165
153
|
```
|
|
166
154
|
|
|
167
155
|
### `--prompt-file <path>`
|
|
168
156
|
|
|
169
|
-
Path to a text file containing the prompt for
|
|
157
|
+
Path to a text file containing the prompt for prompt-based conversion. File must contain non-empty text. Use exactly one of `--prompt` or `--prompt-file`.
|
|
170
158
|
|
|
171
159
|
Example:
|
|
172
160
|
|
|
173
161
|
```bash
|
|
174
|
-
papyrus ./docs/invoice.pdf --
|
|
162
|
+
papyrus ./docs/invoice.pdf --prompt-file ./my-prompt.txt
|
|
175
163
|
```
|
|
176
164
|
|
|
177
165
|
### `-m, --model <model>`
|
|
@@ -206,9 +194,15 @@ papyrus ./docs --yes
|
|
|
206
194
|
|
|
207
195
|
## Notes
|
|
208
196
|
|
|
209
|
-
- In `
|
|
197
|
+
- In default conversion (without `--prompt`/`--prompt-file`) and without `--format`, the model returns structured JSON with `format` + `content`.
|
|
198
|
+
- Single-file input now also shows a live worker lane (spinner in TTY) while conversion is running.
|
|
210
199
|
- Folder input is scanned recursively for `.pdf` files and processed in parallel.
|
|
211
200
|
- In folder mode, `--output` must be a directory path and mirrored subfolders are preserved.
|
|
201
|
+
- OpenAI rate-limit (`429`) responses are retried automatically using `Retry-After` (when present) plus exponential backoff.
|
|
202
|
+
- Rate-limit retry tuning is available via environment variables:
|
|
203
|
+
- `PAPYRUS_RATE_LIMIT_MAX_RETRIES` (default `8`)
|
|
204
|
+
- `PAPYRUS_RATE_LIMIT_BASE_DELAY_MS` (default `2000`)
|
|
205
|
+
- `PAPYRUS_RATE_LIMIT_MAX_DELAY_MS` (default `120000`)
|
|
212
206
|
- For scanned PDFs, output quality depends on OCR quality from the model.
|
|
213
207
|
|
|
214
208
|
## Development
|
package/dist/cli.js
CHANGED
|
@@ -6,7 +6,7 @@ import { dirname, join, relative, resolve } from "node:path";
|
|
|
6
6
|
import { Command } from "commander";
|
|
7
7
|
import { clearStoredApiKey, getConfigFilePath, getStoredApiKey, maskApiKey, setStoredApiKey } from "./config.js";
|
|
8
8
|
import { convertPdf } from "./openaiPdfToMarkdown.js";
|
|
9
|
-
import { defaultOutputPath, formatDurationMs, isPdfPath, looksLikeFileOutput, parseConcurrency, parseFormat, parseMode, resolveFolderOutputPath, truncate, validateOptionCombination } from "./cliHelpers.js";
|
|
9
|
+
import { defaultOutputPath, formatDurationMs, isPdfPath, looksLikeFileOutput, parseConcurrency, parseFormat, resolveFolderOutputPath, truncate, validateOptionCombination } from "./cliHelpers.js";
|
|
10
10
|
const program = new Command();
|
|
11
11
|
const configFilePath = getConfigFilePath();
|
|
12
12
|
const OPENAI_API_KEYS_URL = "https://platform.openai.com/settings/organization/api-keys";
|
|
@@ -20,24 +20,24 @@ program
|
|
|
20
20
|
.option("-m, --model <model>", "OpenAI model to use", "gpt-4o-mini")
|
|
21
21
|
.option("--concurrency <n>", "Max parallel workers for folder input (default: 10)", parseConcurrency)
|
|
22
22
|
.option("-y, --yes", "Skip confirmation prompt in folder mode")
|
|
23
|
-
.option("--mode <mode>", "Conversion mode: auto or prompt", parseMode, "auto")
|
|
24
23
|
.option("--format <format>", "Output format override: md or txt", parseFormat)
|
|
25
|
-
.option("--instructions <text>", "Additional conversion instructions
|
|
26
|
-
.option("--prompt <text>", "Custom prompt text
|
|
27
|
-
.option("--prompt-file <path>", "Path to file containing prompt text
|
|
24
|
+
.option("--instructions <text>", "Additional conversion instructions (only when not using --prompt/--prompt-file)")
|
|
25
|
+
.option("--prompt <text>", "Custom prompt text (enables prompt mode)")
|
|
26
|
+
.option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
|
|
28
27
|
.action(async (input, options) => {
|
|
29
28
|
const inputPath = resolve(input);
|
|
30
29
|
const startedAt = Date.now();
|
|
31
30
|
try {
|
|
32
31
|
validateOptionCombination(options);
|
|
33
32
|
const promptText = await resolvePromptText(options);
|
|
33
|
+
const conversionMode = resolveConversionMode(promptText);
|
|
34
34
|
const inputKind = await detectInputKind(inputPath);
|
|
35
35
|
let usageTotals = emptyUsage();
|
|
36
36
|
if (inputKind === "file") {
|
|
37
|
-
usageTotals = await processSingleFile(inputPath, options, promptText);
|
|
37
|
+
usageTotals = await processSingleFile(inputPath, options, conversionMode, promptText);
|
|
38
38
|
}
|
|
39
39
|
else {
|
|
40
|
-
const summary = await processFolder(inputPath, options, promptText);
|
|
40
|
+
const summary = await processFolder(inputPath, options, conversionMode, promptText);
|
|
41
41
|
usageTotals = summary.usage;
|
|
42
42
|
if (!summary.cancelled && summary.failed > 0) {
|
|
43
43
|
process.exitCode = 1;
|
|
@@ -112,26 +112,59 @@ program.parseAsync(process.argv).catch((error) => {
|
|
|
112
112
|
console.error(`Command failed: ${message}`);
|
|
113
113
|
process.exitCode = 1;
|
|
114
114
|
});
|
|
115
|
-
async function processSingleFile(inputPath, options, promptText) {
|
|
115
|
+
async function processSingleFile(inputPath, options, mode, promptText) {
|
|
116
116
|
if (!isPdfPath(inputPath)) {
|
|
117
117
|
throw new Error("Input file must have a .pdf extension.");
|
|
118
118
|
}
|
|
119
119
|
await ensureApiKey();
|
|
120
|
-
const
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
120
|
+
const startedAt = Date.now();
|
|
121
|
+
const displayInput = relative(process.cwd(), inputPath) || inputPath;
|
|
122
|
+
const workerDashboard = process.stdout.isTTY
|
|
123
|
+
? new AsciiWorkerDashboard(1, 1)
|
|
124
|
+
: null;
|
|
125
|
+
workerDashboard?.setSummary(0, 0);
|
|
126
|
+
workerDashboard?.setWorkerRunning(0, displayInput);
|
|
127
|
+
if (!workerDashboard) {
|
|
128
|
+
console.log(`[worker-1] Running ${displayInput}`);
|
|
129
|
+
}
|
|
130
|
+
try {
|
|
131
|
+
const result = await convertPdf({
|
|
132
|
+
inputPath,
|
|
133
|
+
model: options.model,
|
|
134
|
+
mode,
|
|
135
|
+
format: options.format,
|
|
136
|
+
instructions: options.instructions,
|
|
137
|
+
promptText
|
|
138
|
+
});
|
|
139
|
+
const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, result.format));
|
|
140
|
+
await mkdir(dirname(outputPath), { recursive: true });
|
|
141
|
+
await writeFile(outputPath, result.content, "utf8");
|
|
142
|
+
if (workerDashboard) {
|
|
143
|
+
workerDashboard.setWorkerDone(0, displayInput, `${result.format} in ${formatDurationMs(Date.now() - startedAt)}`);
|
|
144
|
+
workerDashboard.setSummary(1, 0);
|
|
145
|
+
}
|
|
146
|
+
else {
|
|
147
|
+
console.log(`[worker-1] Done ${displayInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`);
|
|
148
|
+
}
|
|
149
|
+
console.log(`Output (${result.format}) written to: ${outputPath}`);
|
|
150
|
+
return result.usage;
|
|
151
|
+
}
|
|
152
|
+
catch (error) {
|
|
153
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
154
|
+
if (workerDashboard) {
|
|
155
|
+
workerDashboard.setWorkerFailed(0, displayInput, `${truncate(message, 42)} (${formatDurationMs(Date.now() - startedAt)})`);
|
|
156
|
+
workerDashboard.setSummary(1, 1);
|
|
157
|
+
}
|
|
158
|
+
else {
|
|
159
|
+
console.error(`[worker-1] Failed ${displayInput}: ${message} (${formatDurationMs(Date.now() - startedAt)})`);
|
|
160
|
+
}
|
|
161
|
+
throw error;
|
|
162
|
+
}
|
|
163
|
+
finally {
|
|
164
|
+
workerDashboard?.stop();
|
|
165
|
+
}
|
|
133
166
|
}
|
|
134
|
-
async function processFolder(inputDir, options, promptText) {
|
|
167
|
+
async function processFolder(inputDir, options, mode, promptText) {
|
|
135
168
|
if (options.output && looksLikeFileOutput(options.output)) {
|
|
136
169
|
throw new Error("In folder mode, --output must be a directory path (not a .md/.txt file path).");
|
|
137
170
|
}
|
|
@@ -167,7 +200,7 @@ async function processFolder(inputDir, options, promptText) {
|
|
|
167
200
|
const result = await convertPdf({
|
|
168
201
|
inputPath: filePath,
|
|
169
202
|
model: options.model,
|
|
170
|
-
mode: options.mode,
|
|
203
|
+
mode,
|
|
171
204
|
format: options.format,
|
|
172
205
|
instructions: options.instructions,
|
|
173
206
|
promptText
|
|
@@ -217,9 +250,6 @@ async function processFolder(inputDir, options, promptText) {
|
|
|
217
250
|
return { total: files.length, succeeded, failed, cancelled: false, usage };
|
|
218
251
|
}
|
|
219
252
|
async function resolvePromptText(options) {
|
|
220
|
-
if (options.mode !== "prompt") {
|
|
221
|
-
return undefined;
|
|
222
|
-
}
|
|
223
253
|
if (options.prompt) {
|
|
224
254
|
const prompt = options.prompt.trim();
|
|
225
255
|
if (!prompt) {
|
|
@@ -237,6 +267,9 @@ async function resolvePromptText(options) {
|
|
|
237
267
|
}
|
|
238
268
|
return promptFromFile;
|
|
239
269
|
}
|
|
270
|
+
function resolveConversionMode(promptText) {
|
|
271
|
+
return promptText ? "prompt" : "auto";
|
|
272
|
+
}
|
|
240
273
|
async function handleConfigInit(options) {
|
|
241
274
|
const existingKey = await getStoredApiKey();
|
|
242
275
|
if (existingKey && !options.force) {
|
package/dist/cliHelpers.d.ts
CHANGED
|
@@ -1,16 +1,14 @@
|
|
|
1
|
-
import { type ConversionMode, type OutputFormat } from "./openaiPdfToMarkdown.js";
|
|
1
|
+
import { type OutputFormat } from "./openaiPdfToMarkdown.js";
|
|
2
2
|
export type CliOptions = {
|
|
3
3
|
output?: string;
|
|
4
4
|
model: string;
|
|
5
5
|
concurrency?: number;
|
|
6
6
|
yes?: boolean;
|
|
7
|
-
mode: ConversionMode;
|
|
8
7
|
format?: OutputFormat;
|
|
9
8
|
instructions?: string;
|
|
10
9
|
prompt?: string;
|
|
11
10
|
promptFile?: string;
|
|
12
11
|
};
|
|
13
|
-
export declare function parseMode(value: string): ConversionMode;
|
|
14
12
|
export declare function parseFormat(value: string): OutputFormat;
|
|
15
13
|
export declare function parseConcurrency(value: string): number;
|
|
16
14
|
export declare function validateOptionCombination(options: CliOptions): void;
|
package/dist/cliHelpers.js
CHANGED
|
@@ -1,11 +1,5 @@
|
|
|
1
1
|
import { InvalidArgumentError } from "commander";
|
|
2
2
|
import { basename, dirname, extname, join, relative } from "node:path";
|
|
3
|
-
export function parseMode(value) {
|
|
4
|
-
if (value === "auto" || value === "prompt") {
|
|
5
|
-
return value;
|
|
6
|
-
}
|
|
7
|
-
throw new InvalidArgumentError("Mode must be either 'auto' or 'prompt'.");
|
|
8
|
-
}
|
|
9
3
|
export function parseFormat(value) {
|
|
10
4
|
if (value === "md" || value === "txt") {
|
|
11
5
|
return value;
|
|
@@ -20,18 +14,12 @@ export function parseConcurrency(value) {
|
|
|
20
14
|
return parsed;
|
|
21
15
|
}
|
|
22
16
|
export function validateOptionCombination(options) {
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
throw new Error("Prompt mode requires exactly one of --prompt or --prompt-file.");
|
|
27
|
-
}
|
|
28
|
-
if (options.instructions) {
|
|
29
|
-
throw new Error("--instructions is only supported in auto mode.");
|
|
30
|
-
}
|
|
31
|
-
return;
|
|
17
|
+
const promptSourceCount = Number(Boolean(options.prompt)) + Number(Boolean(options.promptFile));
|
|
18
|
+
if (promptSourceCount > 1) {
|
|
19
|
+
throw new Error("Use exactly one of --prompt or --prompt-file.");
|
|
32
20
|
}
|
|
33
|
-
if (
|
|
34
|
-
throw new Error("--
|
|
21
|
+
if (promptSourceCount === 1 && options.instructions) {
|
|
22
|
+
throw new Error("--instructions cannot be combined with --prompt or --prompt-file.");
|
|
35
23
|
}
|
|
36
24
|
}
|
|
37
25
|
export function defaultOutputPath(inputPath, format) {
|
package/dist/openaiPdfToMarkdown.js
CHANGED
|
@@ -8,6 +8,9 @@ const AUTO_RESPONSE_SCHEMA = z.object({
|
|
|
8
8
|
format: z.enum(["md", "txt"]),
|
|
9
9
|
content: z.string().min(1)
|
|
10
10
|
});
|
|
11
|
+
const RATE_LIMIT_MAX_RETRIES = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_RETRIES", 8);
|
|
12
|
+
const RATE_LIMIT_BASE_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_BASE_DELAY_MS", 2_000);
|
|
13
|
+
const RATE_LIMIT_MAX_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_DELAY_MS", 120_000);
|
|
11
14
|
export async function convertPdf(options) {
|
|
12
15
|
const inputPath = resolve(options.inputPath);
|
|
13
16
|
await access(inputPath);
|
|
@@ -16,17 +19,17 @@ export async function convertPdf(options) {
|
|
|
16
19
|
throw new Error("OPENAI_API_KEY is not set.");
|
|
17
20
|
}
|
|
18
21
|
const client = new OpenAI({ apiKey });
|
|
19
|
-
const uploaded = await client.files.create({
|
|
22
|
+
const uploaded = await withRateLimitRetry("file upload", () => client.files.create({
|
|
20
23
|
file: createReadStream(inputPath),
|
|
21
24
|
purpose: "user_data"
|
|
22
|
-
});
|
|
25
|
+
}));
|
|
23
26
|
const agent = new Agent({
|
|
24
27
|
name: "PDF Converter",
|
|
25
28
|
instructions: "You convert PDF files precisely according to the requested output format.",
|
|
26
29
|
model: options.model
|
|
27
30
|
});
|
|
28
31
|
const promptText = buildPromptText(options);
|
|
29
|
-
const result = await run(agent, [
|
|
32
|
+
const result = await withRateLimitRetry("model run", () => run(agent, [
|
|
30
33
|
{
|
|
31
34
|
role: "user",
|
|
32
35
|
content: [
|
|
@@ -40,7 +43,7 @@ export async function convertPdf(options) {
|
|
|
40
43
|
}
|
|
41
44
|
]
|
|
42
45
|
}
|
|
43
|
-
]);
|
|
46
|
+
]));
|
|
44
47
|
const rawOutput = (result.finalOutput ?? "").trim();
|
|
45
48
|
if (!rawOutput) {
|
|
46
49
|
throw new Error("No content returned by the API.");
|
|
@@ -142,3 +145,188 @@ function parseAutoResponse(rawOutput) {
|
|
|
142
145
|
}
|
|
143
146
|
return { format: validated.data.format, content };
|
|
144
147
|
}
|
|
148
|
+
async function withRateLimitRetry(operationName, operation) {
|
|
149
|
+
let attempt = 0;
|
|
150
|
+
while (true) {
|
|
151
|
+
try {
|
|
152
|
+
return await operation();
|
|
153
|
+
}
|
|
154
|
+
catch (error) {
|
|
155
|
+
if (!isRetriableRateLimitError(error) || attempt >= RATE_LIMIT_MAX_RETRIES) {
|
|
156
|
+
throw error;
|
|
157
|
+
}
|
|
158
|
+
const retryAfterMs = getRetryAfterMs(error);
|
|
159
|
+
const exponentialBackoffMs = RATE_LIMIT_BASE_DELAY_MS * (2 ** attempt);
|
|
160
|
+
const jitterMs = Math.floor(Math.random() * 750);
|
|
161
|
+
const computedDelayMs = retryAfterMs ?? (exponentialBackoffMs + jitterMs);
|
|
162
|
+
const waitMs = clampDelayMs(computedDelayMs, RATE_LIMIT_MAX_DELAY_MS);
|
|
163
|
+
const nextAttempt = attempt + 2;
|
|
164
|
+
const totalAttempts = RATE_LIMIT_MAX_RETRIES + 1;
|
|
165
|
+
const reason = extractErrorMessage(error);
|
|
166
|
+
console.warn(`[retry] ${operationName} hit OpenAI rate limits. Waiting ${formatDelay(waitMs)} before retry ${nextAttempt}/${totalAttempts}. ${reason}`);
|
|
167
|
+
await sleep(waitMs);
|
|
168
|
+
attempt += 1;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
function isRetriableRateLimitError(error) {
|
|
173
|
+
if (typeof error !== "object" || error === null) {
|
|
174
|
+
return false;
|
|
175
|
+
}
|
|
176
|
+
const candidate = error;
|
|
177
|
+
if (candidate.status === 429) {
|
|
178
|
+
const code = typeof candidate.code === "string" ? candidate.code : undefined;
|
|
179
|
+
const nestedCode = typeof candidate.error?.code === "string" ? candidate.error.code : undefined;
|
|
180
|
+
if (code === "insufficient_quota" || nestedCode === "insufficient_quota") {
|
|
181
|
+
return false;
|
|
182
|
+
}
|
|
183
|
+
return true;
|
|
184
|
+
}
|
|
185
|
+
const searchableText = [
|
|
186
|
+
toLowerCaseIfString(candidate.code),
|
|
187
|
+
toLowerCaseIfString(candidate.type),
|
|
188
|
+
toLowerCaseIfString(candidate.error?.code),
|
|
189
|
+
toLowerCaseIfString(candidate.error?.type),
|
|
190
|
+
toLowerCaseIfString(candidate.message),
|
|
191
|
+
toLowerCaseIfString(candidate.error?.message)
|
|
192
|
+
]
|
|
193
|
+
.filter(Boolean)
|
|
194
|
+
.join(" ");
|
|
195
|
+
if (searchableText.includes("insufficient_quota")) {
|
|
196
|
+
return false;
|
|
197
|
+
}
|
|
198
|
+
return (searchableText.includes("rate_limit") ||
|
|
199
|
+
searchableText.includes("rate limit") ||
|
|
200
|
+
searchableText.includes("too many requests"));
|
|
201
|
+
}
|
|
202
|
+
function getRetryAfterMs(error) {
|
|
203
|
+
const headerDelay = getRetryAfterMsFromHeaders(error);
|
|
204
|
+
if (typeof headerDelay === "number" && Number.isFinite(headerDelay) && headerDelay >= 0) {
|
|
205
|
+
return headerDelay;
|
|
206
|
+
}
|
|
207
|
+
const textDelay = getRetryAfterMsFromText(extractErrorMessage(error));
|
|
208
|
+
if (typeof textDelay === "number" && Number.isFinite(textDelay) && textDelay >= 0) {
|
|
209
|
+
return textDelay;
|
|
210
|
+
}
|
|
211
|
+
return undefined;
|
|
212
|
+
}
|
|
213
|
+
function getRetryAfterMsFromHeaders(error) {
|
|
214
|
+
if (typeof error !== "object" || error === null) {
|
|
215
|
+
return undefined;
|
|
216
|
+
}
|
|
217
|
+
const candidate = error;
|
|
218
|
+
const retryAfterMsHeader = readHeader(candidate.headers, "retry-after-ms")
|
|
219
|
+
?? readHeader(candidate.response?.headers, "retry-after-ms");
|
|
220
|
+
if (retryAfterMsHeader) {
|
|
221
|
+
const milliseconds = Number.parseInt(retryAfterMsHeader, 10);
|
|
222
|
+
if (Number.isFinite(milliseconds) && milliseconds >= 0) {
|
|
223
|
+
return milliseconds;
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
const retryAfterHeader = readHeader(candidate.headers, "retry-after")
|
|
227
|
+
?? readHeader(candidate.response?.headers, "retry-after");
|
|
228
|
+
if (!retryAfterHeader) {
|
|
229
|
+
return undefined;
|
|
230
|
+
}
|
|
231
|
+
const seconds = Number.parseFloat(retryAfterHeader);
|
|
232
|
+
if (Number.isFinite(seconds)) {
|
|
233
|
+
return Math.max(0, Math.round(seconds * 1_000));
|
|
234
|
+
}
|
|
235
|
+
const parsedDate = Date.parse(retryAfterHeader);
|
|
236
|
+
if (Number.isFinite(parsedDate)) {
|
|
237
|
+
return Math.max(0, parsedDate - Date.now());
|
|
238
|
+
}
|
|
239
|
+
return undefined;
|
|
240
|
+
}
|
|
241
|
+
function getRetryAfterMsFromText(message) {
|
|
242
|
+
const match = message.match(/(?:try again in|retry after)\s*([0-9]+(?:\.[0-9]+)?)\s*(ms|msec|millisecond|milliseconds|s|sec|second|seconds|m|min|minute|minutes)?/i);
|
|
243
|
+
if (!match) {
|
|
244
|
+
return undefined;
|
|
245
|
+
}
|
|
246
|
+
const rawValue = Number.parseFloat(match[1] ?? "");
|
|
247
|
+
if (!Number.isFinite(rawValue) || rawValue < 0) {
|
|
248
|
+
return undefined;
|
|
249
|
+
}
|
|
250
|
+
const unit = (match[2] ?? "s").toLowerCase();
|
|
251
|
+
if (unit === "ms" || unit === "msec" || unit === "millisecond" || unit === "milliseconds") {
|
|
252
|
+
return Math.round(rawValue);
|
|
253
|
+
}
|
|
254
|
+
if (unit === "m" || unit === "min" || unit === "minute" || unit === "minutes") {
|
|
255
|
+
return Math.round(rawValue * 60_000);
|
|
256
|
+
}
|
|
257
|
+
return Math.round(rawValue * 1_000);
|
|
258
|
+
}
|
|
259
|
+
function readHeader(headersLike, headerName) {
|
|
260
|
+
if (!headersLike) {
|
|
261
|
+
return undefined;
|
|
262
|
+
}
|
|
263
|
+
if (typeof headersLike === "object"
|
|
264
|
+
&& "get" in headersLike
|
|
265
|
+
&& typeof headersLike.get === "function") {
|
|
266
|
+
const value = headersLike.get(headerName);
|
|
267
|
+
return value ?? undefined;
|
|
268
|
+
}
|
|
269
|
+
if (typeof headersLike !== "object") {
|
|
270
|
+
return undefined;
|
|
271
|
+
}
|
|
272
|
+
const headersRecord = headersLike;
|
|
273
|
+
const lowerTarget = headerName.toLowerCase();
|
|
274
|
+
for (const [key, value] of Object.entries(headersRecord)) {
|
|
275
|
+
if (key.toLowerCase() !== lowerTarget) {
|
|
276
|
+
continue;
|
|
277
|
+
}
|
|
278
|
+
if (typeof value === "string") {
|
|
279
|
+
return value;
|
|
280
|
+
}
|
|
281
|
+
if (Array.isArray(value)) {
|
|
282
|
+
const first = value.find((entry) => typeof entry === "string");
|
|
283
|
+
return typeof first === "string" ? first : undefined;
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
return undefined;
|
|
287
|
+
}
|
|
288
|
+
function parsePositiveIntEnv(name, fallback) {
|
|
289
|
+
const raw = process.env[name];
|
|
290
|
+
if (!raw) {
|
|
291
|
+
return fallback;
|
|
292
|
+
}
|
|
293
|
+
const parsed = Number.parseInt(raw, 10);
|
|
294
|
+
if (!Number.isFinite(parsed) || parsed < 0) {
|
|
295
|
+
return fallback;
|
|
296
|
+
}
|
|
297
|
+
return parsed;
|
|
298
|
+
}
|
|
299
|
+
function clampDelayMs(value, max) {
|
|
300
|
+
return Math.max(250, Math.min(Math.round(value), max));
|
|
301
|
+
}
|
|
302
|
+
function formatDelay(milliseconds) {
|
|
303
|
+
if (milliseconds < 1_000) {
|
|
304
|
+
return `${milliseconds}ms`;
|
|
305
|
+
}
|
|
306
|
+
const seconds = milliseconds / 1_000;
|
|
307
|
+
return `${seconds.toFixed(seconds >= 10 ? 0 : 1)}s`;
|
|
308
|
+
}
|
|
309
|
+
function extractErrorMessage(error) {
|
|
310
|
+
if (error instanceof Error && error.message.trim().length > 0) {
|
|
311
|
+
return error.message;
|
|
312
|
+
}
|
|
313
|
+
if (typeof error === "object" && error !== null) {
|
|
314
|
+
const message = error.message;
|
|
315
|
+
if (typeof message === "string" && message.trim().length > 0) {
|
|
316
|
+
return message;
|
|
317
|
+
}
|
|
318
|
+
const nestedMessage = error.error?.message;
|
|
319
|
+
if (typeof nestedMessage === "string" && nestedMessage.trim().length > 0) {
|
|
320
|
+
return nestedMessage;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
return String(error);
|
|
324
|
+
}
|
|
325
|
+
function toLowerCaseIfString(value) {
|
|
326
|
+
return typeof value === "string" ? value.toLowerCase() : "";
|
|
327
|
+
}
|
|
328
|
+
function sleep(milliseconds) {
|
|
329
|
+
return new Promise((resolveSleep) => {
|
|
330
|
+
setTimeout(resolveSleep, milliseconds);
|
|
331
|
+
});
|
|
332
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@robin7331/papyrus-cli",
|
|
3
|
-
"version": "0.1.5",
|
|
3
|
+
"version": "0.1.7",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Convert PDF to markdown or text with the OpenAI Agents SDK",
|
|
6
6
|
"repository": {
|
|
@@ -37,6 +37,7 @@
|
|
|
37
37
|
},
|
|
38
38
|
"dependencies": {
|
|
39
39
|
"@openai/agents": "^0.5.3",
|
|
40
|
+
"@robin7331/papyrus-cli": "^0.1.4",
|
|
40
41
|
"commander": "^14.0.0",
|
|
41
42
|
"dotenv": "^17.3.1",
|
|
42
43
|
"openai": "^6.7.0",
|
package/src/cli.ts
CHANGED
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
} from "./config.js";
|
|
15
15
|
import {
|
|
16
16
|
convertPdf,
|
|
17
|
+
type ConversionMode,
|
|
17
18
|
type ConvertUsage
|
|
18
19
|
} from "./openaiPdfToMarkdown.js";
|
|
19
20
|
import {
|
|
@@ -23,7 +24,6 @@ import {
|
|
|
23
24
|
looksLikeFileOutput,
|
|
24
25
|
parseConcurrency,
|
|
25
26
|
parseFormat,
|
|
26
|
-
parseMode,
|
|
27
27
|
resolveFolderOutputPath,
|
|
28
28
|
truncate,
|
|
29
29
|
type CliOptions,
|
|
@@ -52,14 +52,13 @@ program
|
|
|
52
52
|
parseConcurrency
|
|
53
53
|
)
|
|
54
54
|
.option("-y, --yes", "Skip confirmation prompt in folder mode")
|
|
55
|
-
.option("--mode <mode>", "Conversion mode: auto or prompt", parseMode, "auto")
|
|
56
55
|
.option("--format <format>", "Output format override: md or txt", parseFormat)
|
|
57
56
|
.option(
|
|
58
57
|
"--instructions <text>",
|
|
59
|
-
"Additional conversion instructions
|
|
58
|
+
"Additional conversion instructions (only when not using --prompt/--prompt-file)"
|
|
60
59
|
)
|
|
61
|
-
.option("--prompt <text>", "Custom prompt text
|
|
62
|
-
.option("--prompt-file <path>", "Path to file containing prompt text
|
|
60
|
+
.option("--prompt <text>", "Custom prompt text (enables prompt mode)")
|
|
61
|
+
.option("--prompt-file <path>", "Path to file containing prompt text (enables prompt mode)")
|
|
63
62
|
.action(async (input: string, options: CliOptions) => {
|
|
64
63
|
const inputPath = resolve(input);
|
|
65
64
|
const startedAt = Date.now();
|
|
@@ -68,13 +67,14 @@ program
|
|
|
68
67
|
validateOptionCombination(options);
|
|
69
68
|
|
|
70
69
|
const promptText = await resolvePromptText(options);
|
|
70
|
+
const conversionMode = resolveConversionMode(promptText);
|
|
71
71
|
const inputKind = await detectInputKind(inputPath);
|
|
72
72
|
let usageTotals: ConvertUsage = emptyUsage();
|
|
73
73
|
|
|
74
74
|
if (inputKind === "file") {
|
|
75
|
-
usageTotals = await processSingleFile(inputPath, options, promptText);
|
|
75
|
+
usageTotals = await processSingleFile(inputPath, options, conversionMode, promptText);
|
|
76
76
|
} else {
|
|
77
|
-
const summary = await processFolder(inputPath, options, promptText);
|
|
77
|
+
const summary = await processFolder(inputPath, options, conversionMode, promptText);
|
|
78
78
|
usageTotals = summary.usage;
|
|
79
79
|
if (!summary.cancelled && summary.failed > 0) {
|
|
80
80
|
process.exitCode = 1;
|
|
@@ -157,6 +157,7 @@ program.parseAsync(process.argv).catch((error: unknown) => {
|
|
|
157
157
|
async function processSingleFile(
|
|
158
158
|
inputPath: string,
|
|
159
159
|
options: CliOptions,
|
|
160
|
+
mode: ConversionMode,
|
|
160
161
|
promptText?: string
|
|
161
162
|
): Promise<ConvertUsage> {
|
|
162
163
|
if (!isPdfPath(inputPath)) {
|
|
@@ -164,20 +165,66 @@ async function processSingleFile(
|
|
|
164
165
|
}
|
|
165
166
|
|
|
166
167
|
await ensureApiKey();
|
|
167
|
-
const
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
168
|
+
const startedAt = Date.now();
|
|
169
|
+
const displayInput = relative(process.cwd(), inputPath) || inputPath;
|
|
170
|
+
const workerDashboard = process.stdout.isTTY
|
|
171
|
+
? new AsciiWorkerDashboard(1, 1)
|
|
172
|
+
: null;
|
|
173
|
+
workerDashboard?.setSummary(0, 0);
|
|
174
|
+
workerDashboard?.setWorkerRunning(0, displayInput);
|
|
175
|
+
|
|
176
|
+
if (!workerDashboard) {
|
|
177
|
+
console.log(`[worker-1] Running ${displayInput}`);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
try {
|
|
181
|
+
const result = await convertPdf({
|
|
182
|
+
inputPath,
|
|
183
|
+
model: options.model,
|
|
184
|
+
mode,
|
|
185
|
+
format: options.format,
|
|
186
|
+
instructions: options.instructions,
|
|
187
|
+
promptText
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, result.format));
|
|
191
|
+
await mkdir(dirname(outputPath), { recursive: true });
|
|
192
|
+
await writeFile(outputPath, result.content, "utf8");
|
|
193
|
+
|
|
194
|
+
if (workerDashboard) {
|
|
195
|
+
workerDashboard.setWorkerDone(
|
|
196
|
+
0,
|
|
197
|
+
displayInput,
|
|
198
|
+
`${result.format} in ${formatDurationMs(Date.now() - startedAt)}`
|
|
199
|
+
);
|
|
200
|
+
workerDashboard.setSummary(1, 0);
|
|
201
|
+
} else {
|
|
202
|
+
console.log(
|
|
203
|
+
`[worker-1] Done ${displayInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`
|
|
204
|
+
);
|
|
205
|
+
}
|
|
175
206
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
207
|
+
console.log(`Output (${result.format}) written to: ${outputPath}`);
|
|
208
|
+
return result.usage;
|
|
209
|
+
} catch (error) {
|
|
210
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
211
|
+
if (workerDashboard) {
|
|
212
|
+
workerDashboard.setWorkerFailed(
|
|
213
|
+
0,
|
|
214
|
+
displayInput,
|
|
215
|
+
`${truncate(message, 42)} (${formatDurationMs(Date.now() - startedAt)})`
|
|
216
|
+
);
|
|
217
|
+
workerDashboard.setSummary(1, 1);
|
|
218
|
+
} else {
|
|
219
|
+
console.error(
|
|
220
|
+
`[worker-1] Failed ${displayInput}: ${message} (${formatDurationMs(Date.now() - startedAt)})`
|
|
221
|
+
);
|
|
222
|
+
}
|
|
223
|
+
|
|
224
|
+
throw error;
|
|
225
|
+
} finally {
|
|
226
|
+
workerDashboard?.stop();
|
|
227
|
+
}
|
|
181
228
|
}
|
|
182
229
|
|
|
183
230
|
type FolderSummary = {
|
|
@@ -191,6 +238,7 @@ type FolderSummary = {
|
|
|
191
238
|
async function processFolder(
|
|
192
239
|
inputDir: string,
|
|
193
240
|
options: CliOptions,
|
|
241
|
+
mode: ConversionMode,
|
|
194
242
|
promptText?: string
|
|
195
243
|
): Promise<FolderSummary> {
|
|
196
244
|
if (options.output && looksLikeFileOutput(options.output)) {
|
|
@@ -236,7 +284,7 @@ async function processFolder(
|
|
|
236
284
|
const result = await convertPdf({
|
|
237
285
|
inputPath: filePath,
|
|
238
286
|
model: options.model,
|
|
239
|
-
mode: options.mode,
|
|
287
|
+
mode,
|
|
240
288
|
format: options.format,
|
|
241
289
|
instructions: options.instructions,
|
|
242
290
|
promptText
|
|
@@ -301,10 +349,6 @@ async function processFolder(
|
|
|
301
349
|
}
|
|
302
350
|
|
|
303
351
|
async function resolvePromptText(options: CliOptions): Promise<string | undefined> {
|
|
304
|
-
if (options.mode !== "prompt") {
|
|
305
|
-
return undefined;
|
|
306
|
-
}
|
|
307
|
-
|
|
308
352
|
if (options.prompt) {
|
|
309
353
|
const prompt = options.prompt.trim();
|
|
310
354
|
if (!prompt) {
|
|
@@ -327,6 +371,10 @@ async function resolvePromptText(options: CliOptions): Promise<string | undefine
|
|
|
327
371
|
return promptFromFile;
|
|
328
372
|
}
|
|
329
373
|
|
|
374
|
+
/**
 * Derives the conversion mode from the resolved prompt text.
 *
 * @param promptText - Prompt text resolved from the CLI options, if any.
 * @returns `"prompt"` when `promptText` is truthy, otherwise `"auto"`
 *   (an empty string therefore selects `"auto"`).
 */
function resolveConversionMode(promptText: string | undefined): ConversionMode {
  if (promptText) {
    return "prompt";
  }

  return "auto";
}
|
|
377
|
+
|
|
330
378
|
async function handleConfigInit(options: ConfigInitOptions): Promise<void> {
|
|
331
379
|
const existingKey = await getStoredApiKey();
|
|
332
380
|
if (existingKey && !options.force) {
|
package/src/cliHelpers.ts
CHANGED
|
@@ -1,27 +1,18 @@
|
|
|
1
1
|
import { InvalidArgumentError } from "commander";
|
|
2
2
|
import { basename, dirname, extname, join, relative } from "node:path";
|
|
3
|
-
import { type
|
|
3
|
+
import { type OutputFormat } from "./openaiPdfToMarkdown.js";
|
|
4
4
|
|
|
5
5
|
export type CliOptions = {
|
|
6
6
|
output?: string;
|
|
7
7
|
model: string;
|
|
8
8
|
concurrency?: number;
|
|
9
9
|
yes?: boolean;
|
|
10
|
-
mode: ConversionMode;
|
|
11
10
|
format?: OutputFormat;
|
|
12
11
|
instructions?: string;
|
|
13
12
|
prompt?: string;
|
|
14
13
|
promptFile?: string;
|
|
15
14
|
};
|
|
16
15
|
|
|
17
|
-
export function parseMode(value: string): ConversionMode {
|
|
18
|
-
if (value === "auto" || value === "prompt") {
|
|
19
|
-
return value;
|
|
20
|
-
}
|
|
21
|
-
|
|
22
|
-
throw new InvalidArgumentError("Mode must be either 'auto' or 'prompt'.");
|
|
23
|
-
}
|
|
24
|
-
|
|
25
16
|
export function parseFormat(value: string): OutputFormat {
|
|
26
17
|
if (value === "md" || value === "txt") {
|
|
27
18
|
return value;
|
|
@@ -40,21 +31,13 @@ export function parseConcurrency(value: string): number {
|
|
|
40
31
|
}
|
|
41
32
|
|
|
42
33
|
export function validateOptionCombination(options: CliOptions): void {
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
throw new Error("Prompt mode requires exactly one of --prompt or --prompt-file.");
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
if (options.instructions) {
|
|
50
|
-
throw new Error("--instructions is only supported in auto mode.");
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
return;
|
|
34
|
+
const promptSourceCount = Number(Boolean(options.prompt)) + Number(Boolean(options.promptFile));
|
|
35
|
+
if (promptSourceCount > 1) {
|
|
36
|
+
throw new Error("Use exactly one of --prompt or --prompt-file.");
|
|
54
37
|
}
|
|
55
38
|
|
|
56
|
-
if (
|
|
57
|
-
throw new Error("--
|
|
39
|
+
if (promptSourceCount === 1 && options.instructions) {
|
|
40
|
+
throw new Error("--instructions cannot be combined with --prompt or --prompt-file.");
|
|
58
41
|
}
|
|
59
42
|
}
|
|
60
43
|
|
|
@@ -35,6 +35,10 @@ const AUTO_RESPONSE_SCHEMA = z.object({
|
|
|
35
35
|
content: z.string().min(1)
|
|
36
36
|
});
|
|
37
37
|
|
|
38
|
+
const RATE_LIMIT_MAX_RETRIES = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_RETRIES", 8);
|
|
39
|
+
const RATE_LIMIT_BASE_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_BASE_DELAY_MS", 2_000);
|
|
40
|
+
const RATE_LIMIT_MAX_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_DELAY_MS", 120_000);
|
|
41
|
+
|
|
38
42
|
export async function convertPdf(options: ConvertOptions): Promise<ConvertResult> {
|
|
39
43
|
const inputPath = resolve(options.inputPath);
|
|
40
44
|
await access(inputPath);
|
|
@@ -46,10 +50,12 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
|
|
|
46
50
|
|
|
47
51
|
const client = new OpenAI({ apiKey });
|
|
48
52
|
|
|
49
|
-
const uploaded = await
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
+
const uploaded = await withRateLimitRetry("file upload", () =>
|
|
54
|
+
client.files.create({
|
|
55
|
+
file: createReadStream(inputPath),
|
|
56
|
+
purpose: "user_data"
|
|
57
|
+
})
|
|
58
|
+
);
|
|
53
59
|
|
|
54
60
|
const agent = new Agent({
|
|
55
61
|
name: "PDF Converter",
|
|
@@ -58,21 +64,23 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
|
|
|
58
64
|
});
|
|
59
65
|
|
|
60
66
|
const promptText = buildPromptText(options);
|
|
61
|
-
const result = await run
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
67
|
+
const result = await withRateLimitRetry("model run", () =>
|
|
68
|
+
run(agent, [
|
|
69
|
+
{
|
|
70
|
+
role: "user",
|
|
71
|
+
content: [
|
|
72
|
+
{
|
|
73
|
+
type: "input_text",
|
|
74
|
+
text: promptText
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
type: "input_file",
|
|
78
|
+
file: { id: uploaded.id }
|
|
79
|
+
}
|
|
80
|
+
]
|
|
81
|
+
}
|
|
82
|
+
])
|
|
83
|
+
);
|
|
76
84
|
|
|
77
85
|
const rawOutput = (result.finalOutput ?? "").trim();
|
|
78
86
|
if (!rawOutput) {
|
|
@@ -201,3 +209,249 @@ function parseAutoResponse(rawOutput: string): Omit<ConvertResult, "usage"> {
|
|
|
201
209
|
|
|
202
210
|
return { format: validated.data.format, content };
|
|
203
211
|
}
|
|
212
|
+
|
|
213
|
+
/**
 * Runs `operation` and retries it for as long as it fails with a retriable
 * rate-limit error and the retry budget (`RATE_LIMIT_MAX_RETRIES`) is not used up.
 *
 * The wait before each retry is the delay hinted by the error (headers or
 * message text) when one is available; otherwise exponential backoff based on
 * `RATE_LIMIT_BASE_DELAY_MS` plus up to 749 ms of random jitter. The wait is
 * then clamped via `clampDelayMs` with `RATE_LIMIT_MAX_DELAY_MS` as the cap.
 *
 * @param operationName - Human-readable label used in the retry warning.
 * @param operation - Factory invoked once per attempt.
 * @returns The value the first successful attempt resolves with.
 * @throws The original error when it is not retriable or retries are exhausted.
 */
async function withRateLimitRetry<T>(operationName: string, operation: () => Promise<T>): Promise<T> {
  const totalAttempts = RATE_LIMIT_MAX_RETRIES + 1;

  for (let attempt = 0; ; attempt += 1) {
    try {
      return await operation();
    } catch (error) {
      const retriable = isRetriableRateLimitError(error);
      if (!retriable || attempt >= RATE_LIMIT_MAX_RETRIES) {
        throw error;
      }

      // Prefer the delay hinted by the error; fall back to backoff + jitter.
      const hintedDelayMs = getRetryAfterMs(error);
      const backoffMs = RATE_LIMIT_BASE_DELAY_MS * (2 ** attempt);
      const jitterMs = Math.floor(Math.random() * 750);
      const waitMs = clampDelayMs(hintedDelayMs ?? (backoffMs + jitterMs), RATE_LIMIT_MAX_DELAY_MS);

      // The counter in the message is the 1-based number of the upcoming attempt.
      const upcomingAttempt = attempt + 2;
      const reason = extractErrorMessage(error);

      console.warn(
        `[retry] ${operationName} hit OpenAI rate limits. Waiting ${formatDelay(waitMs)} before retry ${upcomingAttempt}/${totalAttempts}. ${reason}`
      );

      await sleep(waitMs);
    }
  }
}
|
|
241
|
+
|
|
242
|
+
/**
 * Decides whether an error looks like a transient rate limit worth retrying.
 *
 * - Non-object values are never retriable.
 * - A `status` of 429 is retriable unless `code` or `error.code` equals
 *   `"insufficient_quota"`.
 * - Otherwise the lower-cased `code`, `type`, `message` and their `error.*`
 *   counterparts are searched: any mention of `insufficient_quota` rejects,
 *   and a mention of `rate_limit`, `rate limit` or `too many requests` accepts.
 *
 * @param error - The value caught from a failed operation.
 * @returns `true` when the failure should be retried.
 */
function isRetriableRateLimitError(error: unknown): boolean {
  if (error === null || typeof error !== "object") {
    return false;
  }

  const details = error as {
    status?: unknown;
    code?: unknown;
    type?: unknown;
    error?: { code?: unknown; type?: unknown; message?: unknown };
    message?: unknown;
  };
  const quotaMarker = "insufficient_quota";

  if (details.status === 429) {
    // Quota exhaustion also arrives as 429 but waiting will not help.
    const quotaExhausted = details.code === quotaMarker || details.error?.code === quotaMarker;
    return !quotaExhausted;
  }

  const haystack = [
    details.code,
    details.type,
    details.error?.code,
    details.error?.type,
    details.message,
    details.error?.message
  ]
    .map((field) => toLowerCaseIfString(field))
    .filter(Boolean)
    .join(" ");

  if (haystack.includes(quotaMarker)) {
    return false;
  }

  const rateLimitMarkers = ["rate_limit", "rate limit", "too many requests"];
  return rateLimitMarkers.some((marker) => haystack.includes(marker));
}
|
|
286
|
+
|
|
287
|
+
/**
 * Extracts a server-suggested retry delay from an error.
 *
 * Header-based hints take precedence; the error message text is consulted only
 * when the headers yield no finite, non-negative number.
 *
 * @param error - The value caught from a failed operation.
 * @returns The delay in milliseconds, or `undefined` when no usable hint exists.
 */
function getRetryAfterMs(error: unknown): number | undefined {
  const isUsableDelay = (value: number | undefined): value is number =>
    typeof value === "number" && Number.isFinite(value) && value >= 0;

  const fromHeaders = getRetryAfterMsFromHeaders(error);
  if (isUsableDelay(fromHeaders)) {
    return fromHeaders;
  }

  const fromText = getRetryAfterMsFromText(extractErrorMessage(error));
  return isUsableDelay(fromText) ? fromText : undefined;
}
|
|
300
|
+
|
|
301
|
+
/**
 * Reads a retry delay from the headers attached to an error.
 *
 * Headers are looked up on `error.headers` first and `error.response.headers`
 * second. `retry-after-ms` (integer milliseconds) wins when it parses to a
 * non-negative number; otherwise `retry-after` is interpreted as a number of
 * seconds or, failing that, as a date whose distance from now is returned.
 *
 * @param error - The value caught from a failed operation.
 * @returns The delay in milliseconds (never negative), or `undefined`.
 */
function getRetryAfterMsFromHeaders(error: unknown): number | undefined {
  if (error === null || typeof error !== "object") {
    return undefined;
  }

  const source = error as {
    headers?: unknown;
    response?: { headers?: unknown };
  };
  const lookup = (name: string): string | undefined =>
    readHeader(source.headers, name) ?? readHeader(source.response?.headers, name);

  const rawMilliseconds = lookup("retry-after-ms");
  if (rawMilliseconds) {
    const parsedMs = Number.parseInt(rawMilliseconds, 10);
    if (Number.isFinite(parsedMs) && parsedMs >= 0) {
      return parsedMs;
    }
  }

  const rawRetryAfter = lookup("retry-after");
  if (!rawRetryAfter) {
    return undefined;
  }

  // Numeric form: a delay in (possibly fractional) seconds.
  const asSeconds = Number.parseFloat(rawRetryAfter);
  if (Number.isFinite(asSeconds)) {
    return Math.max(0, Math.round(asSeconds * 1_000));
  }

  // Date form: wait until the given instant; past dates collapse to zero.
  const asTimestamp = Date.parse(rawRetryAfter);
  return Number.isFinite(asTimestamp) ? Math.max(0, asTimestamp - Date.now()) : undefined;
}
|
|
338
|
+
|
|
339
|
+
/**
 * Parses a "try again in …" / "retry after …" hint out of an error message.
 *
 * Supported units are milliseconds, seconds, minutes and hours (short and long
 * spellings); a bare number is treated as seconds. Compound durations written
 * without separators, such as `1m26.4s` or `1h0m5s`, are summed. Previously only
 * the first number/unit pair was read, so `1m26.4s` was treated as 60 seconds
 * and `1h0m5s` as 1 second.
 *
 * @param message - Error message text to scan (case-insensitive).
 * @returns The hinted delay in milliseconds, or `undefined` when the message
 *   contains no recognizable hint or the value is not finite.
 */
function getRetryAfterMsFromText(message: string): number | undefined {
  const amount = "[0-9]+(?:\\.[0-9]+)?";
  // Longer spellings come first so that e.g. "min" is not consumed as "m".
  // A lone "h" must not be the start of a longer word ("30 hits").
  const unit = "milliseconds?|msec|ms|minutes?|min|m|seconds?|sec|s|hours?|hrs?|h(?![a-z])";

  // First component may be separated from its unit by whitespace and may omit
  // the unit; further components must follow immediately and carry a unit.
  const hint = new RegExp(
    `(?:try again in|retry after)\\s*(${amount}\\s*(?:${unit})?(?:${amount}(?:${unit}))*)`,
    "i"
  ).exec(message);
  if (!hint) {
    return undefined;
  }

  const duration = hint[1] ?? "";
  const component = new RegExp(`(${amount})\\s*(${unit})?`, "gi");
  let totalMs = 0;

  for (let part = component.exec(duration); part !== null; part = component.exec(duration)) {
    const value = Number.parseFloat(part[1] ?? "");
    if (!Number.isFinite(value) || value < 0) {
      return undefined;
    }

    const unitName = (part[2] ?? "s").toLowerCase();
    let msPerUnit = 1_000;
    if (unitName.startsWith("ms") || unitName.startsWith("mil")) {
      msPerUnit = 1;
    } else if (unitName.startsWith("m")) {
      msPerUnit = 60_000;
    } else if (unitName.startsWith("h")) {
      msPerUnit = 3_600_000;
    }

    totalMs += value * msPerUnit;
  }

  return Number.isFinite(totalMs) ? Math.round(totalMs) : undefined;
}
|
|
363
|
+
|
|
364
|
+
/**
 * Looks up a header value on a headers-like value of unknown shape.
 *
 * Objects exposing a `get` function are queried through it (a `null` result
 * becomes `undefined`). Other objects are treated as plain records and matched
 * case-insensitively on the key; string values are returned directly and array
 * values yield their first string entry.
 *
 * @param headersLike - Headers container, plain record, or anything else.
 * @param headerName - Header name to look up.
 * @returns The header value, or `undefined` when it cannot be found.
 */
function readHeader(headersLike: unknown, headerName: string): string | undefined {
  if (!headersLike || typeof headersLike !== "object") {
    return undefined;
  }

  if ("get" in headersLike && typeof (headersLike as { get?: unknown }).get === "function") {
    const viaGetter = (headersLike as { get: (name: string) => string | null }).get(headerName);
    return viaGetter ?? undefined;
  }

  const record = headersLike as Record<string, unknown>;
  const wanted = headerName.toLowerCase();

  for (const key of Object.keys(record)) {
    if (key.toLowerCase() === wanted) {
      const candidate = record[key];

      if (typeof candidate === "string") {
        return candidate;
      }

      if (Array.isArray(candidate)) {
        // Multi-valued header: take the first string entry, if any.
        const firstString: unknown = candidate.find((entry) => typeof entry === "string");
        return typeof firstString === "string" ? firstString : undefined;
      }
      // Any other value type: keep scanning for another key spelling.
    }
  }

  return undefined;
}
|
|
401
|
+
|
|
402
|
+
/**
 * Reads an integer setting from an environment variable.
 *
 * The value is parsed with `Number.parseInt(…, 10)`, so trailing non-digit
 * characters are ignored. Zero is accepted; only negative or unparsable values
 * (and an unset or empty variable) fall back.
 *
 * @param name - Environment variable name.
 * @param fallback - Value returned when the variable is unusable.
 * @returns The parsed non-negative integer, or `fallback`.
 */
function parsePositiveIntEnv(name: string, fallback: number): number {
  const raw = process.env[name];
  const parsed = raw ? Number.parseInt(raw, 10) : Number.NaN;

  return Number.isFinite(parsed) && parsed >= 0 ? parsed : fallback;
}
|
|
415
|
+
|
|
416
|
+
/**
 * Rounds a delay and bounds it to at most `max` and at least 250 ms.
 *
 * The 250 ms floor is applied last, so it wins when `max` is below 250.
 *
 * @param value - Requested delay in milliseconds.
 * @param max - Upper bound in milliseconds.
 * @returns The rounded, bounded delay in milliseconds.
 */
function clampDelayMs(value: number, max: number): number {
  const capped = Math.min(Math.round(value), max);
  return Math.max(250, capped);
}
|
|
419
|
+
|
|
420
|
+
/**
 * Formats a millisecond delay for log output.
 *
 * Values below one second are printed as `<n>ms`; everything else is printed
 * in seconds, with one decimal below 10 s and none from 10 s upwards.
 *
 * @param milliseconds - Delay in milliseconds.
 * @returns A short human-readable representation such as `750ms`, `1.5s` or `12s`.
 */
function formatDelay(milliseconds: number): string {
  const isSubSecond = milliseconds < 1_000;
  if (isSubSecond) {
    return `${milliseconds}ms`;
  }

  const seconds = milliseconds / 1_000;
  const fractionDigits = seconds >= 10 ? 0 : 1;
  return `${seconds.toFixed(fractionDigits)}s`;
}
|
|
428
|
+
|
|
429
|
+
/**
 * Produces a printable message for an arbitrary thrown value.
 *
 * Preference order: a non-blank `Error#message`, a non-blank string `message`
 * property, a non-blank string `error.message` property, and finally
 * `String(error)`.
 *
 * @param error - The value caught from a failed operation.
 * @returns The best available message text.
 */
function extractErrorMessage(error: unknown): string {
  if (error instanceof Error && error.message.trim().length > 0) {
    return error.message;
  }

  if (error !== null && typeof error === "object") {
    const hasText = (value: unknown): value is string =>
      typeof value === "string" && value.trim().length > 0;
    const shape = error as { message?: unknown; error?: { message?: unknown } };

    const direct = shape.message;
    if (hasText(direct)) {
      return direct;
    }

    const nested = shape.error?.message;
    if (hasText(nested)) {
      return nested;
    }
  }

  return String(error);
}
|
|
448
|
+
|
|
449
|
+
/**
 * Lower-cases a value when it is a string.
 *
 * @param value - Any value.
 * @returns The lower-cased string, or `""` for every non-string input.
 */
function toLowerCaseIfString(value: unknown): string {
  if (typeof value !== "string") {
    return "";
  }

  return value.toLowerCase();
}
|
|
452
|
+
|
|
453
|
+
/**
 * Returns a promise that resolves after a timer of the given length fires.
 *
 * @param milliseconds - Delay passed to `setTimeout`.
 * @returns A promise that resolves with no value once the timer fires.
 */
function sleep(milliseconds: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(() => wake(), milliseconds);
  });
}
|
package/test/cliHelpers.test.ts
CHANGED
|
@@ -8,22 +8,12 @@ import {
|
|
|
8
8
|
looksLikeFileOutput,
|
|
9
9
|
parseConcurrency,
|
|
10
10
|
parseFormat,
|
|
11
|
-
parseMode,
|
|
12
11
|
resolveFolderOutputPath,
|
|
13
12
|
truncate,
|
|
14
13
|
validateOptionCombination,
|
|
15
14
|
type CliOptions
|
|
16
15
|
} from "../src/cliHelpers.js";
|
|
17
16
|
|
|
18
|
-
test("parseMode accepts valid values", () => {
|
|
19
|
-
assert.equal(parseMode("auto"), "auto");
|
|
20
|
-
assert.equal(parseMode("prompt"), "prompt");
|
|
21
|
-
});
|
|
22
|
-
|
|
23
|
-
test("parseMode rejects invalid values", () => {
|
|
24
|
-
assert.throws(() => parseMode("invalid"), InvalidArgumentError);
|
|
25
|
-
});
|
|
26
|
-
|
|
27
17
|
test("parseFormat accepts valid values", () => {
|
|
28
18
|
assert.equal(parseFormat("md"), "md");
|
|
29
19
|
assert.equal(parseFormat("txt"), "txt");
|
|
@@ -45,42 +35,40 @@ test("parseConcurrency rejects invalid values", () => {
|
|
|
45
35
|
assert.throws(() => parseConcurrency("abc"), InvalidArgumentError);
|
|
46
36
|
});
|
|
47
37
|
|
|
48
|
-
test("validateOptionCombination
|
|
38
|
+
test("validateOptionCombination allows default auto behavior without prompt flags", () => {
|
|
49
39
|
const base: CliOptions = {
|
|
50
|
-
model: "gpt-4o-mini"
|
|
51
|
-
mode: "prompt"
|
|
40
|
+
model: "gpt-4o-mini"
|
|
52
41
|
};
|
|
53
42
|
|
|
54
|
-
assert.
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
43
|
+
assert.doesNotThrow(() => validateOptionCombination(base));
|
|
44
|
+
assert.doesNotThrow(() => validateOptionCombination({ ...base, instructions: "Extra formatting rules" }));
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
test("validateOptionCombination treats --prompt and --prompt-file as mutually exclusive", () => {
|
|
48
|
+
const base: CliOptions = {
|
|
49
|
+
model: "gpt-4o-mini"
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
assert.doesNotThrow(() => validateOptionCombination({ ...base, prompt: "Convert" }));
|
|
59
53
|
assert.doesNotThrow(() => validateOptionCombination({ ...base, promptFile: "./prompt.txt" }));
|
|
60
54
|
assert.throws(
|
|
61
55
|
() => validateOptionCombination({ ...base, prompt: "x", promptFile: "./prompt.txt" }),
|
|
62
|
-
/
|
|
63
|
-
);
|
|
64
|
-
assert.throws(
|
|
65
|
-
() => validateOptionCombination({ ...base, prompt: "x", instructions: "Extra" }),
|
|
66
|
-
/--instructions is only supported in auto mode\./
|
|
56
|
+
/Use exactly one of --prompt or --prompt-file\./
|
|
67
57
|
);
|
|
68
58
|
});
|
|
69
59
|
|
|
70
|
-
test("validateOptionCombination rejects prompt flags
|
|
60
|
+
test("validateOptionCombination rejects --instructions with prompt flags", () => {
|
|
71
61
|
const base: CliOptions = {
|
|
72
|
-
model: "gpt-4o-mini"
|
|
73
|
-
mode: "auto"
|
|
62
|
+
model: "gpt-4o-mini"
|
|
74
63
|
};
|
|
75
64
|
|
|
76
|
-
assert.doesNotThrow(() => validateOptionCombination(base));
|
|
77
65
|
assert.throws(
|
|
78
|
-
() => validateOptionCombination({ ...base, prompt: "
|
|
79
|
-
/--
|
|
66
|
+
() => validateOptionCombination({ ...base, prompt: "x", instructions: "Extra" }),
|
|
67
|
+
/--instructions cannot be combined with --prompt or --prompt-file\./
|
|
80
68
|
);
|
|
81
69
|
assert.throws(
|
|
82
|
-
() => validateOptionCombination({ ...base, promptFile: "./prompt.txt" }),
|
|
83
|
-
/--
|
|
70
|
+
() => validateOptionCombination({ ...base, promptFile: "./prompt.txt", instructions: "Extra" }),
|
|
71
|
+
/--instructions cannot be combined with --prompt or --prompt-file\./
|
|
84
72
|
);
|
|
85
73
|
});
|
|
86
74
|
|