@robin7331/papyrus-cli 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/dist/cli.js +46 -13
- package/dist/openaiPdfToMarkdown.js +192 -4
- package/package.json +2 -1
- package/src/cli.ts +59 -13
- package/src/openaiPdfToMarkdown.ts +273 -19
package/README.md
CHANGED
|
@@ -207,8 +207,14 @@ papyrus ./docs --yes
|
|
|
207
207
|
## Notes
|
|
208
208
|
|
|
209
209
|
- In `auto` mode without `--format`, the model returns structured JSON with `format` + `content`.
|
|
210
|
+
- Single-file input now also shows a live worker lane (spinner in TTY) while conversion is running.
|
|
210
211
|
- Folder input is scanned recursively for `.pdf` files and processed in parallel.
|
|
211
212
|
- In folder mode, `--output` must be a directory path and mirrored subfolders are preserved.
|
|
213
|
+
- OpenAI rate-limit (`429`) responses are retried automatically: the wait honors `Retry-After` when the server provides it and otherwise falls back to exponential backoff with jitter.
|
|
214
|
+
- Rate-limit retry tuning is available via environment variables:
|
|
215
|
+
- `PAPYRUS_RATE_LIMIT_MAX_RETRIES` (default `8`)
|
|
216
|
+
- `PAPYRUS_RATE_LIMIT_BASE_DELAY_MS` (default `2000`)
|
|
217
|
+
- `PAPYRUS_RATE_LIMIT_MAX_DELAY_MS` (default `120000`)
|
|
212
218
|
- For scanned PDFs, output quality depends on OCR quality from the model.
|
|
213
219
|
|
|
214
220
|
## Development
|
package/dist/cli.js
CHANGED
|
@@ -117,19 +117,52 @@ async function processSingleFile(inputPath, options, promptText) {
|
|
|
117
117
|
throw new Error("Input file must have a .pdf extension.");
|
|
118
118
|
}
|
|
119
119
|
await ensureApiKey();
|
|
120
|
-
const
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
120
|
+
const startedAt = Date.now();
|
|
121
|
+
const displayInput = relative(process.cwd(), inputPath) || inputPath;
|
|
122
|
+
const workerDashboard = process.stdout.isTTY
|
|
123
|
+
? new AsciiWorkerDashboard(1, 1)
|
|
124
|
+
: null;
|
|
125
|
+
workerDashboard?.setSummary(0, 0);
|
|
126
|
+
workerDashboard?.setWorkerRunning(0, displayInput);
|
|
127
|
+
if (!workerDashboard) {
|
|
128
|
+
console.log(`[worker-1] Running ${displayInput}`);
|
|
129
|
+
}
|
|
130
|
+
try {
|
|
131
|
+
const result = await convertPdf({
|
|
132
|
+
inputPath,
|
|
133
|
+
model: options.model,
|
|
134
|
+
mode: options.mode,
|
|
135
|
+
format: options.format,
|
|
136
|
+
instructions: options.instructions,
|
|
137
|
+
promptText
|
|
138
|
+
});
|
|
139
|
+
const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, result.format));
|
|
140
|
+
await mkdir(dirname(outputPath), { recursive: true });
|
|
141
|
+
await writeFile(outputPath, result.content, "utf8");
|
|
142
|
+
if (workerDashboard) {
|
|
143
|
+
workerDashboard.setWorkerDone(0, displayInput, `${result.format} in ${formatDurationMs(Date.now() - startedAt)}`);
|
|
144
|
+
workerDashboard.setSummary(1, 0);
|
|
145
|
+
}
|
|
146
|
+
else {
|
|
147
|
+
console.log(`[worker-1] Done ${displayInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`);
|
|
148
|
+
}
|
|
149
|
+
console.log(`Output (${result.format}) written to: ${outputPath}`);
|
|
150
|
+
return result.usage;
|
|
151
|
+
}
|
|
152
|
+
catch (error) {
|
|
153
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
154
|
+
if (workerDashboard) {
|
|
155
|
+
workerDashboard.setWorkerFailed(0, displayInput, `${truncate(message, 42)} (${formatDurationMs(Date.now() - startedAt)})`);
|
|
156
|
+
workerDashboard.setSummary(1, 1);
|
|
157
|
+
}
|
|
158
|
+
else {
|
|
159
|
+
console.error(`[worker-1] Failed ${displayInput}: ${message} (${formatDurationMs(Date.now() - startedAt)})`);
|
|
160
|
+
}
|
|
161
|
+
throw error;
|
|
162
|
+
}
|
|
163
|
+
finally {
|
|
164
|
+
workerDashboard?.stop();
|
|
165
|
+
}
|
|
133
166
|
}
|
|
134
167
|
async function processFolder(inputDir, options, promptText) {
|
|
135
168
|
if (options.output && looksLikeFileOutput(options.output)) {
|
|
@@ -8,6 +8,9 @@ const AUTO_RESPONSE_SCHEMA = z.object({
|
|
|
8
8
|
format: z.enum(["md", "txt"]),
|
|
9
9
|
content: z.string().min(1)
|
|
10
10
|
});
|
|
11
|
+
const RATE_LIMIT_MAX_RETRIES = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_RETRIES", 8);
|
|
12
|
+
const RATE_LIMIT_BASE_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_BASE_DELAY_MS", 2_000);
|
|
13
|
+
const RATE_LIMIT_MAX_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_DELAY_MS", 120_000);
|
|
11
14
|
export async function convertPdf(options) {
|
|
12
15
|
const inputPath = resolve(options.inputPath);
|
|
13
16
|
await access(inputPath);
|
|
@@ -16,17 +19,17 @@ export async function convertPdf(options) {
|
|
|
16
19
|
throw new Error("OPENAI_API_KEY is not set.");
|
|
17
20
|
}
|
|
18
21
|
const client = new OpenAI({ apiKey });
|
|
19
|
-
const uploaded = await client.files.create({
|
|
22
|
+
const uploaded = await withRateLimitRetry("file upload", () => client.files.create({
|
|
20
23
|
file: createReadStream(inputPath),
|
|
21
24
|
purpose: "user_data"
|
|
22
|
-
});
|
|
25
|
+
}));
|
|
23
26
|
const agent = new Agent({
|
|
24
27
|
name: "PDF Converter",
|
|
25
28
|
instructions: "You convert PDF files precisely according to the requested output format.",
|
|
26
29
|
model: options.model
|
|
27
30
|
});
|
|
28
31
|
const promptText = buildPromptText(options);
|
|
29
|
-
const result = await run(agent, [
|
|
32
|
+
const result = await withRateLimitRetry("model run", () => run(agent, [
|
|
30
33
|
{
|
|
31
34
|
role: "user",
|
|
32
35
|
content: [
|
|
@@ -40,7 +43,7 @@ export async function convertPdf(options) {
|
|
|
40
43
|
}
|
|
41
44
|
]
|
|
42
45
|
}
|
|
43
|
-
]);
|
|
46
|
+
]));
|
|
44
47
|
const rawOutput = (result.finalOutput ?? "").trim();
|
|
45
48
|
if (!rawOutput) {
|
|
46
49
|
throw new Error("No content returned by the API.");
|
|
@@ -142,3 +145,188 @@ function parseAutoResponse(rawOutput) {
|
|
|
142
145
|
}
|
|
143
146
|
return { format: validated.data.format, content };
|
|
144
147
|
}
|
|
148
|
+
async function withRateLimitRetry(operationName, operation) {
|
|
149
|
+
let attempt = 0;
|
|
150
|
+
while (true) {
|
|
151
|
+
try {
|
|
152
|
+
return await operation();
|
|
153
|
+
}
|
|
154
|
+
catch (error) {
|
|
155
|
+
if (!isRetriableRateLimitError(error) || attempt >= RATE_LIMIT_MAX_RETRIES) {
|
|
156
|
+
throw error;
|
|
157
|
+
}
|
|
158
|
+
const retryAfterMs = getRetryAfterMs(error);
|
|
159
|
+
const exponentialBackoffMs = RATE_LIMIT_BASE_DELAY_MS * (2 ** attempt);
|
|
160
|
+
const jitterMs = Math.floor(Math.random() * 750);
|
|
161
|
+
const computedDelayMs = retryAfterMs ?? (exponentialBackoffMs + jitterMs);
|
|
162
|
+
const waitMs = clampDelayMs(computedDelayMs, RATE_LIMIT_MAX_DELAY_MS);
|
|
163
|
+
const nextAttempt = attempt + 2;
|
|
164
|
+
const totalAttempts = RATE_LIMIT_MAX_RETRIES + 1;
|
|
165
|
+
const reason = extractErrorMessage(error);
|
|
166
|
+
console.warn(`[retry] ${operationName} hit OpenAI rate limits. Waiting ${formatDelay(waitMs)} before retry ${nextAttempt}/${totalAttempts}. ${reason}`);
|
|
167
|
+
await sleep(waitMs);
|
|
168
|
+
attempt += 1;
|
|
169
|
+
}
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
function isRetriableRateLimitError(error) {
|
|
173
|
+
if (typeof error !== "object" || error === null) {
|
|
174
|
+
return false;
|
|
175
|
+
}
|
|
176
|
+
const candidate = error;
|
|
177
|
+
if (candidate.status === 429) {
|
|
178
|
+
const code = typeof candidate.code === "string" ? candidate.code : undefined;
|
|
179
|
+
const nestedCode = typeof candidate.error?.code === "string" ? candidate.error.code : undefined;
|
|
180
|
+
if (code === "insufficient_quota" || nestedCode === "insufficient_quota") {
|
|
181
|
+
return false;
|
|
182
|
+
}
|
|
183
|
+
return true;
|
|
184
|
+
}
|
|
185
|
+
const searchableText = [
|
|
186
|
+
toLowerCaseIfString(candidate.code),
|
|
187
|
+
toLowerCaseIfString(candidate.type),
|
|
188
|
+
toLowerCaseIfString(candidate.error?.code),
|
|
189
|
+
toLowerCaseIfString(candidate.error?.type),
|
|
190
|
+
toLowerCaseIfString(candidate.message),
|
|
191
|
+
toLowerCaseIfString(candidate.error?.message)
|
|
192
|
+
]
|
|
193
|
+
.filter(Boolean)
|
|
194
|
+
.join(" ");
|
|
195
|
+
if (searchableText.includes("insufficient_quota")) {
|
|
196
|
+
return false;
|
|
197
|
+
}
|
|
198
|
+
return (searchableText.includes("rate_limit") ||
|
|
199
|
+
searchableText.includes("rate limit") ||
|
|
200
|
+
searchableText.includes("too many requests"));
|
|
201
|
+
}
|
|
202
|
+
function getRetryAfterMs(error) {
|
|
203
|
+
const headerDelay = getRetryAfterMsFromHeaders(error);
|
|
204
|
+
if (typeof headerDelay === "number" && Number.isFinite(headerDelay) && headerDelay >= 0) {
|
|
205
|
+
return headerDelay;
|
|
206
|
+
}
|
|
207
|
+
const textDelay = getRetryAfterMsFromText(extractErrorMessage(error));
|
|
208
|
+
if (typeof textDelay === "number" && Number.isFinite(textDelay) && textDelay >= 0) {
|
|
209
|
+
return textDelay;
|
|
210
|
+
}
|
|
211
|
+
return undefined;
|
|
212
|
+
}
|
|
213
|
+
function getRetryAfterMsFromHeaders(error) {
|
|
214
|
+
if (typeof error !== "object" || error === null) {
|
|
215
|
+
return undefined;
|
|
216
|
+
}
|
|
217
|
+
const candidate = error;
|
|
218
|
+
const retryAfterMsHeader = readHeader(candidate.headers, "retry-after-ms")
|
|
219
|
+
?? readHeader(candidate.response?.headers, "retry-after-ms");
|
|
220
|
+
if (retryAfterMsHeader) {
|
|
221
|
+
const milliseconds = Number.parseInt(retryAfterMsHeader, 10);
|
|
222
|
+
if (Number.isFinite(milliseconds) && milliseconds >= 0) {
|
|
223
|
+
return milliseconds;
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
const retryAfterHeader = readHeader(candidate.headers, "retry-after")
|
|
227
|
+
?? readHeader(candidate.response?.headers, "retry-after");
|
|
228
|
+
if (!retryAfterHeader) {
|
|
229
|
+
return undefined;
|
|
230
|
+
}
|
|
231
|
+
const seconds = Number.parseFloat(retryAfterHeader);
|
|
232
|
+
if (Number.isFinite(seconds)) {
|
|
233
|
+
return Math.max(0, Math.round(seconds * 1_000));
|
|
234
|
+
}
|
|
235
|
+
const parsedDate = Date.parse(retryAfterHeader);
|
|
236
|
+
if (Number.isFinite(parsedDate)) {
|
|
237
|
+
return Math.max(0, parsedDate - Date.now());
|
|
238
|
+
}
|
|
239
|
+
return undefined;
|
|
240
|
+
}
|
|
241
|
+
function getRetryAfterMsFromText(message) {
|
|
242
|
+
const match = message.match(/(?:try again in|retry after)\s*([0-9]+(?:\.[0-9]+)?)\s*(ms|msec|millisecond|milliseconds|s|sec|second|seconds|m|min|minute|minutes)?/i);
|
|
243
|
+
if (!match) {
|
|
244
|
+
return undefined;
|
|
245
|
+
}
|
|
246
|
+
const rawValue = Number.parseFloat(match[1] ?? "");
|
|
247
|
+
if (!Number.isFinite(rawValue) || rawValue < 0) {
|
|
248
|
+
return undefined;
|
|
249
|
+
}
|
|
250
|
+
const unit = (match[2] ?? "s").toLowerCase();
|
|
251
|
+
if (unit === "ms" || unit === "msec" || unit === "millisecond" || unit === "milliseconds") {
|
|
252
|
+
return Math.round(rawValue);
|
|
253
|
+
}
|
|
254
|
+
if (unit === "m" || unit === "min" || unit === "minute" || unit === "minutes") {
|
|
255
|
+
return Math.round(rawValue * 60_000);
|
|
256
|
+
}
|
|
257
|
+
return Math.round(rawValue * 1_000);
|
|
258
|
+
}
|
|
259
|
+
function readHeader(headersLike, headerName) {
|
|
260
|
+
if (!headersLike) {
|
|
261
|
+
return undefined;
|
|
262
|
+
}
|
|
263
|
+
if (typeof headersLike === "object"
|
|
264
|
+
&& "get" in headersLike
|
|
265
|
+
&& typeof headersLike.get === "function") {
|
|
266
|
+
const value = headersLike.get(headerName);
|
|
267
|
+
return value ?? undefined;
|
|
268
|
+
}
|
|
269
|
+
if (typeof headersLike !== "object") {
|
|
270
|
+
return undefined;
|
|
271
|
+
}
|
|
272
|
+
const headersRecord = headersLike;
|
|
273
|
+
const lowerTarget = headerName.toLowerCase();
|
|
274
|
+
for (const [key, value] of Object.entries(headersRecord)) {
|
|
275
|
+
if (key.toLowerCase() !== lowerTarget) {
|
|
276
|
+
continue;
|
|
277
|
+
}
|
|
278
|
+
if (typeof value === "string") {
|
|
279
|
+
return value;
|
|
280
|
+
}
|
|
281
|
+
if (Array.isArray(value)) {
|
|
282
|
+
const first = value.find((entry) => typeof entry === "string");
|
|
283
|
+
return typeof first === "string" ? first : undefined;
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
return undefined;
|
|
287
|
+
}
|
|
288
|
+
function parsePositiveIntEnv(name, fallback) {
|
|
289
|
+
const raw = process.env[name];
|
|
290
|
+
if (!raw) {
|
|
291
|
+
return fallback;
|
|
292
|
+
}
|
|
293
|
+
const parsed = Number.parseInt(raw, 10);
|
|
294
|
+
if (!Number.isFinite(parsed) || parsed < 0) {
|
|
295
|
+
return fallback;
|
|
296
|
+
}
|
|
297
|
+
return parsed;
|
|
298
|
+
}
|
|
299
|
+
function clampDelayMs(value, max) {
|
|
300
|
+
return Math.max(250, Math.min(Math.round(value), max));
|
|
301
|
+
}
|
|
302
|
+
function formatDelay(milliseconds) {
|
|
303
|
+
if (milliseconds < 1_000) {
|
|
304
|
+
return `${milliseconds}ms`;
|
|
305
|
+
}
|
|
306
|
+
const seconds = milliseconds / 1_000;
|
|
307
|
+
return `${seconds.toFixed(seconds >= 10 ? 0 : 1)}s`;
|
|
308
|
+
}
|
|
309
|
+
function extractErrorMessage(error) {
|
|
310
|
+
if (error instanceof Error && error.message.trim().length > 0) {
|
|
311
|
+
return error.message;
|
|
312
|
+
}
|
|
313
|
+
if (typeof error === "object" && error !== null) {
|
|
314
|
+
const message = error.message;
|
|
315
|
+
if (typeof message === "string" && message.trim().length > 0) {
|
|
316
|
+
return message;
|
|
317
|
+
}
|
|
318
|
+
const nestedMessage = error.error?.message;
|
|
319
|
+
if (typeof nestedMessage === "string" && nestedMessage.trim().length > 0) {
|
|
320
|
+
return nestedMessage;
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
return String(error);
|
|
324
|
+
}
|
|
325
|
+
function toLowerCaseIfString(value) {
|
|
326
|
+
return typeof value === "string" ? value.toLowerCase() : "";
|
|
327
|
+
}
|
|
328
|
+
function sleep(milliseconds) {
|
|
329
|
+
return new Promise((resolveSleep) => {
|
|
330
|
+
setTimeout(resolveSleep, milliseconds);
|
|
331
|
+
});
|
|
332
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@robin7331/papyrus-cli",
|
|
3
|
-
"version": "0.1.5",
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"private": false,
|
|
5
5
|
"description": "Convert PDF to markdown or text with the OpenAI Agents SDK",
|
|
6
6
|
"repository": {
|
|
@@ -37,6 +37,7 @@
|
|
|
37
37
|
},
|
|
38
38
|
"dependencies": {
|
|
39
39
|
"@openai/agents": "^0.5.3",
|
|
40
|
+
"@robin7331/papyrus-cli": "^0.1.4",
|
|
40
41
|
"commander": "^14.0.0",
|
|
41
42
|
"dotenv": "^17.3.1",
|
|
42
43
|
"openai": "^6.7.0",
|
package/src/cli.ts
CHANGED
|
@@ -164,20 +164,66 @@ async function processSingleFile(
|
|
|
164
164
|
}
|
|
165
165
|
|
|
166
166
|
await ensureApiKey();
|
|
167
|
-
const
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
167
|
+
const startedAt = Date.now();
|
|
168
|
+
const displayInput = relative(process.cwd(), inputPath) || inputPath;
|
|
169
|
+
const workerDashboard = process.stdout.isTTY
|
|
170
|
+
? new AsciiWorkerDashboard(1, 1)
|
|
171
|
+
: null;
|
|
172
|
+
workerDashboard?.setSummary(0, 0);
|
|
173
|
+
workerDashboard?.setWorkerRunning(0, displayInput);
|
|
174
|
+
|
|
175
|
+
if (!workerDashboard) {
|
|
176
|
+
console.log(`[worker-1] Running ${displayInput}`);
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
try {
|
|
180
|
+
const result = await convertPdf({
|
|
181
|
+
inputPath,
|
|
182
|
+
model: options.model,
|
|
183
|
+
mode: options.mode,
|
|
184
|
+
format: options.format,
|
|
185
|
+
instructions: options.instructions,
|
|
186
|
+
promptText
|
|
187
|
+
});
|
|
175
188
|
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
189
|
+
const outputPath = resolve(options.output ?? defaultOutputPath(inputPath, result.format));
|
|
190
|
+
await mkdir(dirname(outputPath), { recursive: true });
|
|
191
|
+
await writeFile(outputPath, result.content, "utf8");
|
|
192
|
+
|
|
193
|
+
if (workerDashboard) {
|
|
194
|
+
workerDashboard.setWorkerDone(
|
|
195
|
+
0,
|
|
196
|
+
displayInput,
|
|
197
|
+
`${result.format} in ${formatDurationMs(Date.now() - startedAt)}`
|
|
198
|
+
);
|
|
199
|
+
workerDashboard.setSummary(1, 0);
|
|
200
|
+
} else {
|
|
201
|
+
console.log(
|
|
202
|
+
`[worker-1] Done ${displayInput} -> ${outputPath} (${result.format}, ${formatDurationMs(Date.now() - startedAt)})`
|
|
203
|
+
);
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
console.log(`Output (${result.format}) written to: ${outputPath}`);
|
|
207
|
+
return result.usage;
|
|
208
|
+
} catch (error) {
|
|
209
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
210
|
+
if (workerDashboard) {
|
|
211
|
+
workerDashboard.setWorkerFailed(
|
|
212
|
+
0,
|
|
213
|
+
displayInput,
|
|
214
|
+
`${truncate(message, 42)} (${formatDurationMs(Date.now() - startedAt)})`
|
|
215
|
+
);
|
|
216
|
+
workerDashboard.setSummary(1, 1);
|
|
217
|
+
} else {
|
|
218
|
+
console.error(
|
|
219
|
+
`[worker-1] Failed ${displayInput}: ${message} (${formatDurationMs(Date.now() - startedAt)})`
|
|
220
|
+
);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
throw error;
|
|
224
|
+
} finally {
|
|
225
|
+
workerDashboard?.stop();
|
|
226
|
+
}
|
|
181
227
|
}
|
|
182
228
|
|
|
183
229
|
type FolderSummary = {
|
|
@@ -35,6 +35,10 @@ const AUTO_RESPONSE_SCHEMA = z.object({
|
|
|
35
35
|
content: z.string().min(1)
|
|
36
36
|
});
|
|
37
37
|
|
|
38
|
+
const RATE_LIMIT_MAX_RETRIES = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_RETRIES", 8);
|
|
39
|
+
const RATE_LIMIT_BASE_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_BASE_DELAY_MS", 2_000);
|
|
40
|
+
const RATE_LIMIT_MAX_DELAY_MS = parsePositiveIntEnv("PAPYRUS_RATE_LIMIT_MAX_DELAY_MS", 120_000);
|
|
41
|
+
|
|
38
42
|
export async function convertPdf(options: ConvertOptions): Promise<ConvertResult> {
|
|
39
43
|
const inputPath = resolve(options.inputPath);
|
|
40
44
|
await access(inputPath);
|
|
@@ -46,10 +50,12 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
|
|
|
46
50
|
|
|
47
51
|
const client = new OpenAI({ apiKey });
|
|
48
52
|
|
|
49
|
-
const uploaded = await client.files.create({
|
|
50
|
-
file: createReadStream(inputPath),
|
|
51
|
-
purpose: "user_data"
|
|
52
|
-
});
|
|
53
|
+
const uploaded = await withRateLimitRetry("file upload", () =>
|
|
54
|
+
client.files.create({
|
|
55
|
+
file: createReadStream(inputPath),
|
|
56
|
+
purpose: "user_data"
|
|
57
|
+
})
|
|
58
|
+
);
|
|
53
59
|
|
|
54
60
|
const agent = new Agent({
|
|
55
61
|
name: "PDF Converter",
|
|
@@ -58,21 +64,23 @@ export async function convertPdf(options: ConvertOptions): Promise<ConvertResult
|
|
|
58
64
|
});
|
|
59
65
|
|
|
60
66
|
const promptText = buildPromptText(options);
|
|
61
|
-
const result = await run(agent, [
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
67
|
+
const result = await withRateLimitRetry("model run", () =>
|
|
68
|
+
run(agent, [
|
|
69
|
+
{
|
|
70
|
+
role: "user",
|
|
71
|
+
content: [
|
|
72
|
+
{
|
|
73
|
+
type: "input_text",
|
|
74
|
+
text: promptText
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
type: "input_file",
|
|
78
|
+
file: { id: uploaded.id }
|
|
79
|
+
}
|
|
80
|
+
]
|
|
81
|
+
}
|
|
82
|
+
])
|
|
83
|
+
);
|
|
76
84
|
|
|
77
85
|
const rawOutput = (result.finalOutput ?? "").trim();
|
|
78
86
|
if (!rawOutput) {
|
|
@@ -201,3 +209,249 @@ function parseAutoResponse(rawOutput: string): Omit<ConvertResult, "usage"> {
|
|
|
201
209
|
|
|
202
210
|
return { format: validated.data.format, content };
|
|
203
211
|
}
|
|
212
|
+
|
|
213
|
+
async function withRateLimitRetry<T>(operationName: string, operation: () => Promise<T>): Promise<T> {
|
|
214
|
+
let attempt = 0;
|
|
215
|
+
while (true) {
|
|
216
|
+
try {
|
|
217
|
+
return await operation();
|
|
218
|
+
} catch (error) {
|
|
219
|
+
if (!isRetriableRateLimitError(error) || attempt >= RATE_LIMIT_MAX_RETRIES) {
|
|
220
|
+
throw error;
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
const retryAfterMs = getRetryAfterMs(error);
|
|
224
|
+
const exponentialBackoffMs = RATE_LIMIT_BASE_DELAY_MS * (2 ** attempt);
|
|
225
|
+
const jitterMs = Math.floor(Math.random() * 750);
|
|
226
|
+
const computedDelayMs = retryAfterMs ?? (exponentialBackoffMs + jitterMs);
|
|
227
|
+
const waitMs = clampDelayMs(computedDelayMs, RATE_LIMIT_MAX_DELAY_MS);
|
|
228
|
+
const nextAttempt = attempt + 2;
|
|
229
|
+
const totalAttempts = RATE_LIMIT_MAX_RETRIES + 1;
|
|
230
|
+
const reason = extractErrorMessage(error);
|
|
231
|
+
|
|
232
|
+
console.warn(
|
|
233
|
+
`[retry] ${operationName} hit OpenAI rate limits. Waiting ${formatDelay(waitMs)} before retry ${nextAttempt}/${totalAttempts}. ${reason}`
|
|
234
|
+
);
|
|
235
|
+
|
|
236
|
+
await sleep(waitMs);
|
|
237
|
+
attempt += 1;
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
function isRetriableRateLimitError(error: unknown): boolean {
|
|
243
|
+
if (typeof error !== "object" || error === null) {
|
|
244
|
+
return false;
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
const candidate = error as {
|
|
248
|
+
status?: unknown;
|
|
249
|
+
code?: unknown;
|
|
250
|
+
type?: unknown;
|
|
251
|
+
error?: { code?: unknown; type?: unknown; message?: unknown };
|
|
252
|
+
message?: unknown;
|
|
253
|
+
};
|
|
254
|
+
|
|
255
|
+
if (candidate.status === 429) {
|
|
256
|
+
const code = typeof candidate.code === "string" ? candidate.code : undefined;
|
|
257
|
+
const nestedCode = typeof candidate.error?.code === "string" ? candidate.error.code : undefined;
|
|
258
|
+
if (code === "insufficient_quota" || nestedCode === "insufficient_quota") {
|
|
259
|
+
return false;
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
return true;
|
|
263
|
+
}
|
|
264
|
+
|
|
265
|
+
const searchableText = [
|
|
266
|
+
toLowerCaseIfString(candidate.code),
|
|
267
|
+
toLowerCaseIfString(candidate.type),
|
|
268
|
+
toLowerCaseIfString(candidate.error?.code),
|
|
269
|
+
toLowerCaseIfString(candidate.error?.type),
|
|
270
|
+
toLowerCaseIfString(candidate.message),
|
|
271
|
+
toLowerCaseIfString(candidate.error?.message)
|
|
272
|
+
]
|
|
273
|
+
.filter(Boolean)
|
|
274
|
+
.join(" ");
|
|
275
|
+
|
|
276
|
+
if (searchableText.includes("insufficient_quota")) {
|
|
277
|
+
return false;
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
return (
|
|
281
|
+
searchableText.includes("rate_limit") ||
|
|
282
|
+
searchableText.includes("rate limit") ||
|
|
283
|
+
searchableText.includes("too many requests")
|
|
284
|
+
);
|
|
285
|
+
}
|
|
286
|
+
|
|
287
|
+
function getRetryAfterMs(error: unknown): number | undefined {
|
|
288
|
+
const headerDelay = getRetryAfterMsFromHeaders(error);
|
|
289
|
+
if (typeof headerDelay === "number" && Number.isFinite(headerDelay) && headerDelay >= 0) {
|
|
290
|
+
return headerDelay;
|
|
291
|
+
}
|
|
292
|
+
|
|
293
|
+
const textDelay = getRetryAfterMsFromText(extractErrorMessage(error));
|
|
294
|
+
if (typeof textDelay === "number" && Number.isFinite(textDelay) && textDelay >= 0) {
|
|
295
|
+
return textDelay;
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
return undefined;
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
function getRetryAfterMsFromHeaders(error: unknown): number | undefined {
|
|
302
|
+
if (typeof error !== "object" || error === null) {
|
|
303
|
+
return undefined;
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
const candidate = error as {
|
|
307
|
+
headers?: unknown;
|
|
308
|
+
response?: { headers?: unknown };
|
|
309
|
+
};
|
|
310
|
+
|
|
311
|
+
const retryAfterMsHeader = readHeader(candidate.headers, "retry-after-ms")
|
|
312
|
+
?? readHeader(candidate.response?.headers, "retry-after-ms");
|
|
313
|
+
if (retryAfterMsHeader) {
|
|
314
|
+
const milliseconds = Number.parseInt(retryAfterMsHeader, 10);
|
|
315
|
+
if (Number.isFinite(milliseconds) && milliseconds >= 0) {
|
|
316
|
+
return milliseconds;
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
const retryAfterHeader = readHeader(candidate.headers, "retry-after")
|
|
321
|
+
?? readHeader(candidate.response?.headers, "retry-after");
|
|
322
|
+
if (!retryAfterHeader) {
|
|
323
|
+
return undefined;
|
|
324
|
+
}
|
|
325
|
+
|
|
326
|
+
const seconds = Number.parseFloat(retryAfterHeader);
|
|
327
|
+
if (Number.isFinite(seconds)) {
|
|
328
|
+
return Math.max(0, Math.round(seconds * 1_000));
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
const parsedDate = Date.parse(retryAfterHeader);
|
|
332
|
+
if (Number.isFinite(parsedDate)) {
|
|
333
|
+
return Math.max(0, parsedDate - Date.now());
|
|
334
|
+
}
|
|
335
|
+
|
|
336
|
+
return undefined;
|
|
337
|
+
}
|
|
338
|
+
|
|
339
|
+
function getRetryAfterMsFromText(message: string): number | undefined {
|
|
340
|
+
const match = message.match(
|
|
341
|
+
/(?:try again in|retry after)\s*([0-9]+(?:\.[0-9]+)?)\s*(ms|msec|millisecond|milliseconds|s|sec|second|seconds|m|min|minute|minutes)?/i
|
|
342
|
+
);
|
|
343
|
+
if (!match) {
|
|
344
|
+
return undefined;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
const rawValue = Number.parseFloat(match[1] ?? "");
|
|
348
|
+
if (!Number.isFinite(rawValue) || rawValue < 0) {
|
|
349
|
+
return undefined;
|
|
350
|
+
}
|
|
351
|
+
|
|
352
|
+
const unit = (match[2] ?? "s").toLowerCase();
|
|
353
|
+
if (unit === "ms" || unit === "msec" || unit === "millisecond" || unit === "milliseconds") {
|
|
354
|
+
return Math.round(rawValue);
|
|
355
|
+
}
|
|
356
|
+
|
|
357
|
+
if (unit === "m" || unit === "min" || unit === "minute" || unit === "minutes") {
|
|
358
|
+
return Math.round(rawValue * 60_000);
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
return Math.round(rawValue * 1_000);
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
function readHeader(headersLike: unknown, headerName: string): string | undefined {
|
|
365
|
+
if (!headersLike) {
|
|
366
|
+
return undefined;
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
if (
|
|
370
|
+
typeof headersLike === "object"
|
|
371
|
+
&& "get" in headersLike
|
|
372
|
+
&& typeof (headersLike as { get?: unknown }).get === "function"
|
|
373
|
+
) {
|
|
374
|
+
const value = (headersLike as { get: (name: string) => string | null }).get(headerName);
|
|
375
|
+
return value ?? undefined;
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
if (typeof headersLike !== "object") {
|
|
379
|
+
return undefined;
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
const headersRecord = headersLike as Record<string, unknown>;
|
|
383
|
+
const lowerTarget = headerName.toLowerCase();
|
|
384
|
+
for (const [key, value] of Object.entries(headersRecord)) {
|
|
385
|
+
if (key.toLowerCase() !== lowerTarget) {
|
|
386
|
+
continue;
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
if (typeof value === "string") {
|
|
390
|
+
return value;
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
if (Array.isArray(value)) {
|
|
394
|
+
const first = value.find((entry) => typeof entry === "string");
|
|
395
|
+
return typeof first === "string" ? first : undefined;
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
|
|
399
|
+
return undefined;
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
function parsePositiveIntEnv(name: string, fallback: number): number {
|
|
403
|
+
const raw = process.env[name];
|
|
404
|
+
if (!raw) {
|
|
405
|
+
return fallback;
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
const parsed = Number.parseInt(raw, 10);
|
|
409
|
+
if (!Number.isFinite(parsed) || parsed < 0) {
|
|
410
|
+
return fallback;
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
return parsed;
|
|
414
|
+
}
|
|
415
|
+
|
|
416
|
+
function clampDelayMs(value: number, max: number): number {
|
|
417
|
+
return Math.max(250, Math.min(Math.round(value), max));
|
|
418
|
+
}
|
|
419
|
+
|
|
420
|
+
function formatDelay(milliseconds: number): string {
|
|
421
|
+
if (milliseconds < 1_000) {
|
|
422
|
+
return `${milliseconds}ms`;
|
|
423
|
+
}
|
|
424
|
+
|
|
425
|
+
const seconds = milliseconds / 1_000;
|
|
426
|
+
return `${seconds.toFixed(seconds >= 10 ? 0 : 1)}s`;
|
|
427
|
+
}
|
|
428
|
+
|
|
429
|
+
function extractErrorMessage(error: unknown): string {
|
|
430
|
+
if (error instanceof Error && error.message.trim().length > 0) {
|
|
431
|
+
return error.message;
|
|
432
|
+
}
|
|
433
|
+
|
|
434
|
+
if (typeof error === "object" && error !== null) {
|
|
435
|
+
const message = (error as { message?: unknown; error?: { message?: unknown } }).message;
|
|
436
|
+
if (typeof message === "string" && message.trim().length > 0) {
|
|
437
|
+
return message;
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
const nestedMessage = (error as { error?: { message?: unknown } }).error?.message;
|
|
441
|
+
if (typeof nestedMessage === "string" && nestedMessage.trim().length > 0) {
|
|
442
|
+
return nestedMessage;
|
|
443
|
+
}
|
|
444
|
+
}
|
|
445
|
+
|
|
446
|
+
return String(error);
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
function toLowerCaseIfString(value: unknown): string {
|
|
450
|
+
return typeof value === "string" ? value.toLowerCase() : "";
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
function sleep(milliseconds: number): Promise<void> {
|
|
454
|
+
return new Promise((resolveSleep) => {
|
|
455
|
+
setTimeout(resolveSleep, milliseconds);
|
|
456
|
+
});
|
|
457
|
+
}
|