@link-assistant/agent 0.13.1 → 0.13.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/provider/models.ts +64 -1
- package/src/provider/retry-fetch.ts +136 -14
package/package.json
CHANGED
package/src/provider/models.ts
CHANGED
|
@@ -67,11 +67,74 @@ export namespace ModelsDev {
|
|
|
67
67
|
|
|
68
68
|
export type Provider = z.infer<typeof Provider>;
|
|
69
69
|
|
|
70
|
+
/**
 * Maximum age of the on-disk models cache, in milliseconds (one hour).
 * A cache file older than this is refreshed, and the refresh is awaited, before its data is used.
 */
const CACHE_STALE_THRESHOLD_MS = 1000 * 60 * 60;
|
|
75
|
+
|
|
76
|
+
/**
 * Load the models database, making sure the on-disk cache is usable first.
 *
 * Cache handling:
 * - cache file missing: wait for `refresh()` before reading
 * - cache file older than CACHE_STALE_THRESHOLD_MS: wait for `refresh()` before reading
 * - otherwise: start `refresh()` without waiting and read the cached data right away
 *
 * Waiting in the first two cases avoids ProviderModelNotFoundError on a first run
 * (no cache yet) or when an outdated cache lacks newly added models such as kimi-k2.5-free.
 *
 * @returns The parsed cache contents, or the parsed result of `data()` when the cache
 *          file cannot be read or yields a falsy value.
 * @see https://github.com/link-assistant/agent/issues/175
 */
export async function get() {
  const file = Bun.file(filepath);

  if (await file.exists()) {
    // A failed stat (or a missing mtime) yields 0, which makes the cache look stale.
    const stats = await file.stat().catch(() => null);
    const mtime = stats?.mtime?.getTime() ?? 0;
    const isStale = Date.now() - mtime > CACHE_STALE_THRESHOLD_MS;

    if (!isStale) {
      // Recent enough: kick off a refresh in the background and use what is on disk now.
      log.info(() => ({
        message: 'cache is fresh, triggering background refresh',
        path: filepath,
        age: Date.now() - mtime,
      }));
      refresh();
    } else {
      // Too old: block until the refresh completes so the model list is up to date.
      log.info(() => ({
        message: 'cache is stale, awaiting refresh',
        path: filepath,
        age: Date.now() - mtime,
        threshold: CACHE_STALE_THRESHOLD_MS,
      }));
      await refresh();
    }
  } else {
    // Nothing on disk yet: the refresh has to finish before there is anything to read.
    log.info(() => ({
      message: 'no cache found, awaiting refresh',
      path: filepath,
    }));
    await refresh();
  }

  // Read whatever is on disk at this point; a read/parse failure becomes undefined.
  const cached = await file.json().catch(() => {});
  if (cached) return cached as Record<string, Provider>;

  // The cache could not be used, so fall back to the JSON provided by data().
  log.warn(() => ({
    message: 'cache read failed, using bundled data',
    path: filepath,
  }));
  return JSON.parse(await data()) as Record<string, Provider>;
}
|
|
@@ -20,7 +20,12 @@ import { Flag } from '../flag/flag';
|
|
|
20
20
|
* By wrapping fetch, we handle rate limits at the HTTP layer with time-based retries,
|
|
21
21
|
* ensuring the agent's 7-week global timeout is respected.
|
|
22
22
|
*
|
|
23
|
+
* Important: Rate limit waits use ISOLATED AbortControllers that are NOT subject to
|
|
24
|
+
* provider/stream timeouts. This prevents long rate limit waits (e.g., 15 hours) from
|
|
25
|
+
* being aborted by short provider timeouts (e.g., 5 minutes).
|
|
26
|
+
*
|
|
23
27
|
* @see https://github.com/link-assistant/agent/issues/167
|
|
28
|
+
* @see https://github.com/link-assistant/agent/issues/183
|
|
24
29
|
* @see https://github.com/vercel/ai/issues/12585
|
|
25
30
|
*/
|
|
26
31
|
|
|
@@ -150,23 +155,109 @@ export namespace RetryFetch {
|
|
|
150
155
|
|
|
151
156
|
/**
 * Sleep for the specified duration, but respect abort signals.
 *
 * Resolves after `ms` milliseconds. Rejects with an `AbortError` DOMException when
 * `signal` is already aborted on entry or aborts before the timer fires.
 *
 * The abort listener is removed inside the timer callback. Wrapping `resolve` after
 * it has already been handed to `setTimeout` does not work: the timer keeps the
 * original function, so the wrapper (and its listener cleanup) would never run and
 * every completed sleep would leave a listener attached to a long-lived signal.
 *
 * NOTE(review): `ms` is passed straight to `setTimeout`; delays above 2^31-1 ms
 * (~24.8 days) exceed the timer range — confirm callers never request that much.
 *
 * @param ms Duration to wait, in milliseconds
 * @param signal Optional signal that cancels the wait
 * @returns A promise that resolves when the wait completes
 */
async function sleep(ms: number, signal?: AbortSignal): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    // Check if already aborted before starting any timer
    if (signal?.aborted) {
      reject(new DOMException('Aborted', 'AbortError'));
      return;
    }

    // Only invoked after `timeout` is initialised below, so the forward reference is safe.
    const abortHandler = () => {
      clearTimeout(timeout);
      reject(new DOMException('Aborted', 'AbortError'));
    };

    const timeout = setTimeout(() => {
      // Normal completion: detach the listener so it does not outlive this sleep
      signal?.removeEventListener('abort', abortHandler);
      resolve();
    }, ms);

    // `once` detaches the listener automatically on the abort path
    signal?.addEventListener('abort', abortHandler, { once: true });
  });
}
|
|
169
189
|
|
|
190
|
+
/**
 * Create an isolated AbortController for rate limit waits.
 *
 * The returned signal is NOT linked to the request's AbortSignal, so aborts coming
 * from provider or stream timeouts do not reach it. It aborts in two situations only:
 * - `remainingTimeout` milliseconds elapse (reason: `TimeoutError` DOMException)
 * - `userSignal` is found aborted, checked once immediately and then every
 *   10 seconds (reason: `AbortError` DOMException)
 *
 * The global timeout is armed in chunks because timer delays are limited to
 * 2^31-1 ms (~24.8 days). A larger delay overflows and the timer fires almost
 * immediately, which would abort a multi-week budget as soon as the wait starts.
 *
 * Timers are stopped as soon as the signal aborts; `cleanup` stops them when the
 * wait ends for any other reason and is safe to call more than once.
 *
 * @param remainingTimeout Maximum time allowed for this wait (ms)
 * @param userSignal Optional user abort signal to check periodically
 * @returns An object with the signal and a cleanup function
 * @see https://github.com/link-assistant/agent/issues/183
 */
function createIsolatedRateLimitSignal(
  remainingTimeout: number,
  userSignal?: AbortSignal
): {
  signal: AbortSignal;
  cleanup: () => void;
} {
  // Largest delay a timer accepts (signed 32-bit milliseconds)
  const MAX_TIMER_DELAY_MS = 2_147_483_647;
  // How often the user's signal is polled during the wait
  const USER_CANCEL_POLL_MS = 10_000;

  const controller = new AbortController();
  let timeoutId: ReturnType<typeof setTimeout> | undefined;
  let intervalId: ReturnType<typeof setInterval> | undefined;

  // Stop whichever timers are still pending; each handle is cleared with its own API.
  const cleanup = () => {
    if (timeoutId !== undefined) {
      clearTimeout(timeoutId);
      timeoutId = undefined;
    }
    if (intervalId !== undefined) {
      clearInterval(intervalId);
      intervalId = undefined;
    }
  };

  // Arm the global timeout, hopping through maximum-length timers while the
  // remaining budget is larger than a single timer can represent.
  const armGlobalTimeout = (remaining: number) => {
    if (remaining > MAX_TIMER_DELAY_MS) {
      timeoutId = setTimeout(
        () => armGlobalTimeout(remaining - MAX_TIMER_DELAY_MS),
        MAX_TIMER_DELAY_MS
      );
      return;
    }
    timeoutId = setTimeout(() => {
      controller.abort(
        new DOMException(
          'Rate limit wait exceeded global timeout',
          'TimeoutError'
        )
      );
      cleanup();
    }, remaining);
  };
  armGlobalTimeout(remainingTimeout);

  // Poll the user's signal instead of listening to it, so this controller stays
  // detached from whatever else may abort the request signal's listeners.
  if (userSignal) {
    const checkUserCancellation = () => {
      if (userSignal.aborted) {
        controller.abort(
          new DOMException('User canceled during rate limit wait', 'AbortError')
        );
        cleanup();
      }
    };

    // Check immediately; only start polling when the user has not canceled yet
    checkUserCancellation();
    if (!controller.signal.aborted) {
      intervalId = setInterval(checkUserCancellation, USER_CANCEL_POLL_MS);
    }
  }

  return {
    signal: controller.signal,
    cleanup,
  };
}
|
|
260
|
+
|
|
170
261
|
/**
|
|
171
262
|
* Check if an error is retryable (network issues, temporary failures).
|
|
172
263
|
*/
|
|
@@ -317,26 +408,57 @@ export namespace RetryFetch {
|
|
|
317
408
|
return response;
|
|
318
409
|
}
|
|
319
410
|
|
|
411
|
+
const remainingTimeout = maxRetryTimeout - elapsed;
|
|
412
|
+
|
|
320
413
|
log.info(() => ({
|
|
321
414
|
message: 'rate limited, will retry',
|
|
322
415
|
sessionID,
|
|
323
416
|
attempt,
|
|
324
417
|
delay,
|
|
325
418
|
delayMinutes: (delay / 1000 / 60).toFixed(2),
|
|
419
|
+
delayHours: (delay / 1000 / 3600).toFixed(2),
|
|
326
420
|
elapsed,
|
|
327
|
-
remainingTimeout
|
|
421
|
+
remainingTimeout,
|
|
422
|
+
remainingTimeoutHours: (remainingTimeout / 1000 / 3600).toFixed(2),
|
|
423
|
+
isolatedSignal: true, // Indicates we're using isolated signal for this wait
|
|
328
424
|
}));
|
|
329
425
|
|
|
330
|
-
// Wait before retrying
|
|
426
|
+
// Wait before retrying using ISOLATED signal
|
|
427
|
+
// This is critical for issue #183: Rate limit waits can be hours long (e.g., 15 hours),
|
|
428
|
+
// but provider timeouts are typically 5 minutes. By using an isolated AbortController
|
|
429
|
+
// that only respects AGENT_RETRY_TIMEOUT, we prevent the provider timeout from
|
|
430
|
+
// aborting long rate limit waits.
|
|
431
|
+
//
|
|
432
|
+
// The isolated signal periodically checks the user's abort signal (every 10 seconds)
|
|
433
|
+
// to allow user cancellation during long waits.
|
|
434
|
+
const { signal: isolatedSignal, cleanup } =
|
|
435
|
+
createIsolatedRateLimitSignal(
|
|
436
|
+
remainingTimeout,
|
|
437
|
+
init?.signal ?? undefined
|
|
438
|
+
);
|
|
439
|
+
|
|
331
440
|
try {
|
|
332
|
-
await sleep(delay,
|
|
333
|
-
} catch {
|
|
334
|
-
//
|
|
441
|
+
await sleep(delay, isolatedSignal);
|
|
442
|
+
} catch (sleepError) {
|
|
443
|
+
// Check if the original request was aborted (user cancellation)
|
|
444
|
+
// In that case, we should stop retrying
|
|
445
|
+
if (init?.signal?.aborted) {
|
|
446
|
+
log.info(() => ({
|
|
447
|
+
message: 'rate limit wait aborted by user cancellation',
|
|
448
|
+
sessionID,
|
|
449
|
+
}));
|
|
450
|
+
return response;
|
|
451
|
+
}
|
|
452
|
+
|
|
453
|
+
// Otherwise, it was the isolated timeout - log and return
|
|
335
454
|
log.info(() => ({
|
|
336
|
-
message: '
|
|
455
|
+
message: 'rate limit wait exceeded global timeout',
|
|
337
456
|
sessionID,
|
|
457
|
+
sleepError: String(sleepError),
|
|
338
458
|
}));
|
|
339
459
|
return response;
|
|
460
|
+
} finally {
|
|
461
|
+
cleanup();
|
|
340
462
|
}
|
|
341
463
|
}
|
|
342
464
|
};
|