@link-assistant/agent 0.12.0 → 0.12.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/flag/flag.ts +11 -0
- package/src/index.js +61 -13
- package/src/provider/provider.ts +220 -0
- package/src/provider/retry-fetch.ts +363 -0
- package/src/session/processor.ts +23 -2
- package/src/session/prompt.ts +7 -7
package/package.json
CHANGED
package/src/flag/flag.ts
CHANGED
|
@@ -98,6 +98,17 @@ export namespace Flag {
|
|
|
98
98
|
return val ? parseInt(val, 10) * 1000 : 1200000; // 20 minutes in ms
|
|
99
99
|
}
|
|
100
100
|
|
|
101
|
+
// Minimum retry interval to prevent rapid retries (default: 30 seconds).
// This ensures we don't hammer the API with rapid retry attempts.
//
// The value is looked up through getEnv under the names
// LINK_ASSISTANT_AGENT_MIN_RETRY_INTERVAL and AGENT_MIN_RETRY_INTERVAL, is
// interpreted as whole seconds, and is returned in milliseconds.
//
// A missing, non-numeric, or negative value falls back to the default so a
// typo in the environment can never yield NaN (NaN would propagate through
// Math.max() in the retry code and end up as a near-zero timer delay).
// An explicit "0" is still honoured and disables the minimum.
// See: https://github.com/link-assistant/agent/issues/167
export function MIN_RETRY_INTERVAL(): number {
  const DEFAULT_MS = 30000; // 30 seconds in ms
  const val = getEnv(
    'LINK_ASSISTANT_AGENT_MIN_RETRY_INTERVAL',
    'AGENT_MIN_RETRY_INTERVAL'
  );
  if (!val) return DEFAULT_MS;

  const seconds = parseInt(val, 10);
  // Reject garbage ("abc" -> NaN) and nonsensical negative intervals.
  return Number.isNaN(seconds) || seconds < 0 ? DEFAULT_MS : seconds * 1000;
}
|
|
111
|
+
|
|
101
112
|
// Stream timeout configuration
|
|
102
113
|
// chunkMs: timeout between stream chunks - detects stalled streams (default: 2 minutes)
|
|
103
114
|
// stepMs: timeout for each individual LLM step (default: 10 minutes)
|
package/src/index.js
CHANGED
|
@@ -142,14 +142,56 @@ function readStdinWithTimeout(timeout = null) {
|
|
|
142
142
|
|
|
143
143
|
/**
|
|
144
144
|
* Parse model configuration from argv
|
|
145
|
+
* Supports both explicit provider/model format and short model names.
|
|
146
|
+
*
|
|
147
|
+
* Format examples:
|
|
148
|
+
* - "kilo/glm-5-free" -> uses kilo provider with glm-5-free model (explicit)
|
|
149
|
+
* - "opencode/kimi-k2.5-free" -> uses opencode provider (explicit)
|
|
150
|
+
* - "glm-5-free" -> resolved to kilo provider (unique free model)
|
|
151
|
+
* - "kimi-k2.5-free" -> resolved to opencode provider (shared model, opencode preferred)
|
|
152
|
+
*
|
|
145
153
|
* @param {object} argv - Command line arguments
|
|
146
154
|
* @returns {object} - { providerID, modelID }
|
|
147
155
|
*/
|
|
148
156
|
async function parseModelConfig(argv) {
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
let providerID
|
|
152
|
-
let modelID
|
|
157
|
+
const modelArg = argv.model;
|
|
158
|
+
|
|
159
|
+
let providerID;
|
|
160
|
+
let modelID;
|
|
161
|
+
|
|
162
|
+
// Check if model includes explicit provider prefix
|
|
163
|
+
if (modelArg.includes('/')) {
|
|
164
|
+
// Explicit provider/model format - respect user's choice
|
|
165
|
+
const modelParts = modelArg.split('/');
|
|
166
|
+
providerID = modelParts[0];
|
|
167
|
+
modelID = modelParts.slice(1).join('/');
|
|
168
|
+
|
|
169
|
+
// Validate that providerID and modelID are not empty
|
|
170
|
+
if (!providerID || !modelID) {
|
|
171
|
+
providerID = providerID || 'opencode';
|
|
172
|
+
modelID = modelID || 'kimi-k2.5-free';
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
Log.Default.info(() => ({
|
|
176
|
+
message: 'using explicit provider/model',
|
|
177
|
+
providerID,
|
|
178
|
+
modelID,
|
|
179
|
+
}));
|
|
180
|
+
} else {
|
|
181
|
+
// Short model name - resolve to appropriate provider
|
|
182
|
+
// Import Provider to use parseModelWithResolution
|
|
183
|
+
const { Provider } = await import('./provider/provider.ts');
|
|
184
|
+
const resolved = await Provider.parseModelWithResolution(modelArg);
|
|
185
|
+
providerID = resolved.providerID;
|
|
186
|
+
modelID = resolved.modelID;
|
|
187
|
+
|
|
188
|
+
Log.Default.info(() => ({
|
|
189
|
+
message: 'resolved short model name',
|
|
190
|
+
input: modelArg,
|
|
191
|
+
providerID,
|
|
192
|
+
modelID,
|
|
193
|
+
}));
|
|
194
|
+
}
|
|
153
195
|
|
|
154
196
|
// Handle --use-existing-claude-oauth option
|
|
155
197
|
// This reads OAuth credentials from ~/.claude/.credentials.json (Claude Code CLI)
|
|
@@ -175,22 +217,22 @@ async function parseModelConfig(argv) {
|
|
|
175
217
|
// Set environment variable for the provider to use
|
|
176
218
|
process.env.CLAUDE_CODE_OAUTH_TOKEN = creds.accessToken;
|
|
177
219
|
|
|
178
|
-
// If user specified
|
|
179
|
-
// If
|
|
220
|
+
// If user specified the default model (opencode/kimi-k2.5-free), switch to claude-oauth
|
|
221
|
+
// If user explicitly specified kilo or another provider, warn but respect their choice
|
|
180
222
|
if (providerID === 'opencode' && modelID === 'kimi-k2.5-free') {
|
|
181
223
|
providerID = 'claude-oauth';
|
|
182
224
|
modelID = 'claude-sonnet-4-5';
|
|
183
225
|
} else if (!['claude-oauth', 'anthropic'].includes(providerID)) {
|
|
184
|
-
// If user specified a different provider, warn them
|
|
226
|
+
// If user specified a different provider explicitly, warn them
|
|
185
227
|
const compactJson = argv['compact-json'] === true;
|
|
186
228
|
outputStatus(
|
|
187
229
|
{
|
|
188
230
|
type: 'warning',
|
|
189
|
-
message: `--use-existing-claude-oauth is set but model uses provider "${providerID}". Using
|
|
231
|
+
message: `--use-existing-claude-oauth is set but model uses provider "${providerID}". Using specified provider.`,
|
|
190
232
|
},
|
|
191
233
|
compactJson
|
|
192
234
|
);
|
|
193
|
-
|
|
235
|
+
// Don't override - respect user's explicit provider choice
|
|
194
236
|
}
|
|
195
237
|
}
|
|
196
238
|
|
|
@@ -257,8 +299,6 @@ async function runAgentMode(argv, request) {
|
|
|
257
299
|
}));
|
|
258
300
|
}
|
|
259
301
|
|
|
260
|
-
const { providerID, modelID } = await parseModelConfig(argv);
|
|
261
|
-
|
|
262
302
|
// Validate and get JSON standard
|
|
263
303
|
const jsonStandard = argv['json-standard'];
|
|
264
304
|
if (!isValidJsonStandard(jsonStandard)) {
|
|
@@ -275,9 +315,14 @@ async function runAgentMode(argv, request) {
|
|
|
275
315
|
// Logging is already initialized in middleware, no need to call Log.init() again
|
|
276
316
|
|
|
277
317
|
// Wrap in Instance.provide for OpenCode infrastructure
|
|
318
|
+
// parseModelConfig must be called inside Instance.provide to access provider state
|
|
278
319
|
await Instance.provide({
|
|
279
320
|
directory: process.cwd(),
|
|
280
321
|
fn: async () => {
|
|
322
|
+
// Parse model config inside Instance.provide context
|
|
323
|
+
// This allows parseModelWithResolution to access the provider state
|
|
324
|
+
const { providerID, modelID } = await parseModelConfig(argv);
|
|
325
|
+
|
|
281
326
|
if (argv.server) {
|
|
282
327
|
// SERVER MODE: Start server and communicate via HTTP
|
|
283
328
|
await runServerMode(
|
|
@@ -330,8 +375,6 @@ async function runContinuousAgentMode(argv) {
|
|
|
330
375
|
}));
|
|
331
376
|
}
|
|
332
377
|
|
|
333
|
-
const { providerID, modelID } = await parseModelConfig(argv);
|
|
334
|
-
|
|
335
378
|
// Validate and get JSON standard
|
|
336
379
|
const jsonStandard = argv['json-standard'];
|
|
337
380
|
if (!isValidJsonStandard(jsonStandard)) {
|
|
@@ -348,9 +391,14 @@ async function runContinuousAgentMode(argv) {
|
|
|
348
391
|
const { systemMessage, appendSystemMessage } = await readSystemMessages(argv);
|
|
349
392
|
|
|
350
393
|
// Wrap in Instance.provide for OpenCode infrastructure
|
|
394
|
+
// parseModelConfig must be called inside Instance.provide to access provider state
|
|
351
395
|
await Instance.provide({
|
|
352
396
|
directory: process.cwd(),
|
|
353
397
|
fn: async () => {
|
|
398
|
+
// Parse model config inside Instance.provide context
|
|
399
|
+
// This allows parseModelWithResolution to access the provider state
|
|
400
|
+
const { providerID, modelID } = await parseModelConfig(argv);
|
|
401
|
+
|
|
354
402
|
if (argv.server) {
|
|
355
403
|
// SERVER MODE: Start server and communicate via HTTP
|
|
356
404
|
await runContinuousServerMode(
|
package/src/provider/provider.ts
CHANGED
|
@@ -16,6 +16,7 @@ import { Flag } from '../flag/flag';
|
|
|
16
16
|
import { iife } from '../util/iife';
|
|
17
17
|
import { createEchoModel } from './echo';
|
|
18
18
|
import { createCacheModel } from './cache';
|
|
19
|
+
import { RetryFetch } from './retry-fetch';
|
|
19
20
|
|
|
20
21
|
export namespace Provider {
|
|
21
22
|
const log = Log.create({ service: 'provider' });
|
|
@@ -1143,6 +1144,16 @@ export namespace Provider {
|
|
|
1143
1144
|
});
|
|
1144
1145
|
};
|
|
1145
1146
|
}
|
|
1147
|
+
|
|
1148
|
+
// Wrap fetch with retry logic for rate limit handling (HTTP 429)
|
|
1149
|
+
// This ensures the agent's time-based retry (7-week timeout) is respected
|
|
1150
|
+
// instead of the AI SDK's fixed retry count (3 attempts)
|
|
1151
|
+
// See: https://github.com/link-assistant/agent/issues/167
|
|
1152
|
+
const existingFetch = options['fetch'] ?? fetch;
|
|
1153
|
+
options['fetch'] = RetryFetch.wrap(existingFetch, {
|
|
1154
|
+
sessionID: provider.id,
|
|
1155
|
+
});
|
|
1156
|
+
|
|
1146
1157
|
const fn = mod[Object.keys(mod).find((key) => key.startsWith('create'))!];
|
|
1147
1158
|
const loaded = fn({
|
|
1148
1159
|
name: provider.id,
|
|
@@ -1373,6 +1384,215 @@ export namespace Provider {
|
|
|
1373
1384
|
};
|
|
1374
1385
|
}
|
|
1375
1386
|
|
|
1387
|
+
/**
 * Map a bare model name (no "provider/" prefix) to the provider that should serve it.
 *
 * Resolution order, as implemented below:
 * 1. Names on the hard-coded Kilo-only list resolve to "kilo", provided the
 *    loaded kilo provider actually lists the model.
 * 2. Otherwise every loaded provider is scanned for the model:
 *    - no provider lists it  -> undefined
 *    - exactly one lists it  -> that provider
 *    - several list it       -> "opencode" when it is among them, else the
 *                               first match in provider iteration order
 *
 * @param modelID - Short model name without provider prefix
 * @returns The chosen provider together with the unchanged model ID, or undefined if no provider offers the model
 */
export async function resolveShortModelName(
  modelID: string
): Promise<{ providerID: string; modelID: string } | undefined> {
  const { providers } = await state();

  // Free models that are only offered through Kilo.
  const kiloOnly = new Set([
    'glm-5-free',
    'glm-4.7-free',
    'giga-potato-free',
    'trinity-large-preview',
  ]);

  if (kiloOnly.has(modelID) && providers['kilo']?.info.models[modelID]) {
    log.info(() => ({
      message: 'resolved short model name to kilo (unique)',
      modelID,
    }));
    return { providerID: 'kilo', modelID };
  }

  // Collect every loaded provider that lists this model.
  const matchingProviders: string[] = Object.entries(providers)
    .filter(([, candidate]) => Boolean(candidate.info.models[modelID]))
    .map(([candidateID]) => candidateID);

  if (matchingProviders.length === 0) {
    return undefined;
  }

  const firstMatch = matchingProviders[0];

  if (matchingProviders.length === 1) {
    log.info(() => ({
      message: 'resolved short model name (single match)',
      modelID,
      providerID: firstMatch,
    }));
    return { providerID: firstMatch, modelID };
  }

  // Shared model: opencode is treated as the primary free provider.
  if (matchingProviders.includes('opencode')) {
    log.info(() => ({
      message: 'resolved short model name to opencode (multiple providers)',
      modelID,
      availableProviders: matchingProviders,
    }));
    return { providerID: 'opencode', modelID };
  }

  // Shared model without opencode: take whichever provider was found first.
  log.info(() => ({
    message: 'resolved short model name (fallback)',
    modelID,
    providerID: firstMatch,
    availableProviders: matchingProviders,
  }));
  return { providerID: firstMatch, modelID };
}
|
|
1471
|
+
|
|
1472
|
+
/**
 * Turn a model string into a { providerID, modelID } pair, whether or not it
 * carries a provider prefix.
 *
 * - Strings containing "/" are handed to parseModel unchanged (explicit provider).
 * - Bare names go through resolveShortModelName.
 * - Bare names that cannot be resolved are assigned to the "opencode" provider
 *   and a warning is logged.
 *
 * Examples:
 * - "kilo/glm-5-free" -> { providerID: "kilo", modelID: "glm-5-free" }
 * - "glm-5-free" -> { providerID: "kilo", modelID: "glm-5-free" } (resolved)
 * - "kimi-k2.5-free" -> { providerID: "opencode", modelID: "kimi-k2.5-free" } (resolved)
 *
 * @param model - Model string with or without provider prefix
 * @returns Parsed provider ID and model ID
 */
export async function parseModelWithResolution(
  model: string
): Promise<{ providerID: string; modelID: string }> {
  const hasProviderPrefix = model.includes('/');
  if (hasProviderPrefix) {
    return parseModel(model);
  }

  const resolved = await resolveShortModelName(model);
  if (!resolved) {
    // Nothing matched: keep the historical default provider.
    log.warn(() => ({
      message: 'unable to resolve short model name, using opencode as default',
      modelID: model,
    }));
    return { providerID: 'opencode', modelID: model };
  }

  return resolved;
}
|
|
1509
|
+
|
|
1510
|
+
/**
 * Free models offered by more than one provider, keyed by model ID.
 * Each value lists the providers that serve the model, so that an alternative
 * can be tried when one of them is rate limited.
 *
 * Only consulted for models requested without an explicit provider:
 * a request such as "kilo/kimi-k2.5-free" never falls back.
 *
 * minimax-m2.1-free (Kilo only) and minimax-m2.5-free (OpenCode only) are
 * different model versions and are intentionally absent.
 */
const SHARED_FREE_MODELS: Record<string, string[]> = {
  'kimi-k2.5-free': ['opencode', 'kilo'], // served by both OpenCode and Kilo
};
|
|
1523
|
+
|
|
1524
|
+
/**
 * Get alternative providers for a model when the primary provider fails (e.g., rate limited).
 * Returns the other providers listed for the model in SHARED_FREE_MODELS that
 * are present in the current provider state.
 *
 * No alternatives are offered when the user named the provider explicitly
 * (like "kilo/kimi-k2.5-free"), or when the model is not a shared model.
 *
 * The shared-model table is consulted with an own-property check: model IDs
 * come from user input, and a plain index lookup would otherwise pick up
 * inherited Object.prototype members (e.g. "constructor", "toString") and
 * crash on `.filter`.
 *
 * @param modelID - The model ID to find alternatives for
 * @param failedProviderID - The provider that failed
 * @param wasExplicitProvider - Whether the user explicitly specified the provider
 * @returns Array of alternative provider IDs that can serve this model (possibly empty)
 */
export async function getAlternativeProviders(
  modelID: string,
  failedProviderID: string,
  wasExplicitProvider: boolean
): Promise<string[]> {
  // If the user explicitly specified a provider, don't offer alternatives
  if (wasExplicitProvider) {
    log.info(() => ({
      message: 'no alternative providers (explicit provider specified)',
      modelID,
      failedProviderID,
    }));
    return [];
  }

  // Check if this is a shared model (own keys only, never inherited ones)
  const sharedProviders = Object.prototype.hasOwnProperty.call(
    SHARED_FREE_MODELS,
    modelID
  )
    ? SHARED_FREE_MODELS[modelID]
    : undefined;
  if (!sharedProviders) {
    // Not a shared model, no alternatives
    return [];
  }

  // Keep providers other than the failed one that are actually loaded
  const s = await state();
  const alternatives = sharedProviders.filter(
    (p) => p !== failedProviderID && s.providers[p]
  );

  if (alternatives.length > 0) {
    log.info(() => ({
      message: 'found alternative providers for rate-limited model',
      modelID,
      failedProviderID,
      alternatives,
    }));
  }

  return alternatives;
}
|
|
1575
|
+
|
|
1576
|
+
/**
 * Checks if an error indicates a rate limit issue.
 *
 * An Error is treated as a rate-limit error when any of the following holds:
 * - it carries a numeric `status` or `statusCode` property equal to 429;
 * - its message or name mentions "rate limit" in any common spelling
 *   ("rate limit", "ratelimit", "rate_limit", "rate-limit"), case-insensitively;
 * - its message contains "too many requests", case-insensitively;
 * - its message contains 429 as a standalone number.
 *
 * The 429 check deliberately ignores longer numbers that merely contain those
 * digits (request IDs, durations such as "64290ms"), which a plain substring
 * match would misreport as rate limiting.
 *
 * @param error - The error to check
 * @returns true if the error indicates a rate limit; always false for non-Error values
 */
export function isRateLimitError(error: unknown): boolean {
  if (!(error instanceof Error)) return false;

  // Prefer structured status information when the error provides it.
  const { status, statusCode } = error as Error & {
    status?: unknown;
    statusCode?: unknown;
  };
  if (status === 429 || statusCode === 429) return true;

  const message = error.message.toLowerCase();
  const name = error.name.toLowerCase();

  const rateLimitPattern = /rate[\s_-]?limit/;
  // 429 not embedded in a longer run of digits.
  const standalone429 = /(?<!\d)429(?!\d)/;

  return (
    rateLimitPattern.test(message) ||
    rateLimitPattern.test(name) ||
    message.includes('too many requests') ||
    standalone429.test(message)
  );
}
|
|
1595
|
+
|
|
1376
1596
|
export const ModelNotFoundError = NamedError.create(
|
|
1377
1597
|
'ProviderModelNotFoundError',
|
|
1378
1598
|
z.object({
|
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
import { Log } from '../util/log';
|
|
2
|
+
import { Flag } from '../flag/flag';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Custom fetch wrapper that handles rate limits (HTTP 429) using time-based retry logic.
|
|
6
|
+
*
|
|
7
|
+
* This wrapper intercepts 429 responses at the HTTP level before the AI SDK's internal
|
|
8
|
+
* retry mechanism can interfere. It respects:
|
|
9
|
+
* - retry-after headers (both seconds and HTTP date formats)
|
|
10
|
+
* - retry-after-ms header for millisecond precision
|
|
11
|
+
* - AGENT_RETRY_TIMEOUT for global time-based retry limit
|
|
12
|
+
* - AGENT_MAX_RETRY_DELAY for maximum single retry wait time
|
|
13
|
+
*
|
|
14
|
+
* Problem solved:
|
|
15
|
+
* The AI SDK's internal retry uses a fixed count (default 3 attempts) and ignores
|
|
16
|
+
* retry-after headers. When providers return long retry-after values (e.g., 64 minutes),
|
|
17
|
+
* the SDK exhausts its retries before the agent can properly wait.
|
|
18
|
+
*
|
|
19
|
+
* Solution:
|
|
20
|
+
* By wrapping fetch, we handle rate limits at the HTTP layer with time-based retries,
|
|
21
|
+
* ensuring the agent's 7-week global timeout is respected.
|
|
22
|
+
*
|
|
23
|
+
* @see https://github.com/link-assistant/agent/issues/167
|
|
24
|
+
* @see https://github.com/vercel/ai/issues/12585
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
export namespace RetryFetch {
|
|
28
|
+
const log = Log.create({ service: 'retry-fetch' });
|
|
29
|
+
|
|
30
|
+
// Backoff tuning, intended to mirror the values used by SessionRetry.
const RETRY_INITIAL_DELAY = 2_000; // first backoff step, in ms
const RETRY_BACKOFF_FACTOR = 2; // multiplier applied for each further attempt
const RETRY_MAX_DELAY_NO_HEADERS = 30_000;

// Lower bound applied to every computed retry delay so retries are never
// issued in rapid succession (default: 30 seconds).
// The value comes from Flag.MIN_RETRY_INTERVAL(), which is driven by the
// AGENT_MIN_RETRY_INTERVAL env var.
function getMinRetryInterval(): number {
  const floorMs = Flag.MIN_RETRY_INTERVAL();
  return floorMs;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Spread retries out by lengthening a delay by a random 0-10%.
 * Keeps many clients that were rate limited together from retrying in lockstep.
 * The result is rounded to a whole number.
 */
function addJitter(delay: number): number {
  const extra = Math.random() * 0.1 * delay;
  const jittered = delay + extra;
  return Math.round(jittered);
}
|
|
49
|
+
|
|
50
|
+
/**
 * Extract the server-requested wait from response headers, in milliseconds.
 *
 * Checked in order:
 * 1. `retry-after-ms` - a positive number of milliseconds, returned as-is
 * 2. `retry-after` as a positive number of seconds (rounded up to whole ms)
 * 3. `retry-after` as a date lying in the future
 *
 * Returns null when none of these yields a positive delay.
 */
function parseRetryAfterHeader(headers: Headers): number | null {
  const rawMs = headers.get('retry-after-ms');
  if (rawMs) {
    const ms = Number.parseFloat(rawMs);
    // NaN fails this comparison too, so unparsable values fall through.
    if (ms > 0) {
      log.info(() => ({
        message: 'parsed retry-after-ms header',
        headerValue: ms,
      }));
      return ms;
    }
  }

  const rawRetryAfter = headers.get('retry-after');
  if (!rawRetryAfter) {
    return null;
  }

  // Numeric form: delay in seconds.
  const seconds = Number.parseFloat(rawRetryAfter);
  if (seconds > 0) {
    const delayMs = Math.ceil(seconds * 1000);
    log.info(() => ({
      message: 'parsed retry-after header (seconds)',
      headerValue: seconds,
      delayMs,
    }));
    return delayMs;
  }

  // Date form: wait until the given point in time.
  const untilDate = Date.parse(rawRetryAfter) - Date.now();
  if (untilDate > 0) {
    log.info(() => ({
      message: 'parsed retry-after header (date)',
      headerValue: rawRetryAfter,
      delayMs: untilDate,
    }));
    return Math.ceil(untilDate);
  }

  return null;
}
|
|
95
|
+
|
|
96
|
+
/**
 * Decide how long to wait before the next attempt.
 *
 * - With a retry-after header: the server's value is used, raised to the
 *   minimum retry interval if smaller, then jittered. If the server's value is
 *   larger than `maxRetryTimeout`, null is returned and no retry should happen.
 * - Without one: exponential backoff from RETRY_INITIAL_DELAY, capped at
 *   `maxBackoffDelay`, raised to the minimum retry interval, then jittered.
 *
 * @param headers Response headers of the rate-limited response
 * @param attempt 1-based attempt counter driving the backoff exponent
 * @param maxRetryTimeout Longest acceptable server-requested wait in ms
 *   (`create` passes the time remaining in its retry budget)
 * @param maxBackoffDelay Cap in ms for the header-less backoff
 * @returns Delay in ms, or null when the server-requested wait is too long
 */
function calculateRetryDelay(
  headers: Headers,
  attempt: number,
  maxRetryTimeout: number,
  maxBackoffDelay: number
): number | null {
  const retryAfterMs = parseRetryAfterHeader(headers);
  const minInterval = getMinRetryInterval();

  if (retryAfterMs === null) {
    // Server gave no hint: back off exponentially.
    const backoffDelay = Math.min(
      RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1),
      maxBackoffDelay
    );
    const delay = Math.max(backoffDelay, minInterval);
    log.info(() => ({
      message: 'no retry-after header, using exponential backoff',
      attempt,
      backoffDelay,
      delay,
      minInterval,
      maxBackoffDelay,
    }));
    return addJitter(delay);
  }

  if (retryAfterMs > maxRetryTimeout) {
    // Waiting that long would blow the budget; let the caller give up.
    log.error(() => ({
      message:
        'retry-after exceeds maximum retry timeout, will not retry at fetch level',
      retryAfterMs,
      maxRetryTimeout,
      retryAfterHours: (retryAfterMs / 1000 / 3600).toFixed(2),
      maxRetryTimeoutHours: (maxRetryTimeout / 1000 / 3600).toFixed(2),
    }));
    return null;
  }

  // Honour the server's value, but never go below the configured floor.
  const delay = Math.max(retryAfterMs, minInterval);
  log.info(() => ({
    message: 'using retry-after value',
    retryAfterMs,
    delay,
    minInterval,
  }));
  return addJitter(delay);
}
|
|
150
|
+
|
|
151
|
+
/**
 * Sleep for the specified duration, but respect abort signals.
 *
 * - Rejects immediately with an AbortError DOMException when `signal` is
 *   already aborted (an `abort` listener added afterwards would never fire,
 *   so the full wait would otherwise run).
 * - Rejects with the same error as soon as `signal` aborts during the wait.
 * - Removes its abort listener once the wait completes, so long-lived signals
 *   do not accumulate one listener per sleep.
 * - Splits waits longer than 2^31-1 ms into several timers. Node.js documents
 *   that larger setTimeout delays are clamped to 1 ms, which would turn a
 *   multi-week wait into an immediate retry.
 *
 * @param ms Duration to wait, in milliseconds
 * @param signal Optional signal that cancels the wait
 */
async function sleep(ms: number, signal?: AbortSignal): Promise<void> {
  const MAX_TIMER_MS = 2_147_483_647; // largest delay a single timer honours

  return new Promise<void>((resolve, reject) => {
    if (signal?.aborted) {
      reject(new DOMException('Aborted', 'AbortError'));
      return;
    }

    let remaining = ms;
    let timer: ReturnType<typeof setTimeout> | undefined;

    const onAbort = () => {
      if (timer !== undefined) clearTimeout(timer);
      reject(new DOMException('Aborted', 'AbortError'));
    };

    const scheduleNext = () => {
      if (remaining > MAX_TIMER_MS) {
        // Too long for one timer: wait a full segment, then re-evaluate.
        remaining -= MAX_TIMER_MS;
        timer = setTimeout(scheduleNext, MAX_TIMER_MS);
        return;
      }
      timer = setTimeout(() => {
        signal?.removeEventListener('abort', onAbort);
        resolve();
      }, remaining);
    };

    signal?.addEventListener('abort', onAbort, { once: true });
    scheduleNext();
  });
}
|
|
169
|
+
|
|
170
|
+
/**
 * Tell whether a thrown value looks like a transient network failure that is
 * worth retrying.
 *
 * Only Error instances qualify, and only when their message contains one of
 * the socket/connection markers below (case-sensitive substring match).
 * Bun has known timeout issues that surface this way.
 * See: https://github.com/oven-sh/bun/issues/14439
 */
function isRetryableError(error: unknown): boolean {
  if (error instanceof Error) {
    const transientMarkers = [
      'ConnectionClosed',
      'ECONNRESET',
      'ECONNREFUSED',
      'socket',
      'connection',
    ];
    return transientMarkers.some((marker) => error.message.includes(marker));
  }

  return false;
}
|
|
190
|
+
|
|
191
|
+
export type RetryFetchOptions = {
  /**
   * Identifier included in this wrapper's log entries.
   */
  sessionID?: string;

  /**
   * The fetch implementation that performs the actual requests.
   * When omitted, the global fetch is used.
   */
  baseFetch?: typeof fetch;
};
|
|
202
|
+
|
|
203
|
+
/**
 * Create a fetch function that handles rate limits with time-based retry logic.
 *
 * The returned function:
 * 1. Calls the base fetch and passes through every non-429 response untouched.
 * 2. On HTTP 429, asks calculateRetryDelay for a wait (retry-after headers or
 *    exponential backoff), sleeps, and repeats the request.
 * 3. On a thrown error that isRetryableError accepts, sleeps with exponential
 *    backoff and repeats the request; any other error is re-thrown.
 *
 * The 429 response itself is returned (so higher-level handling can take
 * over) when the retry budget is used up, when the required wait would exceed
 * what is left of the budget, or when the wait is aborted via `init.signal`.
 * An abort during a network-error backoff propagates as an AbortError.
 *
 * Before each 429 retry the body of the discarded response is cancelled so
 * its connection is released instead of being held until garbage collection.
 *
 * NOTE(review): the budget is computed as Flag.RETRY_TIMEOUT() * 1000 and
 * compared against millisecond values, so RETRY_TIMEOUT is assumed to report
 * seconds and MAX_RETRY_DELAY milliseconds - confirm against flag.ts.
 *
 * @param options Configuration options
 * @returns A fetch function with rate limit retry handling
 */
export function create(options: RetryFetchOptions = {}): typeof fetch {
  const baseFetch = options.baseFetch ?? fetch;
  const sessionID = options.sessionID ?? 'unknown';

  // Best-effort release of a response that will not be handed to the caller.
  const discardBody = (stale: Response): void => {
    try {
      void stale.body?.cancel().catch(() => {});
    } catch {
      // Nothing useful to do if the body cannot be cancelled.
    }
  };

  return async function retryFetch(
    input: RequestInfo | URL,
    init?: RequestInit
  ): Promise<Response> {
    let attempt = 0;
    const startTime = Date.now();
    const maxRetryTimeout = Flag.RETRY_TIMEOUT() * 1000;
    const maxBackoffDelay = Flag.MAX_RETRY_DELAY();

    while (true) {
      attempt++;
      let response: Response;

      try {
        response = await baseFetch(input, init);
      } catch (error) {
        // Check if it's a retryable network error
        if (isRetryableError(error)) {
          const elapsed = Date.now() - startTime;
          if (elapsed >= maxRetryTimeout) {
            log.warn(() => ({
              message:
                'network error retry timeout exceeded, re-throwing error',
              sessionID,
              elapsed,
              maxRetryTimeout,
              error: (error as Error).message,
            }));
            throw error;
          }

          // Use exponential backoff for network errors (same schedule as
          // the header-less rate-limit backoff)
          const delay = Math.min(
            RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1),
            maxBackoffDelay
          );
          log.info(() => ({
            message: 'network error, retrying',
            sessionID,
            attempt,
            delay,
            error: (error as Error).message,
          }));
          await sleep(delay, init?.signal ?? undefined);
          continue;
        }
        throw error;
      }

      // Only handle rate limit errors (429)
      if (response.status !== 429) {
        return response;
      }

      // Check if we're within the global retry timeout
      const elapsed = Date.now() - startTime;
      if (elapsed >= maxRetryTimeout) {
        log.warn(() => ({
          message: 'retry timeout exceeded in fetch wrapper, returning 429',
          sessionID,
          elapsed,
          maxRetryTimeout,
        }));
        return response; // Let higher-level handling take over
      }

      // Calculate retry delay
      const delay = calculateRetryDelay(
        response.headers,
        attempt,
        maxRetryTimeout - elapsed, // Remaining time
        maxBackoffDelay
      );

      // If delay is null, retry-after exceeds timeout - return response
      if (delay === null) {
        log.warn(() => ({
          message:
            'retry-after exceeds remaining timeout, returning 429 response',
          sessionID,
          elapsed,
          remainingTimeout: maxRetryTimeout - elapsed,
        }));
        return response;
      }

      // Check if delay would exceed remaining timeout
      // (jitter and the minimum interval can push it past the budget)
      if (elapsed + delay >= maxRetryTimeout) {
        log.warn(() => ({
          message: 'delay would exceed retry timeout, returning 429 response',
          sessionID,
          elapsed,
          delay,
          maxRetryTimeout,
        }));
        return response;
      }

      log.info(() => ({
        message: 'rate limited, will retry',
        sessionID,
        attempt,
        delay,
        delayMinutes: (delay / 1000 / 60).toFixed(2),
        elapsed,
        remainingTimeout: maxRetryTimeout - elapsed,
      }));

      // Wait before retrying
      try {
        await sleep(delay, init?.signal ?? undefined);
      } catch {
        // Aborted - return the last response (body left intact for the caller)
        log.info(() => ({
          message: 'retry sleep aborted, returning last response',
          sessionID,
        }));
        return response;
      }

      // This 429 is being dropped in favour of a new attempt.
      discardBody(response);
    }
  };
}
|
|
344
|
+
|
|
345
|
+
/**
 * Add retry handling on top of an existing custom fetch (e.g., an OAuth fetch).
 *
 * Equivalent to calling `create` with `customFetch` as the base fetch, which
 * lets several fetch wrappers be composed while retries stay outermost.
 *
 * @param customFetch The custom fetch function to wrap
 * @param options Configuration options (everything except `baseFetch`)
 * @returns A fetch function with both custom logic and retry handling
 */
export function wrap(
  customFetch: typeof fetch,
  options: Omit<RetryFetchOptions, 'baseFetch'> = {}
): typeof fetch {
  const merged: RetryFetchOptions = Object.assign({}, options, {
    baseFetch: customFetch,
  });
  return create(merged);
}
|
|
363
|
+
}
|
package/src/session/processor.ts
CHANGED
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
import type { ModelsDev } from '../provider/models';
|
|
2
2
|
import { MessageV2 } from './message-v2';
|
|
3
|
-
import {
|
|
3
|
+
import {
|
|
4
|
+
type StreamTextResult,
|
|
5
|
+
type Tool as AITool,
|
|
6
|
+
APICallError,
|
|
7
|
+
JSONParseError,
|
|
8
|
+
} from 'ai';
|
|
4
9
|
import { Log } from '../util/log';
|
|
5
10
|
import { Identifier } from '../id/id';
|
|
6
11
|
import { Session } from '.';
|
|
@@ -205,6 +210,22 @@ export namespace SessionProcessor {
|
|
|
205
210
|
break;
|
|
206
211
|
}
|
|
207
212
|
case 'error':
|
|
213
|
+
// Skip stream parse errors (malformed SSE from gateway/provider)
|
|
214
|
+
// The AI SDK emits these as error events but continues the stream.
|
|
215
|
+
// Following OpenAI Codex pattern: log and skip bad events.
|
|
216
|
+
// See: https://github.com/link-assistant/agent/issues/169
|
|
217
|
+
if (JSONParseError.isInstance(value.error)) {
|
|
218
|
+
log.warn(() => ({
|
|
219
|
+
message:
|
|
220
|
+
'skipping malformed SSE event (stream parse error)',
|
|
221
|
+
errorName: (value.error as Error)?.name,
|
|
222
|
+
errorMessage: (value.error as Error)?.message?.substring(
|
|
223
|
+
0,
|
|
224
|
+
200
|
|
225
|
+
),
|
|
226
|
+
}));
|
|
227
|
+
continue;
|
|
228
|
+
}
|
|
208
229
|
throw value.error;
|
|
209
230
|
|
|
210
231
|
case 'start-step':
|
|
@@ -364,7 +385,7 @@ export namespace SessionProcessor {
|
|
|
364
385
|
providerID: input.providerID,
|
|
365
386
|
});
|
|
366
387
|
|
|
367
|
-
// Check if error is retryable (APIError, SocketConnectionError,
|
|
388
|
+
// Check if error is retryable (APIError, SocketConnectionError, TimeoutError)
|
|
368
389
|
const isRetryableAPIError =
|
|
369
390
|
error?.name === 'APIError' && error.data.isRetryable;
|
|
370
391
|
const isRetryableSocketError =
|
package/src/session/prompt.ts
CHANGED
|
@@ -298,18 +298,18 @@ export namespace SessionPrompt {
|
|
|
298
298
|
lastUser.model.modelID
|
|
299
299
|
);
|
|
300
300
|
} catch (error) {
|
|
301
|
-
|
|
301
|
+
// When an explicit provider is specified, do NOT silently fall back to default
|
|
302
|
+
// This ensures user's explicit choice is respected
|
|
303
|
+
// If the user wants a fallback, they should not specify a provider
|
|
304
|
+
log.error(() => ({
|
|
302
305
|
message:
|
|
303
|
-
'Failed to initialize specified model
|
|
306
|
+
'Failed to initialize specified model - NOT falling back to default (explicit provider specified)',
|
|
304
307
|
providerID: lastUser.model.providerID,
|
|
305
308
|
modelID: lastUser.model.modelID,
|
|
306
309
|
error: error instanceof Error ? error.message : String(error),
|
|
307
310
|
}));
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
defaultModel.providerID,
|
|
311
|
-
defaultModel.modelID
|
|
312
|
-
);
|
|
311
|
+
// Re-throw the error so it can be handled by the caller
|
|
312
|
+
throw error;
|
|
313
313
|
}
|
|
314
314
|
const task = tasks.pop();
|
|
315
315
|
|