@link-assistant/agent 0.12.0 → 0.12.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/agent",
3
- "version": "0.12.0",
3
+ "version": "0.12.3",
4
4
  "description": "A minimal, public domain AI CLI agent compatible with OpenCode's JSON interface. Bun-only runtime.",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
package/src/flag/flag.ts CHANGED
@@ -98,6 +98,17 @@ export namespace Flag {
98
98
  return val ? parseInt(val, 10) * 1000 : 1200000; // 20 minutes in ms
99
99
  }
100
100
 
101
// Minimum retry interval to prevent rapid retries (default: 30 seconds).
// This ensures we don't hammer the API with rapid retry attempts.
// See: https://github.com/link-assistant/agent/issues/167
//
// The environment value is interpreted as whole seconds and returned in
// milliseconds. An unset, empty, or non-numeric value yields the default;
// a negative value is clamped to 0 (i.e. "no minimum").
export function MIN_RETRY_INTERVAL(): number {
  const DEFAULT_INTERVAL_MS = 30_000; // 30 seconds in ms

  const val = getEnv(
    'LINK_ASSISTANT_AGENT_MIN_RETRY_INTERVAL',
    'AGENT_MIN_RETRY_INTERVAL'
  );
  if (!val) return DEFAULT_INTERVAL_MS;

  const seconds = parseInt(val, 10);
  // parseInt yields NaN for values such as "abc". Returning NaN would poison
  // every Math.max() that consumes this interval and make timers fire
  // immediately, which is the exact behavior this setting exists to prevent.
  if (Number.isNaN(seconds)) return DEFAULT_INTERVAL_MS;

  return Math.max(seconds, 0) * 1000;
}
111
+
101
112
  // Stream timeout configuration
102
113
  // chunkMs: timeout between stream chunks - detects stalled streams (default: 2 minutes)
103
114
  // stepMs: timeout for each individual LLM step (default: 10 minutes)
package/src/index.js CHANGED
@@ -142,14 +142,56 @@ function readStdinWithTimeout(timeout = null) {
142
142
 
143
143
  /**
144
144
  * Parse model configuration from argv
145
+ * Supports both explicit provider/model format and short model names.
146
+ *
147
+ * Format examples:
148
+ * - "kilo/glm-5-free" -> uses kilo provider with glm-5-free model (explicit)
149
+ * - "opencode/kimi-k2.5-free" -> uses opencode provider (explicit)
150
+ * - "glm-5-free" -> resolved to kilo provider (unique free model)
151
+ * - "kimi-k2.5-free" -> resolved to opencode provider (shared model, opencode preferred)
152
+ *
145
153
  * @param {object} argv - Command line arguments
146
154
  * @returns {object} - { providerID, modelID }
147
155
  */
148
156
  async function parseModelConfig(argv) {
149
- // Parse model argument (handle model IDs with slashes like groq/qwen/qwen3-32b)
150
- const modelParts = argv.model.split('/');
151
- let providerID = modelParts[0] || 'opencode';
152
- let modelID = modelParts.slice(1).join('/') || 'kimi-k2.5-free';
157
+ const modelArg = argv.model;
158
+
159
+ let providerID;
160
+ let modelID;
161
+
162
+ // Check if model includes explicit provider prefix
163
+ if (modelArg.includes('/')) {
164
+ // Explicit provider/model format - respect user's choice
165
+ const modelParts = modelArg.split('/');
166
+ providerID = modelParts[0];
167
+ modelID = modelParts.slice(1).join('/');
168
+
169
+ // Validate that providerID and modelID are not empty
170
+ if (!providerID || !modelID) {
171
+ providerID = providerID || 'opencode';
172
+ modelID = modelID || 'kimi-k2.5-free';
173
+ }
174
+
175
+ Log.Default.info(() => ({
176
+ message: 'using explicit provider/model',
177
+ providerID,
178
+ modelID,
179
+ }));
180
+ } else {
181
+ // Short model name - resolve to appropriate provider
182
+ // Import Provider to use parseModelWithResolution
183
+ const { Provider } = await import('./provider/provider.ts');
184
+ const resolved = await Provider.parseModelWithResolution(modelArg);
185
+ providerID = resolved.providerID;
186
+ modelID = resolved.modelID;
187
+
188
+ Log.Default.info(() => ({
189
+ message: 'resolved short model name',
190
+ input: modelArg,
191
+ providerID,
192
+ modelID,
193
+ }));
194
+ }
153
195
 
154
196
  // Handle --use-existing-claude-oauth option
155
197
  // This reads OAuth credentials from ~/.claude/.credentials.json (Claude Code CLI)
@@ -175,22 +217,22 @@ async function parseModelConfig(argv) {
175
217
  // Set environment variable for the provider to use
176
218
  process.env.CLAUDE_CODE_OAUTH_TOKEN = creds.accessToken;
177
219
 
178
- // If user specified a model, use it with claude-oauth provider
179
- // If not, use claude-oauth/claude-sonnet-4-5 as default
220
+ // If user specified the default model (opencode/kimi-k2.5-free), switch to claude-oauth
221
+ // If user explicitly specified kilo or another provider, warn but respect their choice
180
222
  if (providerID === 'opencode' && modelID === 'kimi-k2.5-free') {
181
223
  providerID = 'claude-oauth';
182
224
  modelID = 'claude-sonnet-4-5';
183
225
  } else if (!['claude-oauth', 'anthropic'].includes(providerID)) {
184
- // If user specified a different provider, warn them
226
+ // If user specified a different provider explicitly, warn them
185
227
  const compactJson = argv['compact-json'] === true;
186
228
  outputStatus(
187
229
  {
188
230
  type: 'warning',
189
- message: `--use-existing-claude-oauth is set but model uses provider "${providerID}". Using OAuth credentials anyway.`,
231
+ message: `--use-existing-claude-oauth is set but model uses provider "${providerID}". Using specified provider.`,
190
232
  },
191
233
  compactJson
192
234
  );
193
- providerID = 'claude-oauth';
235
+ // Don't override - respect user's explicit provider choice
194
236
  }
195
237
  }
196
238
 
@@ -257,8 +299,6 @@ async function runAgentMode(argv, request) {
257
299
  }));
258
300
  }
259
301
 
260
- const { providerID, modelID } = await parseModelConfig(argv);
261
-
262
302
  // Validate and get JSON standard
263
303
  const jsonStandard = argv['json-standard'];
264
304
  if (!isValidJsonStandard(jsonStandard)) {
@@ -275,9 +315,14 @@ async function runAgentMode(argv, request) {
275
315
  // Logging is already initialized in middleware, no need to call Log.init() again
276
316
 
277
317
  // Wrap in Instance.provide for OpenCode infrastructure
318
+ // parseModelConfig must be called inside Instance.provide to access provider state
278
319
  await Instance.provide({
279
320
  directory: process.cwd(),
280
321
  fn: async () => {
322
+ // Parse model config inside Instance.provide context
323
+ // This allows parseModelWithResolution to access the provider state
324
+ const { providerID, modelID } = await parseModelConfig(argv);
325
+
281
326
  if (argv.server) {
282
327
  // SERVER MODE: Start server and communicate via HTTP
283
328
  await runServerMode(
@@ -330,8 +375,6 @@ async function runContinuousAgentMode(argv) {
330
375
  }));
331
376
  }
332
377
 
333
- const { providerID, modelID } = await parseModelConfig(argv);
334
-
335
378
  // Validate and get JSON standard
336
379
  const jsonStandard = argv['json-standard'];
337
380
  if (!isValidJsonStandard(jsonStandard)) {
@@ -348,9 +391,14 @@ async function runContinuousAgentMode(argv) {
348
391
  const { systemMessage, appendSystemMessage } = await readSystemMessages(argv);
349
392
 
350
393
  // Wrap in Instance.provide for OpenCode infrastructure
394
+ // parseModelConfig must be called inside Instance.provide to access provider state
351
395
  await Instance.provide({
352
396
  directory: process.cwd(),
353
397
  fn: async () => {
398
+ // Parse model config inside Instance.provide context
399
+ // This allows parseModelWithResolution to access the provider state
400
+ const { providerID, modelID } = await parseModelConfig(argv);
401
+
354
402
  if (argv.server) {
355
403
  // SERVER MODE: Start server and communicate via HTTP
356
404
  await runContinuousServerMode(
@@ -16,6 +16,7 @@ import { Flag } from '../flag/flag';
16
16
  import { iife } from '../util/iife';
17
17
  import { createEchoModel } from './echo';
18
18
  import { createCacheModel } from './cache';
19
+ import { RetryFetch } from './retry-fetch';
19
20
 
20
21
  export namespace Provider {
21
22
  const log = Log.create({ service: 'provider' });
@@ -1143,6 +1144,16 @@ export namespace Provider {
1143
1144
  });
1144
1145
  };
1145
1146
  }
1147
+
1148
+ // Wrap fetch with retry logic for rate limit handling (HTTP 429)
1149
+ // This ensures the agent's time-based retry (7-week timeout) is respected
1150
+ // instead of the AI SDK's fixed retry count (3 attempts)
1151
+ // See: https://github.com/link-assistant/agent/issues/167
1152
+ const existingFetch = options['fetch'] ?? fetch;
1153
+ options['fetch'] = RetryFetch.wrap(existingFetch, {
1154
+ sessionID: provider.id,
1155
+ });
1156
+
1146
1157
  const fn = mod[Object.keys(mod).find((key) => key.startsWith('create'))!];
1147
1158
  const loaded = fn({
1148
1159
  name: provider.id,
@@ -1373,6 +1384,215 @@ export namespace Provider {
1373
1384
  };
1374
1385
  }
1375
1386
 
1387
/**
 * Resolve a short model name (one without a provider prefix) to a provider.
 *
 * Resolution order, as implemented below:
 * 1. If the name is on the hard-coded Kilo-only list and the loaded `kilo`
 *    provider actually lists that model, pick `kilo`.
 * 2. Otherwise collect every loaded provider whose model table contains the
 *    name. No match -> `undefined`; exactly one match -> that provider.
 * 3. With several matches, pick `opencode` when it is among them, otherwise
 *    the first match in provider iteration order.
 *
 * @param modelID - Short model name without provider prefix
 * @returns The chosen `{ providerID, modelID }` pair, or `undefined` when no
 *          loaded provider lists the model
 */
export async function resolveShortModelName(
  modelID: string
): Promise<{ providerID: string; modelID: string } | undefined> {
  const { providers } = await state();

  // Names that are routed straight to Kilo when Kilo offers them.
  const kiloOnly = new Set([
    'glm-5-free',
    'glm-4.7-free',
    'giga-potato-free',
    'trinity-large-preview',
  ]);

  if (kiloOnly.has(modelID) && providers['kilo']?.info.models[modelID]) {
    log.info(() => ({
      message: 'resolved short model name to kilo (unique)',
      modelID,
    }));
    return { providerID: 'kilo', modelID };
  }

  // Every loaded provider that lists this model, in iteration order.
  const candidates = Object.entries(providers)
    .filter(([, entry]) => entry.info.models[modelID])
    .map(([id]) => id);

  if (candidates.length === 0) {
    return undefined;
  }

  const firstCandidate = candidates[0];

  if (candidates.length === 1) {
    log.info(() => ({
      message: 'resolved short model name (single match)',
      modelID,
      providerID: firstCandidate,
    }));
    return { providerID: firstCandidate, modelID };
  }

  // Several providers offer the model: opencode wins when present.
  if (candidates.includes('opencode')) {
    log.info(() => ({
      message: 'resolved short model name to opencode (multiple providers)',
      modelID,
      availableProviders: candidates,
    }));
    return { providerID: 'opencode', modelID };
  }

  // No preferred provider among the matches - take the first one.
  log.info(() => ({
    message: 'resolved short model name (fallback)',
    modelID,
    providerID: firstCandidate,
    availableProviders: candidates,
  }));
  return { providerID: firstCandidate, modelID };
}
1471
+
1472
/**
 * Parse a model string that may or may not carry a provider prefix.
 *
 * A string containing "/" is handed to `parseModel` unchanged. A bare model
 * name goes through `resolveShortModelName`; when that finds nothing, a
 * warning is logged and the name is paired with the `opencode` provider.
 *
 * Examples:
 * - "kilo/glm-5-free" -> parsed by `parseModel`
 * - "glm-5-free"      -> result of `resolveShortModelName("glm-5-free")`
 * - "unknown-model"   -> { providerID: "opencode", modelID: "unknown-model" }
 *
 * @param model - Model string with or without provider prefix
 * @returns Parsed provider ID and model ID
 */
export async function parseModelWithResolution(
  model: string
): Promise<{ providerID: string; modelID: string }> {
  const hasProviderPrefix = model.includes('/');
  if (hasProviderPrefix) {
    // Explicit provider - nothing to resolve.
    return parseModel(model);
  }

  const match = await resolveShortModelName(model);
  if (!match) {
    // Nothing offers this name: keep the historical default provider.
    log.warn(() => ({
      message: 'unable to resolve short model name, using opencode as default',
      modelID: model,
    }));
    return { providerID: 'opencode', modelID: model };
  }

  return match;
}
1509
+
1510
/**
 * Models that are available from more than one free provider, mapped to the
 * provider IDs that offer them. Consulted by `getAlternativeProviders` to
 * suggest another provider when one fails.
 *
 * Only meant for models requested without an explicit provider: a request
 * such as "kilo/kimi-k2.5-free" gets no fallback.
 */
const SHARED_FREE_MODELS: Record<string, string[]> = {
  // kimi-k2.5-free is available in both OpenCode and Kilo
  'kimi-k2.5-free': ['opencode', 'kilo'],
  // Note: minimax-m2.1-free is Kilo only, minimax-m2.5-free is OpenCode only
  // They are different model versions, not shared
};

/**
 * Get alternative providers for a model when the primary provider fails
 * (e.g., rate limited).
 *
 * Returns an empty array when the provider was chosen explicitly by the user,
 * when the model is not listed in `SHARED_FREE_MODELS`, or when no other
 * listed provider is currently loaded.
 *
 * @param modelID - The model ID to find alternatives for
 * @param failedProviderID - The provider that failed
 * @param wasExplicitProvider - Whether the user explicitly specified the provider
 * @returns Provider IDs (other than the failed one) that are loaded and offer this model
 */
export async function getAlternativeProviders(
  modelID: string,
  failedProviderID: string,
  wasExplicitProvider: boolean
): Promise<string[]> {
  // If the user explicitly specified a provider, don't offer alternatives
  if (wasExplicitProvider) {
    log.info(() => ({
      message: 'no alternative providers (explicit provider specified)',
      modelID,
      failedProviderID,
    }));
    return [];
  }

  // Own-property lookup only: modelID is caller-supplied, and a plain index
  // would also find inherited members (e.g. "constructor"), which are not
  // arrays and would make the `.filter` call below throw.
  const sharedProviders = Object.prototype.hasOwnProperty.call(
    SHARED_FREE_MODELS,
    modelID
  )
    ? SHARED_FREE_MODELS[modelID]
    : undefined;
  if (!sharedProviders) {
    // Not a shared model, no alternatives
    return [];
  }

  // Keep providers that are not the failed one and are present in state
  const s = await state();
  const alternatives = sharedProviders.filter(
    (p) => p !== failedProviderID && s.providers[p]
  );

  if (alternatives.length > 0) {
    log.info(() => ({
      message: 'found alternative providers for rate-limited model',
      modelID,
      failedProviderID,
      alternatives,
    }));
  }

  return alternatives;
}
1575
+
1576
/**
 * Checks if an error indicates a rate limit issue.
 *
 * An error counts as a rate-limit error when any of these hold:
 * - it carries a numeric `status` or `statusCode` property equal to 429;
 * - its message (case-insensitive) contains "rate limit", "ratelimit" or
 *   "too many requests";
 * - its message contains the number 429 as a standalone number (not as part
 *   of a longer digit run such as "14290");
 * - its name (case-insensitive) contains "ratelimit".
 *
 * @param error - The error to check
 * @returns true if the error indicates a rate limit; false for anything that
 *          is not an `Error` instance
 */
export function isRateLimitError(error: unknown): boolean {
  if (!(error instanceof Error)) return false;

  // A structured status code is more reliable than message text.
  const { status, statusCode } = error as Error & {
    status?: unknown;
    statusCode?: unknown;
  };
  if (status === 429 || statusCode === 429) return true;

  const message = error.message.toLowerCase();
  const name = error.name.toLowerCase();

  return (
    message.includes('rate limit') ||
    message.includes('ratelimit') ||
    message.includes('too many requests') ||
    // Digit-delimited so IDs, ports or byte counts that merely contain
    // "429" are not mistaken for the HTTP status.
    /(?<!\d)429(?!\d)/.test(message) ||
    name.includes('ratelimit')
  );
}
1595
+
1376
1596
  export const ModelNotFoundError = NamedError.create(
1377
1597
  'ProviderModelNotFoundError',
1378
1598
  z.object({
@@ -0,0 +1,363 @@
1
+ import { Log } from '../util/log';
2
+ import { Flag } from '../flag/flag';
3
+
4
+ /**
5
+ * Custom fetch wrapper that handles rate limits (HTTP 429) using time-based retry logic.
6
+ *
7
+ * This wrapper intercepts 429 responses at the HTTP level before the AI SDK's internal
8
+ * retry mechanism can interfere. It respects:
9
+ * - retry-after headers (both seconds and HTTP date formats)
10
+ * - retry-after-ms header for millisecond precision
11
+ * - AGENT_RETRY_TIMEOUT for global time-based retry limit
12
+ * - AGENT_MAX_RETRY_DELAY for maximum single retry wait time
13
+ *
14
+ * Problem solved:
15
+ * The AI SDK's internal retry uses a fixed count (default 3 attempts) and ignores
16
+ * retry-after headers. When providers return long retry-after values (e.g., 64 minutes),
17
+ * the SDK exhausts its retries before the agent can properly wait.
18
+ *
19
+ * Solution:
20
+ * By wrapping fetch, we handle rate limits at the HTTP layer with time-based retries,
21
+ * ensuring the agent's 7-week global timeout is respected.
22
+ *
23
+ * @see https://github.com/link-assistant/agent/issues/167
24
+ * @see https://github.com/vercel/ai/issues/12585
25
+ */
26
+
27
export namespace RetryFetch {
  const log = Log.create({ service: 'retry-fetch' });

  // Exponential backoff parameters, used when a 429 carries no retry-after
  // header and when retrying network errors.
  const RETRY_INITIAL_DELAY = 2000;
  const RETRY_BACKOFF_FACTOR = 2;

  // Largest delay a single timer accepts (signed 32-bit milliseconds, about
  // 24.8 days). Longer sleeps are split into several timers.
  const MAX_TIMER_DELAY = 2_147_483_647;

  // Minimum retry interval to prevent rapid retries (default: 30 seconds)
  // Can be configured via AGENT_MIN_RETRY_INTERVAL env var
  function getMinRetryInterval(): number {
    return Flag.MIN_RETRY_INTERVAL();
  }

  /**
   * Add jitter to a delay value to prevent thundering herd.
   * Adds 0-10% random variation to the delay.
   */
  function addJitter(delay: number): number {
    const jitter = Math.random() * 0.1 * delay;
    return Math.round(delay + jitter);
  }

  /**
   * Exponential backoff for the given 1-based attempt number, capped at
   * `maxBackoffDelay` milliseconds.
   */
  function exponentialBackoff(attempt: number, maxBackoffDelay: number): number {
    return Math.min(
      RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1),
      maxBackoffDelay
    );
  }

  /**
   * Parse retry-after value from response headers and return delay in milliseconds.
   *
   * `retry-after-ms` (milliseconds) takes precedence over `retry-after`, which
   * is tried first as a number of seconds and then as an HTTP date.
   * Returns null if no header yields a positive delay.
   */
  function parseRetryAfterHeader(headers: Headers): number | null {
    // Check for retry-after-ms header first (milliseconds)
    const retryAfterMs = headers.get('retry-after-ms');
    if (retryAfterMs) {
      const parsedMs = Number.parseFloat(retryAfterMs);
      if (!Number.isNaN(parsedMs) && parsedMs > 0) {
        log.info(() => ({
          message: 'parsed retry-after-ms header',
          headerValue: parsedMs,
        }));
        return parsedMs;
      }
    }

    // Check for retry-after header (seconds or HTTP date)
    const retryAfter = headers.get('retry-after');
    if (retryAfter) {
      const parsedSeconds = Number.parseFloat(retryAfter);
      if (!Number.isNaN(parsedSeconds) && parsedSeconds > 0) {
        const delayMs = Math.ceil(parsedSeconds * 1000);
        log.info(() => ({
          message: 'parsed retry-after header (seconds)',
          headerValue: parsedSeconds,
          delayMs,
        }));
        return delayMs;
      }

      // Try parsing as HTTP date format; a date in the past gives a
      // non-positive difference and is ignored.
      const untilDate = Date.parse(retryAfter) - Date.now();
      if (!Number.isNaN(untilDate) && untilDate > 0) {
        const delayMs = Math.ceil(untilDate);
        log.info(() => ({
          message: 'parsed retry-after header (date)',
          headerValue: retryAfter,
          delayMs,
        }));
        return delayMs;
      }
    }

    return null;
  }

  /**
   * Calculate retry delay based on headers and attempt number.
   *
   * With a retry-after header the server's value is used, raised to the
   * minimum retry interval if smaller. Without one, exponential backoff is
   * used, also raised to the minimum interval. Jitter is added in both cases.
   *
   * @param headers Response headers of the 429 response
   * @param attempt 1-based attempt number
   * @param maxRetryTimeout Time budget (ms) the retry-after value must fit in
   * @param maxBackoffDelay Cap (ms) for the exponential backoff
   * @returns Delay in ms, or null if retry-after exceeds `maxRetryTimeout`
   */
  function calculateRetryDelay(
    headers: Headers,
    attempt: number,
    maxRetryTimeout: number,
    maxBackoffDelay: number
  ): number | null {
    const retryAfterMs = parseRetryAfterHeader(headers);
    const minInterval = getMinRetryInterval();

    if (retryAfterMs !== null) {
      // Check if retry-after exceeds the maximum retry timeout
      if (retryAfterMs > maxRetryTimeout) {
        log.error(() => ({
          message:
            'retry-after exceeds maximum retry timeout, will not retry at fetch level',
          retryAfterMs,
          maxRetryTimeout,
          retryAfterHours: (retryAfterMs / 1000 / 3600).toFixed(2),
          maxRetryTimeoutHours: (maxRetryTimeout / 1000 / 3600).toFixed(2),
        }));
        return null;
      }

      // Use exact retry-after time, but ensure minimum interval
      const delay = Math.max(retryAfterMs, minInterval);
      log.info(() => ({
        message: 'using retry-after value',
        retryAfterMs,
        delay,
        minInterval,
      }));
      return addJitter(delay);
    }

    // No retry-after header - use exponential backoff
    const backoffDelay = exponentialBackoff(attempt, maxBackoffDelay);
    const delay = Math.max(backoffDelay, minInterval);
    log.info(() => ({
      message: 'no retry-after header, using exponential backoff',
      attempt,
      backoffDelay,
      delay,
      minInterval,
      maxBackoffDelay,
    }));
    return addJitter(delay);
  }

  /**
   * Sleep for the specified duration, but respect abort signals.
   *
   * Rejects with an `AbortError` DOMException if the signal is already
   * aborted when called or becomes aborted while waiting. The abort listener
   * is removed once the sleep completes, so repeated sleeps on one
   * long-lived signal do not accumulate listeners.
   */
  async function sleep(ms: number, signal?: AbortSignal): Promise<void> {
    return new Promise((resolve, reject) => {
      // An 'abort' event that already fired will never fire again, so the
      // flag has to be checked up front.
      if (signal?.aborted) {
        reject(new DOMException('Aborted', 'AbortError'));
        return;
      }

      let remaining = ms;
      let timer: ReturnType<typeof setTimeout> | undefined;

      const onAbort = () => {
        if (timer !== undefined) clearTimeout(timer);
        reject(new DOMException('Aborted', 'AbortError'));
      };

      const tick = () => {
        // `!(x > 0)` rather than `x <= 0` so that NaN also terminates.
        if (!(remaining > 0)) {
          signal?.removeEventListener('abort', onAbort);
          resolve();
          return;
        }
        // Split waits longer than one timer can represent.
        const step = Math.min(remaining, MAX_TIMER_DELAY);
        remaining -= step;
        timer = setTimeout(tick, step);
      };

      signal?.addEventListener('abort', onAbort, { once: true });
      tick();
    });
  }

  /**
   * Check if an error is retryable (network issues, temporary failures).
   * Decided purely by substring match on the error message.
   */
  function isRetryableError(error: unknown): boolean {
    if (!(error instanceof Error)) return false;

    // Socket/connection errors (Bun has known timeout issues)
    // See: https://github.com/oven-sh/bun/issues/14439
    const markers = [
      'ConnectionClosed',
      'ECONNRESET',
      'ECONNREFUSED',
      'socket',
      'connection',
    ];
    return markers.some((marker) => error.message.includes(marker));
  }

  export type RetryFetchOptions = {
    /**
     * Original fetch function to wrap. Defaults to global fetch.
     */
    baseFetch?: typeof fetch;

    /**
     * Session ID for logging purposes.
     */
    sessionID?: string;
  };

  /**
   * Create a fetch function that handles rate limits with time-based retry logic.
   *
   * This wrapper:
   * 1. Intercepts HTTP 429 responses
   * 2. Parses retry-after headers
   * 3. Waits for the specified duration (respecting global timeout)
   * 4. Retries the request
   *
   * Retryable network errors thrown by the base fetch are retried with
   * exponential backoff under the same time budget, unless the caller's
   * signal has been aborted.
   *
   * If the wait would exceed the retry time budget, or the wait is aborted,
   * the last 429 response is returned to let higher-level error handling
   * take over.
   *
   * @param options Configuration options
   * @returns A fetch function with rate limit retry handling
   */
  export function create(options: RetryFetchOptions = {}): typeof fetch {
    const baseFetch = options.baseFetch ?? fetch;
    const sessionID = options.sessionID ?? 'unknown';

    return async function retryFetch(
      input: RequestInfo | URL,
      init?: RequestInit
    ): Promise<Response> {
      let attempt = 0;
      const startTime = Date.now();
      // NOTE(review): assumes Flag.RETRY_TIMEOUT() is in seconds and
      // Flag.MAX_RETRY_DELAY() in milliseconds - confirm against flag.ts.
      const maxRetryTimeout = Flag.RETRY_TIMEOUT() * 1000;
      const maxBackoffDelay = Flag.MAX_RETRY_DELAY();
      const signal = init?.signal ?? undefined;

      while (true) {
        attempt++;
        let response: Response;

        try {
          response = await baseFetch(input, init);
        } catch (error) {
          // Never retry once the caller has aborted, even if the resulting
          // error message happens to look like a connection failure.
          if (signal?.aborted || !isRetryableError(error)) {
            throw error;
          }

          const errorMessage =
            error instanceof Error ? error.message : String(error);

          const elapsed = Date.now() - startTime;
          if (elapsed >= maxRetryTimeout) {
            log.warn(() => ({
              message:
                'network error retry timeout exceeded, re-throwing error',
              sessionID,
              elapsed,
              maxRetryTimeout,
              error: errorMessage,
            }));
            throw error;
          }

          // Use exponential backoff for network errors
          const delay = exponentialBackoff(attempt, maxBackoffDelay);
          log.info(() => ({
            message: 'network error, retrying',
            sessionID,
            attempt,
            delay,
            error: errorMessage,
          }));
          // An abort during this wait propagates as AbortError.
          await sleep(delay, signal);
          continue;
        }

        // Only handle rate limit errors (429)
        if (response.status !== 429) {
          return response;
        }

        // Check if we're within the global retry timeout
        const elapsed = Date.now() - startTime;
        if (elapsed >= maxRetryTimeout) {
          log.warn(() => ({
            message: 'retry timeout exceeded in fetch wrapper, returning 429',
            sessionID,
            elapsed,
            maxRetryTimeout,
          }));
          return response; // Let higher-level handling take over
        }

        // Calculate retry delay
        const delay = calculateRetryDelay(
          response.headers,
          attempt,
          maxRetryTimeout - elapsed, // Remaining time
          maxBackoffDelay
        );

        // If delay is null, retry-after exceeds timeout - return response
        if (delay === null) {
          log.warn(() => ({
            message:
              'retry-after exceeds remaining timeout, returning 429 response',
            sessionID,
            elapsed,
            remainingTimeout: maxRetryTimeout - elapsed,
          }));
          return response;
        }

        // Jitter or the minimum interval may push the delay past the budget
        if (elapsed + delay >= maxRetryTimeout) {
          log.warn(() => ({
            message: 'delay would exceed retry timeout, returning 429 response',
            sessionID,
            elapsed,
            delay,
            maxRetryTimeout,
          }));
          return response;
        }

        log.info(() => ({
          message: 'rate limited, will retry',
          sessionID,
          attempt,
          delay,
          delayMinutes: (delay / 1000 / 60).toFixed(2),
          elapsed,
          remainingTimeout: maxRetryTimeout - elapsed,
        }));

        // Wait before retrying
        try {
          await sleep(delay, signal);
        } catch {
          // Aborted - return the last response (body still unread, so the
          // caller can inspect it)
          log.info(() => ({
            message: 'retry sleep aborted, returning last response',
            sessionID,
          }));
          return response;
        }

        // This 429 response is being dropped in favor of a retry: discard
        // its unread body rather than leaving the stream open.
        try {
          await response.body?.cancel();
        } catch {
          // Body already consumed or locked by the wrapped fetch - nothing
          // left to discard.
        }
      }
    };
  }

  /**
   * Wrap an existing custom fetch (e.g., OAuth fetch) with retry logic.
   *
   * This allows composing multiple fetch wrappers while maintaining retry handling.
   *
   * @param customFetch The custom fetch function to wrap
   * @param options Configuration options
   * @returns A fetch function with both custom logic and retry handling
   */
  export function wrap(
    customFetch: typeof fetch,
    options: Omit<RetryFetchOptions, 'baseFetch'> = {}
  ): typeof fetch {
    return create({
      ...options,
      baseFetch: customFetch,
    });
  }
}
@@ -1,6 +1,11 @@
1
1
  import type { ModelsDev } from '../provider/models';
2
2
  import { MessageV2 } from './message-v2';
3
- import { type StreamTextResult, type Tool as AITool, APICallError } from 'ai';
3
+ import {
4
+ type StreamTextResult,
5
+ type Tool as AITool,
6
+ APICallError,
7
+ JSONParseError,
8
+ } from 'ai';
4
9
  import { Log } from '../util/log';
5
10
  import { Identifier } from '../id/id';
6
11
  import { Session } from '.';
@@ -205,6 +210,22 @@ export namespace SessionProcessor {
205
210
  break;
206
211
  }
207
212
  case 'error':
213
+ // Skip stream parse errors (malformed SSE from gateway/provider)
214
+ // The AI SDK emits these as error events but continues the stream.
215
+ // Following OpenAI Codex pattern: log and skip bad events.
216
+ // See: https://github.com/link-assistant/agent/issues/169
217
+ if (JSONParseError.isInstance(value.error)) {
218
+ log.warn(() => ({
219
+ message:
220
+ 'skipping malformed SSE event (stream parse error)',
221
+ errorName: (value.error as Error)?.name,
222
+ errorMessage: (value.error as Error)?.message?.substring(
223
+ 0,
224
+ 200
225
+ ),
226
+ }));
227
+ continue;
228
+ }
208
229
  throw value.error;
209
230
 
210
231
  case 'start-step':
@@ -364,7 +385,7 @@ export namespace SessionProcessor {
364
385
  providerID: input.providerID,
365
386
  });
366
387
 
367
- // Check if error is retryable (APIError, SocketConnectionError, or TimeoutError)
388
+ // Check if error is retryable (APIError, SocketConnectionError, TimeoutError)
368
389
  const isRetryableAPIError =
369
390
  error?.name === 'APIError' && error.data.isRetryable;
370
391
  const isRetryableSocketError =
@@ -298,18 +298,18 @@ export namespace SessionPrompt {
298
298
  lastUser.model.modelID
299
299
  );
300
300
  } catch (error) {
301
- log.warn(() => ({
301
+ // When an explicit provider is specified, do NOT silently fall back to default
302
+ // This ensures user's explicit choice is respected
303
+ // If the user wants a fallback, they should not specify a provider
304
+ log.error(() => ({
302
305
  message:
303
- 'Failed to initialize specified model, falling back to default model',
306
+ 'Failed to initialize specified model - NOT falling back to default (explicit provider specified)',
304
307
  providerID: lastUser.model.providerID,
305
308
  modelID: lastUser.model.modelID,
306
309
  error: error instanceof Error ? error.message : String(error),
307
310
  }));
308
- const defaultModel = await Provider.defaultModel();
309
- model = await Provider.getModel(
310
- defaultModel.providerID,
311
- defaultModel.modelID
312
- );
311
+ // Re-throw the error so it can be handled by the caller
312
+ throw error;
313
313
  }
314
314
  const task = tasks.pop();
315
315