@link-assistant/agent 0.10.1 → 0.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@link-assistant/agent",
3
- "version": "0.10.1",
3
+ "version": "0.12.0",
4
4
  "description": "A minimal, public domain AI CLI agent compatible with OpenCode's JSON interface. Bun-only runtime.",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
package/src/flag/flag.ts CHANGED
@@ -63,6 +63,41 @@ export namespace Flag {
63
63
  'OPENCODE_DRY_RUN'
64
64
  );
65
65
 
66
+ // Title generation configuration
67
+ // When disabled, sessions will use default "New session - {timestamp}" titles
68
+ // This saves tokens and prevents rate limit issues with free tier models
69
+ // See: https://github.com/link-assistant/agent/issues/157
70
+ export let GENERATE_TITLE = truthyCompat(
71
+ 'LINK_ASSISTANT_AGENT_GENERATE_TITLE',
72
+ 'AGENT_GENERATE_TITLE'
73
+ );
74
+
75
+ // Allow setting title generation mode programmatically (e.g., from CLI --generate-title flag)
76
+ export function setGenerateTitle(value: boolean) {
77
+ GENERATE_TITLE = value;
78
+ }
79
+
80
+ // Retry timeout configuration
81
+ // Maximum total time to keep retrying for the same error type (default: 7 days in seconds)
82
+ // For different error types, the timer resets
83
+ // See: https://github.com/link-assistant/agent/issues/157
84
+ export function RETRY_TIMEOUT(): number {
85
+ const val = getEnv(
86
+ 'LINK_ASSISTANT_AGENT_RETRY_TIMEOUT',
87
+ 'AGENT_RETRY_TIMEOUT'
88
+ );
89
+ return val ? parseInt(val, 10) : 604800; // 7 days in seconds
90
+ }
91
+
92
+ // Maximum delay for a single retry attempt, returned in milliseconds; the env var value is given in seconds (default: 20 minutes)
93
+ export function MAX_RETRY_DELAY(): number {
94
+ const val = getEnv(
95
+ 'LINK_ASSISTANT_AGENT_MAX_RETRY_DELAY',
96
+ 'AGENT_MAX_RETRY_DELAY'
97
+ );
98
+ return val ? parseInt(val, 10) * 1000 : 1200000; // 20 minutes in ms
99
+ }
100
+
66
101
  // Stream timeout configuration
67
102
  // chunkMs: timeout between stream chunks - detects stalled streams (default: 2 minutes)
68
103
  // stepMs: timeout for each individual LLM step (default: 10 minutes)
package/src/index.js CHANGED
@@ -684,6 +684,17 @@ async function main() {
684
684
  description:
685
685
  'When used with --resume or --continue, continue in the same session without forking to a new UUID.',
686
686
  default: false,
687
+ })
688
+ .option('generate-title', {
689
+ type: 'boolean',
690
+ description:
691
+ 'Generate session titles using AI (default: false). Disabling saves tokens and prevents rate limit issues.',
692
+ default: false,
693
+ })
694
+ .option('retry-timeout', {
695
+ type: 'number',
696
+ description:
697
+ 'Maximum total retry time in seconds for rate limit errors (default: 604800 = 7 days)',
687
698
  }),
688
699
  handler: async (argv) => {
689
700
  // Check both CLI flag and environment variable for compact JSON mode
@@ -866,6 +877,13 @@ async function main() {
866
877
  Flag.setDryRun(true);
867
878
  }
868
879
 
880
+ // Set generate-title flag if explicitly enabled
881
+ // Default is false to save tokens and prevent rate limit issues
882
+ // See: https://github.com/link-assistant/agent/issues/157
883
+ if (argv['generate-title'] === true) {
884
+ Flag.setGenerateTitle(true);
885
+ }
886
+
869
887
  // Initialize logging system
870
888
  // - Print logs to stdout only when verbose for clean CLI output
871
889
  // - Use verbose flag to enable DEBUG level logging
@@ -321,6 +321,48 @@ export namespace Provider {
321
321
  options: {},
322
322
  };
323
323
  },
324
+ /**
325
+ * Kilo provider - access to 500+ AI models through Kilo Gateway
326
+ * Uses OpenAI-compatible API at https://api.kilo.ai/api/gateway
327
+ *
328
+ * Free models available without API key (using 'public' key):
329
+ * - GLM-5 (z-ai/glm-5) - Free limited time, flagship Z.AI model
330
+ * - GLM 4.7 (z-ai/glm-4.7:free) - Free, agent-centric model
331
+ * - Kimi K2.5 (moonshot/kimi-k2.5:free) - Free, agentic capabilities
332
+ * - MiniMax M2.1 (minimax/m2.1:free) - Free, general-purpose
333
+ * - Giga Potato (giga-potato:free) - Free evaluation model
334
+ *
335
+ * For paid models, set KILO_API_KEY environment variable
336
+ *
337
+ * @see https://kilo.ai/docs/gateway
338
+ * @see https://kilo.ai/docs/advanced-usage/free-and-budget-models
339
+ */
340
+ kilo: async (input) => {
341
+ const hasKey = await (async () => {
342
+ if (input.env.some((item) => process.env[item])) return true;
343
+ if (await Auth.get(input.id)) return true;
344
+ return false;
345
+ })();
346
+
347
+ // For free models, we can use 'public' as the API key
348
+ // For paid models, user needs to set KILO_API_KEY
349
+ if (!hasKey) {
350
+ for (const [key, value] of Object.entries(input.models)) {
351
+ // Keep only free models (cost.input === 0) when no API key
352
+ if (value.cost.input === 0) continue;
353
+ delete input.models[key];
354
+ }
355
+ }
356
+
357
+ return {
358
+ autoload: Object.keys(input.models).length > 0,
359
+ options: hasKey
360
+ ? {}
361
+ : {
362
+ apiKey: 'public',
363
+ },
364
+ };
365
+ },
324
366
  /**
325
367
  * Qwen Coder OAuth provider for Qwen subscription users
326
368
  * Uses OAuth credentials from agent auth login (Qwen Coder Subscription)
@@ -719,6 +761,170 @@ export namespace Provider {
719
761
  models: {}, // Models are dynamically created based on the provider/model syntax
720
762
  };
721
763
 
764
+ // Add Kilo provider for access to 500+ AI models through Kilo Gateway
765
+ // Free models available: GLM-5, GLM 4.7, Kimi K2.5, MiniMax M2.1, Giga Potato, Trinity Large Preview
766
+ // @see https://kilo.ai/docs/gateway
767
+ // @see https://github.com/link-assistant/agent/issues/159
768
+ database['kilo'] = {
769
+ id: 'kilo',
770
+ name: 'Kilo Gateway',
771
+ npm: '@ai-sdk/openai-compatible',
772
+ api: 'https://api.kilo.ai/api/gateway',
773
+ env: ['KILO_API_KEY'],
774
+ models: {
775
+ // GLM-5 - Flagship Z.AI model, free for limited time
776
+ 'glm-5-free': {
777
+ id: 'z-ai/glm-5',
778
+ name: 'GLM-5 (Free)',
779
+ release_date: '2026-02-11',
780
+ attachment: false,
781
+ reasoning: true,
782
+ temperature: true,
783
+ tool_call: true,
784
+ cost: {
785
+ input: 0,
786
+ output: 0,
787
+ cache_read: 0,
788
+ cache_write: 0,
789
+ },
790
+ limit: {
791
+ context: 202752,
792
+ output: 131072,
793
+ },
794
+ modalities: {
795
+ input: ['text'],
796
+ output: ['text'],
797
+ },
798
+ options: {},
799
+ },
800
+ // GLM 4.7 - Agent-centric model, free
801
+ 'glm-4.7-free': {
802
+ id: 'z-ai/glm-4.7:free',
803
+ name: 'GLM 4.7 (Free)',
804
+ release_date: '2026-01-15',
805
+ attachment: false,
806
+ reasoning: true,
807
+ temperature: true,
808
+ tool_call: true,
809
+ cost: {
810
+ input: 0,
811
+ output: 0,
812
+ cache_read: 0,
813
+ cache_write: 0,
814
+ },
815
+ limit: {
816
+ context: 131072,
817
+ output: 65536,
818
+ },
819
+ modalities: {
820
+ input: ['text'],
821
+ output: ['text'],
822
+ },
823
+ options: {},
824
+ },
825
+ // Kimi K2.5 - Agentic capabilities, free
826
+ 'kimi-k2.5-free': {
827
+ id: 'moonshot/kimi-k2.5:free',
828
+ name: 'Kimi K2.5 (Free)',
829
+ release_date: '2025-12-01',
830
+ attachment: false,
831
+ reasoning: false,
832
+ temperature: true,
833
+ tool_call: true,
834
+ cost: {
835
+ input: 0,
836
+ output: 0,
837
+ cache_read: 0,
838
+ cache_write: 0,
839
+ },
840
+ limit: {
841
+ context: 131072,
842
+ output: 65536,
843
+ },
844
+ modalities: {
845
+ input: ['text'],
846
+ output: ['text'],
847
+ },
848
+ options: {},
849
+ },
850
+ // MiniMax M2.1 - General-purpose, free
851
+ 'minimax-m2.1-free': {
852
+ id: 'minimax/m2.1:free',
853
+ name: 'MiniMax M2.1 (Free)',
854
+ release_date: '2025-11-01',
855
+ attachment: false,
856
+ reasoning: false,
857
+ temperature: true,
858
+ tool_call: true,
859
+ cost: {
860
+ input: 0,
861
+ output: 0,
862
+ cache_read: 0,
863
+ cache_write: 0,
864
+ },
865
+ limit: {
866
+ context: 131072,
867
+ output: 65536,
868
+ },
869
+ modalities: {
870
+ input: ['text'],
871
+ output: ['text'],
872
+ },
873
+ options: {},
874
+ },
875
+ // Giga Potato - Free evaluation model
876
+ 'giga-potato-free': {
877
+ id: 'giga-potato:free',
878
+ name: 'Giga Potato (Free)',
879
+ release_date: '2026-01-01',
880
+ attachment: false,
881
+ reasoning: false,
882
+ temperature: true,
883
+ tool_call: true,
884
+ cost: {
885
+ input: 0,
886
+ output: 0,
887
+ cache_read: 0,
888
+ cache_write: 0,
889
+ },
890
+ limit: {
891
+ context: 65536,
892
+ output: 32768,
893
+ },
894
+ modalities: {
895
+ input: ['text'],
896
+ output: ['text'],
897
+ },
898
+ options: {},
899
+ },
900
+ // Trinity Large Preview - Preview model from Arcee AI
901
+ 'trinity-large-preview': {
902
+ id: 'arcee/trinity-large-preview',
903
+ name: 'Trinity Large Preview (Free)',
904
+ release_date: '2026-01-01',
905
+ attachment: false,
906
+ reasoning: false,
907
+ temperature: true,
908
+ tool_call: true,
909
+ cost: {
910
+ input: 0,
911
+ output: 0,
912
+ cache_read: 0,
913
+ cache_write: 0,
914
+ },
915
+ limit: {
916
+ context: 65536,
917
+ output: 32768,
918
+ },
919
+ modalities: {
920
+ input: ['text'],
921
+ output: ['text'],
922
+ },
923
+ options: {},
924
+ },
925
+ },
926
+ };
927
+
722
928
  for (const [providerID, provider] of configProviders) {
723
929
  const existing = database[providerID];
724
930
  const parsed: ModelsDev.Provider = {
@@ -1073,6 +1279,15 @@ export namespace Provider {
1073
1279
  'big-pickle',
1074
1280
  ];
1075
1281
  }
1282
+ if (providerID === 'kilo') {
1283
+ priority = [
1284
+ 'glm-5-free',
1285
+ 'glm-4.7-free',
1286
+ 'kimi-k2.5-free',
1287
+ 'minimax-m2.1-free',
1288
+ 'giga-potato-free',
1289
+ ];
1290
+ }
1076
1291
  for (const item of priority) {
1077
1292
  for (const model of Object.keys(provider.info.models)) {
1078
1293
  if (model.includes(item)) return getModel(providerID, model);
@@ -1081,6 +1296,7 @@ export namespace Provider {
1081
1296
  }
1082
1297
 
1083
1298
  const priority = [
1299
+ 'glm-5-free',
1084
1300
  'kimi-k2.5-free',
1085
1301
  'minimax-m2.1-free',
1086
1302
  'gpt-5-nano',
@@ -321,6 +321,8 @@ export namespace SessionProcessor {
321
321
  case 'finish':
322
322
  input.assistantMessage.time.completed = Date.now();
323
323
  await Session.updateMessage(input.assistantMessage);
324
+ // Clear retry state on successful completion
325
+ SessionRetry.clearRetryState(input.sessionID);
324
326
  break;
325
327
 
326
328
  default:
@@ -374,24 +376,67 @@ export namespace SessionProcessor {
374
376
  error.data.isRetryable &&
375
377
  attempt < SessionRetry.TIMEOUT_MAX_RETRIES;
376
378
 
379
+ // For API errors (rate limits), check if we're within the retry timeout
380
+ // See: https://github.com/link-assistant/agent/issues/157
381
+ const retryCheck = isRetryableAPIError
382
+ ? SessionRetry.shouldRetry(
383
+ input.sessionID,
384
+ error.data.statusCode?.toString() ?? 'unknown'
385
+ )
386
+ : { shouldRetry: true, elapsedTime: 0, maxTime: 0 };
387
+
377
388
  if (
378
- isRetryableAPIError ||
389
+ (isRetryableAPIError && retryCheck.shouldRetry) ||
379
390
  isRetryableSocketError ||
380
391
  isRetryableTimeoutError
381
392
  ) {
382
393
  attempt++;
383
394
  // Use error-specific delay calculation
384
- const delay =
385
- error?.name === 'SocketConnectionError'
386
- ? SessionRetry.socketErrorDelay(attempt)
387
- : error?.name === 'TimeoutError'
388
- ? SessionRetry.timeoutDelay(attempt)
389
- : SessionRetry.delay(error, attempt);
395
+ // SessionRetry.delay may throw RetryTimeoutExceededError if retry-after exceeds timeout
396
+ let delay: number;
397
+ try {
398
+ delay =
399
+ error?.name === 'SocketConnectionError'
400
+ ? SessionRetry.socketErrorDelay(attempt)
401
+ : error?.name === 'TimeoutError'
402
+ ? SessionRetry.timeoutDelay(attempt)
403
+ : SessionRetry.delay(error, attempt);
404
+ } catch (delayError) {
405
+ // If retry-after exceeds AGENT_RETRY_TIMEOUT, fail immediately
406
+ if (
407
+ delayError instanceof SessionRetry.RetryTimeoutExceededError
408
+ ) {
409
+ log.error(() => ({
410
+ message: 'retry-after exceeds timeout, failing immediately',
411
+ retryAfterMs: delayError.retryAfterMs,
412
+ maxTimeoutMs: delayError.maxTimeoutMs,
413
+ }));
414
+ SessionRetry.clearRetryState(input.sessionID);
415
+ // Create a specific error for this case
416
+ input.assistantMessage.error = {
417
+ name: 'RetryTimeoutExceededError',
418
+ data: {
419
+ message: delayError.message,
420
+ isRetryable: false,
421
+ retryAfterMs: delayError.retryAfterMs,
422
+ maxTimeoutMs: delayError.maxTimeoutMs,
423
+ },
424
+ } as MessageV2.Error;
425
+ Bus.publish(Session.Event.Error, {
426
+ sessionID: input.assistantMessage.sessionID,
427
+ error: input.assistantMessage.error,
428
+ });
429
+ break;
430
+ }
431
+ throw delayError;
432
+ }
390
433
  log.info(() => ({
391
434
  message: 'retrying',
392
435
  errorType: error?.name,
393
436
  attempt,
394
437
  delay,
438
+ elapsedRetryTime: retryCheck.elapsedTime,
439
+ maxRetryTime: retryCheck.maxTime,
395
440
  }));
396
441
  SessionStatus.set(input.sessionID, {
397
442
  type: 'retry',
@@ -399,9 +444,14 @@ export namespace SessionProcessor {
399
444
  message: error.data.message,
400
445
  next: Date.now() + delay,
401
446
  });
447
+ // Update retry state to track total time
448
+ SessionRetry.updateRetryState(input.sessionID, delay);
402
449
  await SessionRetry.sleep(delay, input.abort).catch(() => {});
403
450
  continue;
404
451
  }
452
+
453
+ // Clear retry state on non-retryable error
454
+ SessionRetry.clearRetryState(input.sessionID);
405
455
  input.assistantMessage.error = error;
406
456
  Bus.publish(Session.Event.Error, {
407
457
  sessionID: input.assistantMessage.sessionID,
@@ -1530,7 +1530,9 @@ export namespace SessionPrompt {
1530
1530
  return result;
1531
1531
  }
1532
1532
 
1533
- // TODO: wire this back up
1533
+ // Title generation is optional and disabled by default to save tokens
1534
+ // Enable via --generate-title flag or AGENT_GENERATE_TITLE=true env var
1535
+ // See: https://github.com/link-assistant/agent/issues/157
1534
1536
  async function ensureTitle(input: {
1535
1537
  session: Session.Info;
1536
1538
  message: MessageV2.WithParts;
@@ -1538,6 +1540,14 @@ export namespace SessionPrompt {
1538
1540
  providerID: string;
1539
1541
  modelID: string;
1540
1542
  }) {
1543
+ // Skip title generation if disabled (default)
1544
+ if (!Flag.GENERATE_TITLE) {
1545
+ log.info(() => ({
1546
+ message: 'title generation disabled',
1547
+ hint: 'Enable with --generate-title flag or AGENT_GENERATE_TITLE=true',
1548
+ }));
1549
+ return;
1550
+ }
1541
1551
  if (input.session.parentID) return;
1542
1552
  if (!Session.isDefaultTitle(input.session.title)) return;
1543
1553
  const isFirst =
@@ -1,11 +1,47 @@
1
- import { iife } from '../util/iife';
2
1
  import { MessageV2 } from './message-v2';
2
+ import { Flag } from '../flag/flag';
3
+ import { Log } from '../util/log';
3
4
 
4
5
  export namespace SessionRetry {
6
+ const log = Log.create({ service: 'session.retry' });
7
+
5
8
  export const RETRY_INITIAL_DELAY = 2000;
6
9
  export const RETRY_BACKOFF_FACTOR = 2;
7
10
  export const RETRY_MAX_DELAY_NO_HEADERS = 30_000; // 30 seconds
8
11
 
12
+ // Maximum delay for a single retry attempt when NO retry-after header (default: 20 minutes)
13
+ // This caps exponential backoff when response headers are present but contain no retry-after value
14
+ // Can be configured via AGENT_MAX_RETRY_DELAY env var
15
+ export function getMaxRetryDelay(): number {
16
+ return Flag.MAX_RETRY_DELAY();
17
+ }
18
+
19
+ // Get retry timeout in milliseconds
20
+ export function getRetryTimeout(): number {
21
+ return Flag.RETRY_TIMEOUT() * 1000;
22
+ }
23
+
24
+ /**
25
+ * Error thrown when retry-after exceeds AGENT_RETRY_TIMEOUT
26
+ * This indicates the wait time is too long and we should fail immediately
27
+ */
28
+ export class RetryTimeoutExceededError extends Error {
29
+ public readonly retryAfterMs: number;
30
+ public readonly maxTimeoutMs: number;
31
+
32
+ constructor(retryAfterMs: number, maxTimeoutMs: number) {
33
+ const retryAfterHours = (retryAfterMs / 1000 / 3600).toFixed(2);
34
+ const maxTimeoutHours = (maxTimeoutMs / 1000 / 3600).toFixed(2);
35
+ super(
36
+ `API returned retry-after of ${retryAfterHours} hours, which exceeds the maximum retry timeout of ${maxTimeoutHours} hours. ` +
37
+ `Failing immediately instead of waiting. You can adjust AGENT_RETRY_TIMEOUT env var to increase the limit.`
38
+ );
39
+ this.name = 'RetryTimeoutExceededError';
40
+ this.retryAfterMs = retryAfterMs;
41
+ this.maxTimeoutMs = maxTimeoutMs;
42
+ }
43
+ }
44
+
9
45
  // Socket connection error retry configuration
10
46
  // Bun's fetch() has a known 10-second idle timeout issue
11
47
  // See: https://github.com/oven-sh/bun/issues/14439
@@ -19,6 +55,71 @@ export namespace SessionRetry {
19
55
  export const TIMEOUT_MAX_RETRIES = 3;
20
56
  export const TIMEOUT_DELAYS = [30_000, 60_000, 120_000]; // 30s, 60s, 120s
21
57
 
58
+ // Rate limit retry state tracking
59
+ // Tracks total time spent retrying for each error type
60
+ // See: https://github.com/link-assistant/agent/issues/157
61
+ interface RetryState {
62
+ errorType: string;
63
+ startTime: number;
64
+ totalRetryTime: number;
65
+ }
66
+
67
+ const retryStates: Map<string, RetryState> = new Map();
68
+
69
+ /**
70
+ * Check if we should continue retrying for a given session and error type.
71
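+ * Returns an object whose shouldRetry field is true while within the retry timeout and false once it is exceeded, together with elapsedTime and maxTime in milliseconds.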
72
+ * The timeout resets when the error type changes.
73
+ */
74
+ export function shouldRetry(
75
+ sessionID: string,
76
+ errorType: string
77
+ ): { shouldRetry: boolean; elapsedTime: number; maxTime: number } {
78
+ const maxTime = Flag.RETRY_TIMEOUT() * 1000; // Convert to ms
79
+ const state = retryStates.get(sessionID);
80
+
81
+ if (!state || state.errorType !== errorType) {
82
+ // New error type or first error - reset state
83
+ retryStates.set(sessionID, {
84
+ errorType,
85
+ startTime: Date.now(),
86
+ totalRetryTime: 0,
87
+ });
88
+ return { shouldRetry: true, elapsedTime: 0, maxTime };
89
+ }
90
+
91
+ const elapsedTime = Date.now() - state.startTime;
92
+ if (elapsedTime >= maxTime) {
93
+ log.info(() => ({
94
+ message: 'retry timeout exceeded',
95
+ sessionID,
96
+ errorType,
97
+ elapsedTime,
98
+ maxTime,
99
+ }));
100
+ return { shouldRetry: false, elapsedTime, maxTime };
101
+ }
102
+
103
+ return { shouldRetry: true, elapsedTime, maxTime };
104
+ }
105
+
106
+ /**
107
+ * Update retry state after a retry attempt.
108
+ */
109
+ export function updateRetryState(sessionID: string, delayMs: number): void {
110
+ const state = retryStates.get(sessionID);
111
+ if (state) {
112
+ state.totalRetryTime += delayMs;
113
+ }
114
+ }
115
+
116
+ /**
117
+ * Clear retry state for a session (e.g., on success).
118
+ */
119
+ export function clearRetryState(sessionID: string): void {
120
+ retryStates.delete(sessionID);
121
+ }
122
+
22
123
  export async function sleep(ms: number, signal: AbortSignal): Promise<void> {
23
124
  return new Promise((resolve, reject) => {
24
125
  const timeout = setTimeout(resolve, ms);
@@ -33,38 +134,136 @@ export namespace SessionRetry {
33
134
  });
34
135
  }
35
136
 
36
- export function delay(error: MessageV2.APIError, attempt: number) {
137
+ /**
138
+ * Add jitter to a delay value to prevent thundering herd.
139
+ * Adds 0-10% random variation to the delay.
140
+ */
141
+ function addJitter(delay: number): number {
142
+ const jitter = Math.random() * 0.1 * delay;
143
+ return Math.round(delay + jitter);
144
+ }
145
+
146
+ /**
147
+ * Parse retry-after value from headers and return delay in milliseconds.
148
+ * Returns null if no valid retry-after header is found.
149
+ */
150
+ function parseRetryAfterHeader(
151
+ headers: Record<string, string>
152
+ ): number | null {
153
+ // Check for retry-after-ms header first (milliseconds)
154
+ const retryAfterMs = headers['retry-after-ms'];
155
+ if (retryAfterMs) {
156
+ const parsedMs = Number.parseFloat(retryAfterMs);
157
+ if (!Number.isNaN(parsedMs) && parsedMs > 0) {
158
+ log.info(() => ({
159
+ message: 'parsed retry-after-ms header',
160
+ headerValue: parsedMs,
161
+ }));
162
+ return parsedMs;
163
+ }
164
+ }
165
+
166
+ // Check for retry-after header (seconds or HTTP date)
167
+ const retryAfter = headers['retry-after'];
168
+ if (retryAfter) {
169
+ const parsedSeconds = Number.parseFloat(retryAfter);
170
+ if (!Number.isNaN(parsedSeconds) && parsedSeconds > 0) {
171
+ const delayMs = Math.ceil(parsedSeconds * 1000);
172
+ log.info(() => ({
173
+ message: 'parsed retry-after header (seconds)',
174
+ headerValue: parsedSeconds,
175
+ delayMs,
176
+ }));
177
+ return delayMs;
178
+ }
179
+ // Try parsing as HTTP date format
180
+ const parsed = Date.parse(retryAfter) - Date.now();
181
+ if (!Number.isNaN(parsed) && parsed > 0) {
182
+ log.info(() => ({
183
+ message: 'parsed retry-after header (date)',
184
+ headerValue: retryAfter,
185
+ delayMs: parsed,
186
+ }));
187
+ return Math.ceil(parsed);
188
+ }
189
+ }
190
+
191
+ return null;
192
+ }
193
+
194
+ /**
195
+ * Calculate retry delay based on error response headers and attempt number.
196
+ *
197
+ * RETRY LOGIC (per issue #157 requirements):
198
+ * 1. If retry-after header is available:
199
+ * - If retry-after <= AGENT_RETRY_TIMEOUT: use it directly (exact time)
200
+ * - If retry-after > AGENT_RETRY_TIMEOUT: throw RetryTimeoutExceededError (fail immediately)
201
+ * 2. If no retry-after header:
202
+ * - Use exponential backoff up to AGENT_MAX_RETRY_DELAY
203
+ *
204
+ * Adds jitter to prevent thundering herd when multiple requests retry.
205
+ * See: https://github.com/link-assistant/agent/issues/157
206
+ *
207
+ * @throws {RetryTimeoutExceededError} When retry-after exceeds AGENT_RETRY_TIMEOUT
208
+ */
209
+ export function delay(error: MessageV2.APIError, attempt: number): number {
210
+ const maxRetryTimeout = getRetryTimeout();
211
+ const maxBackoffDelay = getMaxRetryDelay();
37
212
  const headers = error.data.responseHeaders;
213
+
38
214
  if (headers) {
39
- const retryAfterMs = headers['retry-after-ms'];
40
- if (retryAfterMs) {
41
- const parsedMs = Number.parseFloat(retryAfterMs);
42
- if (!Number.isNaN(parsedMs)) {
43
- return parsedMs;
44
- }
45
- }
215
+ const retryAfterMs = parseRetryAfterHeader(headers);
46
216
 
47
- const retryAfter = headers['retry-after'];
48
- if (retryAfter) {
49
- const parsedSeconds = Number.parseFloat(retryAfter);
50
- if (!Number.isNaN(parsedSeconds)) {
51
- // convert seconds to milliseconds
52
- return Math.ceil(parsedSeconds * 1000);
53
- }
54
- // Try parsing as HTTP date format
55
- const parsed = Date.parse(retryAfter) - Date.now();
56
- if (!Number.isNaN(parsed) && parsed > 0) {
57
- return Math.ceil(parsed);
217
+ if (retryAfterMs !== null) {
218
+ // Check if retry-after exceeds the maximum retry timeout
219
+ if (retryAfterMs > maxRetryTimeout) {
220
+ log.error(() => ({
221
+ message:
222
+ 'retry-after exceeds maximum retry timeout, failing immediately',
223
+ retryAfterMs,
224
+ maxRetryTimeout,
225
+ retryAfterHours: (retryAfterMs / 1000 / 3600).toFixed(2),
226
+ maxRetryTimeoutHours: (maxRetryTimeout / 1000 / 3600).toFixed(2),
227
+ }));
228
+ throw new RetryTimeoutExceededError(retryAfterMs, maxRetryTimeout);
58
229
  }
230
+
231
+ // Use exact retry-after time (within timeout limit)
232
+ log.info(() => ({
233
+ message: 'using exact retry-after value',
234
+ retryAfterMs,
235
+ maxRetryTimeout,
236
+ }));
237
+ return addJitter(retryAfterMs);
59
238
  }
60
239
 
61
- return RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1);
240
+ // Headers present but no retry-after - use exponential backoff with max delay cap
241
+ const backoffDelay = Math.min(
242
+ RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1),
243
+ maxBackoffDelay
244
+ );
245
+ log.info(() => ({
246
+ message: 'no retry-after header, using exponential backoff',
247
+ attempt,
248
+ backoffDelay,
249
+ maxBackoffDelay,
250
+ }));
251
+ return addJitter(backoffDelay);
62
252
  }
63
253
 
64
- return Math.min(
254
+ // No headers at all - use exponential backoff with lower cap
255
+ const backoffDelay = Math.min(
65
256
  RETRY_INITIAL_DELAY * Math.pow(RETRY_BACKOFF_FACTOR, attempt - 1),
66
257
  RETRY_MAX_DELAY_NO_HEADERS
67
258
  );
259
+ log.info(() => ({
260
+ message:
261
+ 'no response headers, using exponential backoff with conservative cap',
262
+ attempt,
263
+ backoffDelay,
264
+ maxCap: RETRY_MAX_DELAY_NO_HEADERS,
265
+ }));
266
+ return addJitter(backoffDelay);
68
267
  }
69
268
 
70
269
  /**