mohdel 0.104.4 → 0.105.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -95,7 +95,7 @@ export async function * runChatCompletions (envelope, client, config, deps = {})
95
95
  response = await client.chat.completions.create(args, { signal: deps.signal })
96
96
  } catch (e) {
97
97
  deps.log?.warn({ err: e }, `[mohdel:${config.provider}] request failed`)
98
- yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
98
+ yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: config.provider }) }
99
99
  return
100
100
  }
101
101
 
@@ -158,7 +158,7 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
158
158
  stream = await client.chat.completions.create(args, { signal: deps.signal })
159
159
  } catch (e) {
160
160
  deps.log?.warn({ err: e }, `[mohdel:${config.provider}] request failed`)
161
- yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
161
+ yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: config.provider }) }
162
162
  return
163
163
  }
164
164
 
@@ -222,7 +222,7 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
222
222
  }
223
223
  } catch (e) {
224
224
  deps.log?.warn({ err: e }, `[mohdel:${config.provider}] stream failed`)
225
- yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
225
+ yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: config.provider }) }
226
226
  return
227
227
  }
228
228
 
@@ -13,9 +13,19 @@
13
13
  * caller does with `detail` after that — surface it, log it, redact
14
14
  * it further — is the caller's policy.
15
15
  *
16
- * Type tags: `AUTH_INVALID`, `RATE_LIMIT`, `QUOTA_EXHAUSTED`,
17
- * `CONTEXT_OVERFLOW`, `CONTENT_BLOCKED`, `PROVIDER_UNAVAILABLE`,
18
- * `PROVIDER_ERROR`, `NET_ERROR`.
16
+ * 429 split: `RATE_LIMIT_TIER` means the caller's own quota dimension
17
+ * (per-minute/per-day requests or tokens, org concurrency) was
18
+ * exhausted — retrying inside the rate-limit window cannot succeed.
19
+ * `RATE_LIMIT_LOAD` means the provider is shedding load for reasons
20
+ * not tied to the caller's quota — the next attempt may succeed
21
+ * immediately. `RATE_LIMIT` (no suffix) is the fallback when the
22
+ * signal is ambiguous. Pass `opts.provider` from the adapter to enable
23
+ * provider-specific disambiguation.
24
+ *
25
+ * Type tags: `AUTH_INVALID`, `RATE_LIMIT`, `RATE_LIMIT_TIER`,
26
+ * `RATE_LIMIT_LOAD`, `QUOTA_EXHAUSTED`, `CONTEXT_OVERFLOW`,
27
+ * `CONTENT_BLOCKED`, `PROVIDER_UNAVAILABLE`, `PROVIDER_ERROR`,
28
+ * `NET_ERROR`.
19
29
  *
20
30
  * @module session/adapters/_errors
21
31
  */
@@ -102,6 +112,189 @@ function matchesContextOverflow (msg) {
102
112
  return false
103
113
  }
104
114
 
115
/**
 * Read a header in an SDK-agnostic way. Some SDKs hand back a real
 * `Headers` instance (web fetch); others use a plain object whose keys
 * may or may not be lowercased. Normalize to a case-insensitive lookup
 * that works on both.
 *
 * A header whose value is `null`/`undefined` is treated as absent on
 * every lookup path, so callers never receive the literal strings
 * "null" or "undefined".
 *
 * @param {any} headers `Headers`-like object (anything with a `get`
 *                      method) or a plain key/value object.
 * @param {string} name Header name, any casing.
 * @returns {string | undefined} Stringified value, or `undefined` when
 *                               the header is missing or nullish.
 */
function headerVal (headers, name) {
  if (!headers) return undefined
  const lower = name.toLowerCase()

  // `Headers`-style containers: try the name as given first, then lowercased.
  if (typeof headers.get === 'function') {
    const v = headers.get(name) ?? headers.get(lower)
    return v == null ? undefined : String(v)
  }

  if (typeof headers === 'object') {
    // Fast paths: exact key, then lowercased key.
    if (headers[name] != null) return String(headers[name])
    if (headers[lower] != null) return String(headers[lower])
    // Slow path: arbitrary casing. Nullish values are skipped here too,
    // matching the fast paths above instead of stringifying them.
    for (const k of Object.keys(headers)) {
      if (k.toLowerCase() === lower && headers[k] != null) return String(headers[k])
    }
  }
  return undefined
}
139
+
140
/**
 * Headers that providers expose for caller-side quota limits. They are
 * what lets a 429 be disambiguated without provider-specific body
 * parsing: `classify429` treats a reading of 0 (or below) on any of
 * them as a tier hit, and headers that are present with budget left as
 * provider-side throttling.
 */
const RATE_LIMIT_HEADER_NAMES = Object.freeze([
  'x-ratelimit-remaining-requests',
  'x-ratelimit-remaining-tokens',
  'anthropic-ratelimit-requests-remaining',
  'anthropic-ratelimit-tokens-remaining',
  'anthropic-ratelimit-input-tokens-remaining',
  'anthropic-ratelimit-output-tokens-remaining'
])

/**
 * Scan an SDK error for the known "remaining quota" headers.
 *
 * Headers are looked up on `err.headers`, falling back to
 * `err.response.headers`.
 *
 * @param {any} err Raw error thrown by a provider SDK.
 * @returns {{ any: boolean, zero: boolean }}
 *   `any`  — at least one known header carried a non-blank value.
 *   `zero` — at least one of those values parsed to a finite number <= 0.
 */
function readRemainingHeaders (err) {
  const headers = err?.headers || err?.response?.headers
  if (!headers) return { any: false, zero: false }
  let any = false
  let zero = false
  for (const name of RATE_LIMIT_HEADER_NAMES) {
    const raw = headerVal(headers, name)
    if (raw == null) continue
    const text = String(raw).trim()
    // A blank value carries no information. It must not count as
    // "present", and above all must not count as zero: Number('') is 0,
    // which would otherwise look like an exhausted quota.
    if (text === '') continue
    any = true
    const n = Number(text)
    if (Number.isFinite(n) && n <= 0) zero = true
  }
  return { any, zero }
}
168
+
169
/**
 * Build the error object shared by all three 429 outcomes. Only the
 * `type` tag and the human-readable `message` vary; every 429 is
 * reported with severity `'warn'` and `retryable: true`.
 * @param {string} type
 * @param {string} message
 * @param {string | undefined} detail
 */
function rateLimitResult (type, message, detail) {
  return { message, severity: 'warn', retryable: true, type, detail }
}

/** 429 attributed to the caller's own quota. */
const tierResult = (detail) => {
  return rateLimitResult('RATE_LIMIT_TIER', 'rate limit exceeded (caller quota)', detail)
}

/** 429 attributed to provider-side load. */
const loadResult = (detail) => {
  return rateLimitResult('RATE_LIMIT_LOAD', 'rate limit exceeded (provider load)', detail)
}

/** 429 that could not be attributed either way. */
const ambiguousResult = (detail) => {
  return rateLimitResult('RATE_LIMIT', 'rate limit exceeded', detail)
}
192
+
193
/**
 * Per-provider 429 disambiguators. Each takes the raw error + already-
 * extracted code/detail and returns a TypedError result, or `undefined`
 * to defer to the generic header/fallback path in `classify429`.
 *
 * Notes on signals (verified against SDK source where possible;
 * marked `UNVERIFIED` when based on docs/conventions only):
 * - openai: `code === 'rate_limit_exceeded'` is the documented tier
 *   tag. Any other 429 defers to the generic path.
 * - xai / deepseek / mistral / fireworks / groq / novita: treated the
 *   same way as openai here.
 * - openrouter: `code === 'rate_limit_exceeded'` → TIER; otherwise a
 *   body mentioning "overloaded" / "busy" / "capacity" → LOAD.
 * - anthropic: `'overloaded_error'` is the public load signal;
 *   `'rate_limit_error'` is the tier signal. Both are compared against
 *   `code` — NOTE(review): assumes extractCode() surfaces
 *   `error.error.type` as `code`; confirm.
 * - cerebras: tier hits surface the same generic 429 body
 *   ("We're experiencing high traffic right now…") as load events;
 *   `x-ratelimit-remaining-*` headers, when present and zero, are the
 *   only reliable tier discriminator. The handler therefore defers
 *   whenever those headers are present, so the generic header path
 *   decides, and only maps absent headers + that body string → LOAD.
 *   UNVERIFIED for non-tier-saturated cases.
 * - gemini: `status === 'RESOURCE_EXHAUSTED'` in the body (or the same
 *   token in the detail text) → TIER. Anything else defers to the
 *   generic path.
 */

// Shared by every provider whose only tier signal here is
// `code === 'rate_limit_exceeded'`. Give a provider its own handler as
// soon as its signals diverge.
const tierOnRateLimitExceeded = (_err, code, detail) =>
  code === 'rate_limit_exceeded' ? tierResult(detail) : undefined

const providerOverrides = {
  openai: tierOnRateLimitExceeded,
  cerebras (err, _code, detail) {
    // Headers outrank the body text: the "high traffic" body is sent for
    // tier hits as well, so when remaining-quota headers exist, let the
    // generic header path classify (0 → TIER, > 0 → LOAD).
    if (readRemainingHeaders(err).any) return undefined
    if (/high traffic/i.test(detail || '')) return loadResult(detail)
    return undefined
  },
  xai: tierOnRateLimitExceeded,
  deepseek: tierOnRateLimitExceeded,
  mistral: tierOnRateLimitExceeded,
  fireworks: tierOnRateLimitExceeded,
  groq: tierOnRateLimitExceeded,
  novita: tierOnRateLimitExceeded,
  openrouter (_err, code, detail) {
    // OpenRouter forwards upstream 429s. Their own aggregator tier hits
    // expose `code === 'rate_limit_exceeded'`; upstream-provider load
    // is reflected via the body's `error.metadata.provider_name`
    // alongside an "overloaded" / "busy" phrasing — report LOAD on that
    // phrasing when there is no definite tier signal.
    if (code === 'rate_limit_exceeded') return tierResult(detail)
    if (/overloaded|busy|capacity/i.test(detail || '')) return loadResult(detail)
    return undefined
  },
  anthropic (_err, code, detail) {
    if (code === 'overloaded_error') return loadResult(detail)
    if (code === 'rate_limit_error') return tierResult(detail)
    return undefined
  },
  gemini (err, _code, detail) {
    // SDK buries the protobuf-style status in the message; the body's
    // `error.status` is also exposed when present.
    const status = err?.error?.status || err?.response?.data?.error?.status
    if (status === 'RESOURCE_EXHAUSTED' || /resource_exhausted/i.test(detail || '')) {
      return tierResult(detail)
    }
    return undefined
  }
}
271
+
272
/**
 * Decide whether a 429 is caller-tier or provider-load. The order is
 * intentional:
 *  1. provider override (most specific signals);
 *  2. any known remaining-quota header reading 0 or below → TIER;
 *  3. known headers present but none reading 0 or below → LOAD;
 *  4. otherwise the ambiguous `RATE_LIMIT` fallback.
 *
 * The override is looked up as an OWN property of `providerOverrides`,
 * so a provider name that happens to match an inherited member
 * (`constructor`, `toString`, `__proto__`, …) is treated as "no
 * override" rather than invoking that member or throwing.
 *
 * @param {any} err
 * @param {string} code already-lowercased code from extractCode()
 * @param {string | undefined} detail
 * @param {string} [provider]
 * @returns {import('#core/errors.js').TypedError}
 */
function classify429 (err, code, detail, provider) {
  const handler =
    typeof provider === 'string' &&
    Object.prototype.hasOwnProperty.call(providerOverrides, provider)
      ? providerOverrides[provider]
      : undefined
  const override = typeof handler === 'function' ? handler(err, code, detail) : undefined
  if (override) return override

  const { any, zero } = readRemainingHeaders(err)
  if (zero) return tierResult(detail)
  if (any) {
    // Headers present but remaining > 0 — provider is throttling
    // despite the caller having budget. That's a load signal.
    return loadResult(detail)
  }

  return ambiguousResult(detail)
}
297
+
105
298
  /**
106
299
  * @param {unknown} e
107
300
  * @param {string} [key] Optional API key the call was made with. When
@@ -110,9 +303,15 @@ function matchesContextOverflow (msg) {
110
303
  * providers occasionally echo the rejected key
111
304
  * in error bodies (notably 401/403) and that
112
305
  * must not leak.
306
+ * @param {{ provider?: string }} [opts]
307
+ * Adapter-supplied provider name. Enables
308
+ * provider-specific 429 disambiguation
309
+ * (`RATE_LIMIT_TIER` vs `RATE_LIMIT_LOAD`).
310
+ * When omitted, 429 classification falls back
311
+ * to header-only detection.
113
312
  * @returns {import('#core/errors.js').TypedError}
114
313
  */
115
- export function classifyProviderError (e, key) {
314
+ export function classifyProviderError (e, key, opts = {}) {
116
315
  const err = /** @type {any} */(e)
117
316
  const status = err?.status
118
317
  const code = extractCode(err)
@@ -182,13 +381,7 @@ export function classifyProviderError (e, key) {
182
381
  }
183
382
  }
184
383
  if (status === 429) {
185
- return {
186
- message: 'rate limit exceeded',
187
- severity: 'warn',
188
- retryable: true,
189
- type: 'RATE_LIMIT',
190
- detail
191
- }
384
+ return classify429(err, code, detail, opts.provider)
192
385
  }
193
386
  if (typeof status === 'number' && status >= 500) {
194
387
  return {
@@ -100,7 +100,7 @@ export async function * anthropic (envelope, deps = {}) {
100
100
  if (blocks.length) injectImageBlocks(conversation, blocks)
101
101
  } catch (e) {
102
102
  log?.warn({ err: e }, '[mohdel:anthropic] image load failed')
103
- yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
103
+ yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: 'anthropic' }) }
104
104
  return
105
105
  }
106
106
  }
@@ -201,7 +201,7 @@ export async function * anthropic (envelope, deps = {}) {
201
201
  return
202
202
  }
203
203
  log?.warn({ err: e }, '[mohdel:anthropic] stream failed')
204
- yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
204
+ yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: 'anthropic' }) }
205
205
  return
206
206
  }
207
207
 
@@ -58,7 +58,7 @@ export async function * gemini (envelope, deps = {}) {
58
58
  if (parts.length) injectParts(contents, parts)
59
59
  } catch (e) {
60
60
  log?.warn({ err: e }, '[mohdel:gemini] image load failed')
61
- yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
61
+ yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: 'gemini' }) }
62
62
  return
63
63
  }
64
64
  }
@@ -80,7 +80,7 @@ export async function * gemini (envelope, deps = {}) {
80
80
  // `typed` lets _videos.js surface PROVIDER_UNAVAILABLE on
81
81
  // upload-deadline timeouts; fall back to generic classification.
82
82
  const typed = /** @type {any} */(e).typed
83
- yield { type: 'error', error: typed || classifyProviderError(e, envelope.auth?.key) }
83
+ yield { type: 'error', error: typed || classifyProviderError(e, envelope.auth?.key, { provider: 'gemini' }) }
84
84
  return
85
85
  }
86
86
  }
@@ -172,7 +172,7 @@ export async function * gemini (envelope, deps = {}) {
172
172
  return
173
173
  }
174
174
  log?.warn({ err: e }, '[mohdel:gemini] stream failed')
175
- yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
175
+ yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: 'gemini' }) }
176
176
  return
177
177
  }
178
178
 
@@ -62,7 +62,7 @@ export async function * openai (envelope, deps = {}) {
62
62
  if (parts.length) injectImageParts(input, parts)
63
63
  } catch (e) {
64
64
  log?.warn({ err: e }, '[mohdel:openai] image load failed')
65
- yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
65
+ yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: 'openai' }) }
66
66
  return
67
67
  }
68
68
  }
@@ -161,7 +161,7 @@ export async function * openai (envelope, deps = {}) {
161
161
  return
162
162
  }
163
163
  log?.warn({ err: e }, '[mohdel:openai] stream failed')
164
- yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
164
+ yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: 'openai' }) }
165
165
  return
166
166
  }
167
167
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mohdel",
3
- "version": "0.104.4",
3
+ "version": "0.105.0",
4
4
  "license": "MIT",
5
5
  "author": {
6
6
  "name": "Christophe Le Bars",
@@ -87,7 +87,7 @@
87
87
  "@opentelemetry/exporter-trace-otlp-grpc": "^0.217.0",
88
88
  "@opentelemetry/sdk-node": "^0.217.0",
89
89
  "chalk": "^5.4.0",
90
- "mohdel-thin-gate-linux-x64-gnu": "0.104.4"
90
+ "mohdel-thin-gate-linux-x64-gnu": "0.105.0"
91
91
  },
92
92
  "dependencies": {
93
93
  "@anthropic-ai/sdk": "^0.95.1",