mohdel 0.104.4 → 0.105.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -95,7 +95,7 @@ export async function * runChatCompletions (envelope, client, config, deps = {})
|
|
|
95
95
|
response = await client.chat.completions.create(args, { signal: deps.signal })
|
|
96
96
|
} catch (e) {
|
|
97
97
|
deps.log?.warn({ err: e }, `[mohdel:${config.provider}] request failed`)
|
|
98
|
-
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
|
|
98
|
+
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: config.provider }) }
|
|
99
99
|
return
|
|
100
100
|
}
|
|
101
101
|
|
|
@@ -158,7 +158,7 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
|
|
|
158
158
|
stream = await client.chat.completions.create(args, { signal: deps.signal })
|
|
159
159
|
} catch (e) {
|
|
160
160
|
deps.log?.warn({ err: e }, `[mohdel:${config.provider}] request failed`)
|
|
161
|
-
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
|
|
161
|
+
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: config.provider }) }
|
|
162
162
|
return
|
|
163
163
|
}
|
|
164
164
|
|
|
@@ -222,7 +222,7 @@ async function * runStreaming (envelope, client, args, config, start, deps) {
|
|
|
222
222
|
}
|
|
223
223
|
} catch (e) {
|
|
224
224
|
deps.log?.warn({ err: e }, `[mohdel:${config.provider}] stream failed`)
|
|
225
|
-
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
|
|
225
|
+
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: config.provider }) }
|
|
226
226
|
return
|
|
227
227
|
}
|
|
228
228
|
|
|
@@ -13,9 +13,19 @@
|
|
|
13
13
|
* caller does with `detail` after that — surface it, log it, redact
|
|
14
14
|
* it further — is the caller's policy.
|
|
15
15
|
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
16
|
+
* 429 split: `RATE_LIMIT_TIER` means the caller's own quota dimension
|
|
17
|
+
* (per-minute/per-day requests or tokens, org concurrency) was
|
|
18
|
+
* exhausted — retrying inside the rate-limit window cannot succeed.
|
|
19
|
+
* `RATE_LIMIT_LOAD` means the provider is shedding load for reasons
|
|
20
|
+
* not tied to the caller's quota — the next attempt may succeed
|
|
21
|
+
* immediately. `RATE_LIMIT` (no suffix) is the fallback when the
|
|
22
|
+
* signal is ambiguous. Pass `opts.provider` from the adapter to enable
|
|
23
|
+
* provider-specific disambiguation.
|
|
24
|
+
*
|
|
25
|
+
* Type tags: `AUTH_INVALID`, `RATE_LIMIT`, `RATE_LIMIT_TIER`,
|
|
26
|
+
* `RATE_LIMIT_LOAD`, `QUOTA_EXHAUSTED`, `CONTEXT_OVERFLOW`,
|
|
27
|
+
* `CONTENT_BLOCKED`, `PROVIDER_UNAVAILABLE`, `PROVIDER_ERROR`,
|
|
28
|
+
* `NET_ERROR`.
|
|
19
29
|
*
|
|
20
30
|
* @module session/adapters/_errors
|
|
21
31
|
*/
|
|
@@ -102,6 +112,189 @@ function matchesContextOverflow (msg) {
|
|
|
102
112
|
return false
|
|
103
113
|
}
|
|
104
114
|
|
|
115
|
+
/**
|
|
116
|
+
* Read a header in a SDK-agnostic way. Some SDKs hand back a real
|
|
117
|
+
* `Headers` instance (web fetch); others use a plain lowercased
|
|
118
|
+
* object. Normalize to a case-insensitive lookup that works on both.
|
|
119
|
+
* @param {any} headers
|
|
120
|
+
* @param {string} name
|
|
121
|
+
* @returns {string | undefined}
|
|
122
|
+
*/
|
|
123
|
+
function headerVal (headers, name) {
|
|
124
|
+
if (!headers) return undefined
|
|
125
|
+
if (typeof headers.get === 'function') {
|
|
126
|
+
const v = headers.get(name) ?? headers.get(name.toLowerCase())
|
|
127
|
+
return v == null ? undefined : String(v)
|
|
128
|
+
}
|
|
129
|
+
if (typeof headers === 'object') {
|
|
130
|
+
const lower = name.toLowerCase()
|
|
131
|
+
if (headers[name] != null) return String(headers[name])
|
|
132
|
+
if (headers[lower] != null) return String(headers[lower])
|
|
133
|
+
for (const k of Object.keys(headers)) {
|
|
134
|
+
if (k.toLowerCase() === lower) return String(headers[k])
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
return undefined
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
/**
|
|
141
|
+
* Headers that providers expose for caller-side quota limits. Any one
|
|
142
|
+
* being present is a strong signal that the 429 is tier-driven; if
|
|
143
|
+
* one is present and reads 0, it's definitive.
|
|
144
|
+
*/
|
|
145
|
+
const RATE_LIMIT_HEADER_NAMES = Object.freeze([
|
|
146
|
+
'x-ratelimit-remaining-requests',
|
|
147
|
+
'x-ratelimit-remaining-tokens',
|
|
148
|
+
'anthropic-ratelimit-requests-remaining',
|
|
149
|
+
'anthropic-ratelimit-tokens-remaining',
|
|
150
|
+
'anthropic-ratelimit-input-tokens-remaining',
|
|
151
|
+
'anthropic-ratelimit-output-tokens-remaining'
|
|
152
|
+
])
|
|
153
|
+
|
|
154
|
+
function readRemainingHeaders (err) {
|
|
155
|
+
const headers = err?.headers || err?.response?.headers
|
|
156
|
+
if (!headers) return { any: false, zero: false }
|
|
157
|
+
let any = false
|
|
158
|
+
let zero = false
|
|
159
|
+
for (const name of RATE_LIMIT_HEADER_NAMES) {
|
|
160
|
+
const raw = headerVal(headers, name)
|
|
161
|
+
if (raw == null) continue
|
|
162
|
+
any = true
|
|
163
|
+
const n = Number(raw)
|
|
164
|
+
if (Number.isFinite(n) && n <= 0) zero = true
|
|
165
|
+
}
|
|
166
|
+
return { any, zero }
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
const tierResult = (detail) => ({
|
|
170
|
+
message: 'rate limit exceeded (caller quota)',
|
|
171
|
+
severity: 'warn',
|
|
172
|
+
retryable: true,
|
|
173
|
+
type: 'RATE_LIMIT_TIER',
|
|
174
|
+
detail
|
|
175
|
+
})
|
|
176
|
+
|
|
177
|
+
const loadResult = (detail) => ({
|
|
178
|
+
message: 'rate limit exceeded (provider load)',
|
|
179
|
+
severity: 'warn',
|
|
180
|
+
retryable: true,
|
|
181
|
+
type: 'RATE_LIMIT_LOAD',
|
|
182
|
+
detail
|
|
183
|
+
})
|
|
184
|
+
|
|
185
|
+
const ambiguousResult = (detail) => ({
|
|
186
|
+
message: 'rate limit exceeded',
|
|
187
|
+
severity: 'warn',
|
|
188
|
+
retryable: true,
|
|
189
|
+
type: 'RATE_LIMIT',
|
|
190
|
+
detail
|
|
191
|
+
})
|
|
192
|
+
|
|
193
|
+
/**
|
|
194
|
+
* Per-provider 429 disambiguators. Each takes the raw error + already-
|
|
195
|
+
* extracted code/detail and returns a TypedError result, or `undefined`
|
|
196
|
+
* to defer to the generic header/fallback path.
|
|
197
|
+
*
|
|
198
|
+
* Notes on signals (verified against SDK source where possible;
|
|
199
|
+
* marked `UNVERIFIED` when based on docs/conventions only):
|
|
200
|
+
* - openai: `code === 'rate_limit_exceeded'` is the documented tier
|
|
201
|
+
* tag. Generic 429 with no quota wording → LOAD.
|
|
202
|
+
* - anthropic: `error.error.type === 'overloaded_error'` is the public
|
|
203
|
+
* load signal; `'rate_limit_error'` is the tier signal.
|
|
204
|
+
* - cerebras: tier hits surface the same generic 429 body
|
|
205
|
+
* ("We're experiencing high traffic right now…") as load events;
|
|
206
|
+
* `x-ratelimit-remaining-*` headers, when present and zero, are the
|
|
207
|
+
* only reliable tier discriminator. Absent headers + that body
|
|
208
|
+
* string → LOAD. UNVERIFIED for non-tier-saturated cases.
|
|
209
|
+
* - gemini: `status === 'RESOURCE_EXHAUSTED'` in the body → TIER.
|
|
210
|
+
* Pure 429 without that status → LOAD (rare; gemini usually returns
|
|
211
|
+
* 503 / `UNAVAILABLE` for global congestion).
|
|
212
|
+
*/
|
|
213
|
+
const providerOverrides = {
|
|
214
|
+
openai (_err, code, detail) {
|
|
215
|
+
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
216
|
+
return undefined
|
|
217
|
+
},
|
|
218
|
+
cerebras (_err, _code, detail) {
|
|
219
|
+
if (/high traffic/i.test(detail || '')) return loadResult(detail)
|
|
220
|
+
return undefined
|
|
221
|
+
},
|
|
222
|
+
xai (_err, code, detail) {
|
|
223
|
+
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
224
|
+
return undefined
|
|
225
|
+
},
|
|
226
|
+
deepseek (_err, code, detail) {
|
|
227
|
+
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
228
|
+
return undefined
|
|
229
|
+
},
|
|
230
|
+
mistral (_err, code, detail) {
|
|
231
|
+
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
232
|
+
return undefined
|
|
233
|
+
},
|
|
234
|
+
fireworks (_err, code, detail) {
|
|
235
|
+
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
236
|
+
return undefined
|
|
237
|
+
},
|
|
238
|
+
groq (_err, code, detail) {
|
|
239
|
+
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
240
|
+
return undefined
|
|
241
|
+
},
|
|
242
|
+
novita (_err, code, detail) {
|
|
243
|
+
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
244
|
+
return undefined
|
|
245
|
+
},
|
|
246
|
+
openrouter (_err, code, detail) {
|
|
247
|
+
// OpenRouter forwards upstream 429s. Their own aggregator tier hits
|
|
248
|
+
// expose `code === 'rate_limit_exceeded'`; upstream-provider load
|
|
249
|
+
// is reflected via the body's `error.metadata.provider_name`
|
|
250
|
+
// alongside a "overloaded" / "busy" phrasing — fall back to LOAD
|
|
251
|
+
// when we don't have a definite tier signal.
|
|
252
|
+
if (code === 'rate_limit_exceeded') return tierResult(detail)
|
|
253
|
+
if (/overloaded|busy|capacity/i.test(detail || '')) return loadResult(detail)
|
|
254
|
+
return undefined
|
|
255
|
+
},
|
|
256
|
+
anthropic (_err, code, detail) {
|
|
257
|
+
if (code === 'overloaded_error') return loadResult(detail)
|
|
258
|
+
if (code === 'rate_limit_error') return tierResult(detail)
|
|
259
|
+
return undefined
|
|
260
|
+
},
|
|
261
|
+
gemini (err, code, detail) {
|
|
262
|
+
// SDK buries the protobuf-style status in the message; the body's
|
|
263
|
+
// `error.status` is also exposed when present.
|
|
264
|
+
const status = err?.error?.status || err?.response?.data?.error?.status
|
|
265
|
+
if (status === 'RESOURCE_EXHAUSTED' || /resource_exhausted/i.test(detail || '')) {
|
|
266
|
+
return tierResult(detail)
|
|
267
|
+
}
|
|
268
|
+
return undefined
|
|
269
|
+
}
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
/**
|
|
273
|
+
* Decide whether a 429 is caller-tier or provider-load. The order is
|
|
274
|
+
* intentional: provider override first (most specific signals), then
|
|
275
|
+
* generic header-based detection, then a "remaining=0 on a present
|
|
276
|
+
* header" definitive tier signal, otherwise fall back to ambiguous.
|
|
277
|
+
* @param {any} err
|
|
278
|
+
* @param {string} code already-lowercased code from extractCode()
|
|
279
|
+
* @param {string | undefined} detail
|
|
280
|
+
* @param {string} [provider]
|
|
281
|
+
* @returns {import('#core/errors.js').TypedError}
|
|
282
|
+
*/
|
|
283
|
+
function classify429 (err, code, detail, provider) {
|
|
284
|
+
const override = provider && providerOverrides[provider]?.(err, code, detail)
|
|
285
|
+
if (override) return override
|
|
286
|
+
|
|
287
|
+
const { any, zero } = readRemainingHeaders(err)
|
|
288
|
+
if (zero) return tierResult(detail)
|
|
289
|
+
if (any) {
|
|
290
|
+
// Headers present but remaining > 0 — provider is throttling
|
|
291
|
+
// despite the caller having budget. That's a load signal.
|
|
292
|
+
return loadResult(detail)
|
|
293
|
+
}
|
|
294
|
+
|
|
295
|
+
return ambiguousResult(detail)
|
|
296
|
+
}
|
|
297
|
+
|
|
105
298
|
/**
|
|
106
299
|
* @param {unknown} e
|
|
107
300
|
* @param {string} [key] Optional API key the call was made with. When
|
|
@@ -110,9 +303,15 @@ function matchesContextOverflow (msg) {
|
|
|
110
303
|
* providers occasionally echo the rejected key
|
|
111
304
|
* in error bodies (notably 401/403) and that
|
|
112
305
|
* must not leak.
|
|
306
|
+
* @param {{ provider?: string }} [opts]
|
|
307
|
+
* Adapter-supplied provider name. Enables
|
|
308
|
+
* provider-specific 429 disambiguation
|
|
309
|
+
* (`RATE_LIMIT_TIER` vs `RATE_LIMIT_LOAD`).
|
|
310
|
+
* When omitted, 429 classification falls back
|
|
311
|
+
* to header-only detection.
|
|
113
312
|
* @returns {import('#core/errors.js').TypedError}
|
|
114
313
|
*/
|
|
115
|
-
export function classifyProviderError (e, key) {
|
|
314
|
+
export function classifyProviderError (e, key, opts = {}) {
|
|
116
315
|
const err = /** @type {any} */(e)
|
|
117
316
|
const status = err?.status
|
|
118
317
|
const code = extractCode(err)
|
|
@@ -182,13 +381,7 @@ export function classifyProviderError (e, key) {
|
|
|
182
381
|
}
|
|
183
382
|
}
|
|
184
383
|
if (status === 429) {
|
|
185
|
-
return {
|
|
186
|
-
message: 'rate limit exceeded',
|
|
187
|
-
severity: 'warn',
|
|
188
|
-
retryable: true,
|
|
189
|
-
type: 'RATE_LIMIT',
|
|
190
|
-
detail
|
|
191
|
-
}
|
|
384
|
+
return classify429(err, code, detail, opts.provider)
|
|
192
385
|
}
|
|
193
386
|
if (typeof status === 'number' && status >= 500) {
|
|
194
387
|
return {
|
|
@@ -100,7 +100,7 @@ export async function * anthropic (envelope, deps = {}) {
|
|
|
100
100
|
if (blocks.length) injectImageBlocks(conversation, blocks)
|
|
101
101
|
} catch (e) {
|
|
102
102
|
log?.warn({ err: e }, '[mohdel:anthropic] image load failed')
|
|
103
|
-
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
|
|
103
|
+
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: 'anthropic' }) }
|
|
104
104
|
return
|
|
105
105
|
}
|
|
106
106
|
}
|
|
@@ -201,7 +201,7 @@ export async function * anthropic (envelope, deps = {}) {
|
|
|
201
201
|
return
|
|
202
202
|
}
|
|
203
203
|
log?.warn({ err: e }, '[mohdel:anthropic] stream failed')
|
|
204
|
-
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
|
|
204
|
+
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: 'anthropic' }) }
|
|
205
205
|
return
|
|
206
206
|
}
|
|
207
207
|
|
|
@@ -58,7 +58,7 @@ export async function * gemini (envelope, deps = {}) {
|
|
|
58
58
|
if (parts.length) injectParts(contents, parts)
|
|
59
59
|
} catch (e) {
|
|
60
60
|
log?.warn({ err: e }, '[mohdel:gemini] image load failed')
|
|
61
|
-
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
|
|
61
|
+
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: 'gemini' }) }
|
|
62
62
|
return
|
|
63
63
|
}
|
|
64
64
|
}
|
|
@@ -80,7 +80,7 @@ export async function * gemini (envelope, deps = {}) {
|
|
|
80
80
|
// `typed` lets _videos.js surface PROVIDER_UNAVAILABLE on
|
|
81
81
|
// upload-deadline timeouts; fall back to generic classification.
|
|
82
82
|
const typed = /** @type {any} */(e).typed
|
|
83
|
-
yield { type: 'error', error: typed || classifyProviderError(e, envelope.auth?.key) }
|
|
83
|
+
yield { type: 'error', error: typed || classifyProviderError(e, envelope.auth?.key, { provider: 'gemini' }) }
|
|
84
84
|
return
|
|
85
85
|
}
|
|
86
86
|
}
|
|
@@ -172,7 +172,7 @@ export async function * gemini (envelope, deps = {}) {
|
|
|
172
172
|
return
|
|
173
173
|
}
|
|
174
174
|
log?.warn({ err: e }, '[mohdel:gemini] stream failed')
|
|
175
|
-
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
|
|
175
|
+
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: 'gemini' }) }
|
|
176
176
|
return
|
|
177
177
|
}
|
|
178
178
|
|
|
@@ -62,7 +62,7 @@ export async function * openai (envelope, deps = {}) {
|
|
|
62
62
|
if (parts.length) injectImageParts(input, parts)
|
|
63
63
|
} catch (e) {
|
|
64
64
|
log?.warn({ err: e }, '[mohdel:openai] image load failed')
|
|
65
|
-
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
|
|
65
|
+
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: 'openai' }) }
|
|
66
66
|
return
|
|
67
67
|
}
|
|
68
68
|
}
|
|
@@ -161,7 +161,7 @@ export async function * openai (envelope, deps = {}) {
|
|
|
161
161
|
return
|
|
162
162
|
}
|
|
163
163
|
log?.warn({ err: e }, '[mohdel:openai] stream failed')
|
|
164
|
-
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key) }
|
|
164
|
+
yield { type: 'error', error: classifyProviderError(e, envelope.auth?.key, { provider: 'openai' }) }
|
|
165
165
|
return
|
|
166
166
|
}
|
|
167
167
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mohdel",
|
|
3
|
-
"version": "0.104.4",
|
|
3
|
+
"version": "0.105.1",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Christophe Le Bars",
|
|
@@ -83,19 +83,19 @@
|
|
|
83
83
|
}
|
|
84
84
|
},
|
|
85
85
|
"optionalDependencies": {
|
|
86
|
-
"@clack/prompts": "^1.
|
|
87
|
-
"@opentelemetry/exporter-trace-otlp-grpc": "^0.
|
|
88
|
-
"@opentelemetry/sdk-node": "^0.
|
|
86
|
+
"@clack/prompts": "^1.4.0",
|
|
87
|
+
"@opentelemetry/exporter-trace-otlp-grpc": "^0.218.0",
|
|
88
|
+
"@opentelemetry/sdk-node": "^0.218.0",
|
|
89
89
|
"chalk": "^5.4.0",
|
|
90
|
-
"mohdel-thin-gate-linux-x64-gnu": "0.104.4"
|
|
90
|
+
"mohdel-thin-gate-linux-x64-gnu": "0.105.1"
|
|
91
91
|
},
|
|
92
92
|
"dependencies": {
|
|
93
|
-
"@anthropic-ai/sdk": "^0.95.
|
|
93
|
+
"@anthropic-ai/sdk": "^0.95.2",
|
|
94
94
|
"@cerebras/cerebras_cloud_sdk": "^1.61.1",
|
|
95
|
-
"@google/genai": "^2.0
|
|
95
|
+
"@google/genai": "^2.2.0",
|
|
96
96
|
"@opentelemetry/api": "^1.9.1",
|
|
97
97
|
"env-paths": "^4.0.0",
|
|
98
|
-
"groq-sdk": "^1.
|
|
98
|
+
"groq-sdk": "^1.2.0",
|
|
99
99
|
"openai": "^6.37.0",
|
|
100
100
|
"undici": "^7.24.5"
|
|
101
101
|
},
|
|
@@ -104,9 +104,9 @@
|
|
|
104
104
|
},
|
|
105
105
|
"devDependencies": {
|
|
106
106
|
"gpt-tokenizer": "^3.4.0",
|
|
107
|
-
"lint-staged": "^17.0.
|
|
107
|
+
"lint-staged": "^17.0.4",
|
|
108
108
|
"release-it": "^20.0.1",
|
|
109
109
|
"standard": "^17.1.2",
|
|
110
|
-
"vitest": "^4.1.
|
|
110
|
+
"vitest": "^4.1.6"
|
|
111
111
|
}
|
|
112
112
|
}
|