mohdel 0.90.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +377 -0
- package/config/benchmarks.json +39 -0
- package/js/client/call.js +75 -0
- package/js/client/call_image.js +82 -0
- package/js/client/gate-binary.js +72 -0
- package/js/client/index.js +16 -0
- package/js/client/ndjson.js +29 -0
- package/js/client/transport.js +48 -0
- package/js/core/envelope.js +141 -0
- package/js/core/errors.js +75 -0
- package/js/core/events.js +96 -0
- package/js/core/image.js +58 -0
- package/js/core/index.js +10 -0
- package/js/core/status.js +48 -0
- package/js/factory/bridge.js +372 -0
- package/js/session/_cooldown.js +114 -0
- package/js/session/_logger.js +138 -0
- package/js/session/_rate_limiter.js +77 -0
- package/js/session/_tracing.js +58 -0
- package/js/session/adapters/_cancelled.js +44 -0
- package/js/session/adapters/_catalog.js +58 -0
- package/js/session/adapters/_chat_completions.js +439 -0
- package/js/session/adapters/_errors.js +85 -0
- package/js/session/adapters/_images.js +60 -0
- package/js/session/adapters/_lazy_json_cache.js +76 -0
- package/js/session/adapters/_pricing.js +67 -0
- package/js/session/adapters/_providers.js +60 -0
- package/js/session/adapters/_tools.js +185 -0
- package/js/session/adapters/_videos.js +283 -0
- package/js/session/adapters/anthropic.js +397 -0
- package/js/session/adapters/cerebras.js +28 -0
- package/js/session/adapters/deepseek.js +32 -0
- package/js/session/adapters/echo.js +51 -0
- package/js/session/adapters/fake.js +262 -0
- package/js/session/adapters/fireworks.js +46 -0
- package/js/session/adapters/gemini.js +381 -0
- package/js/session/adapters/groq.js +23 -0
- package/js/session/adapters/image/fake.js +55 -0
- package/js/session/adapters/image/index.js +40 -0
- package/js/session/adapters/image/novita.js +135 -0
- package/js/session/adapters/image/openai.js +50 -0
- package/js/session/adapters/index.js +53 -0
- package/js/session/adapters/mistral.js +31 -0
- package/js/session/adapters/novita.js +29 -0
- package/js/session/adapters/openai.js +381 -0
- package/js/session/adapters/openrouter.js +66 -0
- package/js/session/adapters/xai.js +27 -0
- package/js/session/bin.js +54 -0
- package/js/session/driver.js +160 -0
- package/js/session/index.js +18 -0
- package/js/session/run.js +393 -0
- package/js/session/run_image.js +61 -0
- package/package.json +107 -0
- package/src/cli/ask.js +160 -0
- package/src/cli/backup.js +107 -0
- package/src/cli/bench.js +262 -0
- package/src/cli/check.js +123 -0
- package/src/cli/colored-logger.js +67 -0
- package/src/cli/colors.js +13 -0
- package/src/cli/default.js +39 -0
- package/src/cli/index.js +150 -0
- package/src/cli/json-output.js +60 -0
- package/src/cli/model.js +571 -0
- package/src/cli/onboard.js +232 -0
- package/src/cli/rank.js +176 -0
- package/src/cli/ratelimit.js +160 -0
- package/src/cli/tag.js +105 -0
- package/src/lib/assets/alibaba.svg +1 -0
- package/src/lib/assets/anthropic.svg +5 -0
- package/src/lib/assets/deepseek.svg +1 -0
- package/src/lib/assets/gemini.svg +1 -0
- package/src/lib/assets/google.svg +2 -0
- package/src/lib/assets/kwaipilot.svg +1 -0
- package/src/lib/assets/meta.svg +1 -0
- package/src/lib/assets/minimax.svg +9 -0
- package/src/lib/assets/moonshotai.svg +4 -0
- package/src/lib/assets/openai.svg +5 -0
- package/src/lib/assets/xai.svg +1 -0
- package/src/lib/assets/xiaomi.svg +2 -0
- package/src/lib/assets/zai.svg +219 -0
- package/src/lib/benchmark-score.js +215 -0
- package/src/lib/benchmark-truth.js +68 -0
- package/src/lib/cache.js +76 -0
- package/src/lib/common.js +208 -0
- package/src/lib/cooldown.js +63 -0
- package/src/lib/creators.js +71 -0
- package/src/lib/curated-cache.js +146 -0
- package/src/lib/errors.js +126 -0
- package/src/lib/index.js +726 -0
- package/src/lib/logger.js +29 -0
- package/src/lib/providers.js +87 -0
- package/src/lib/rank.js +390 -0
- package/src/lib/rate-limiter.js +50 -0
- package/src/lib/schema.js +150 -0
- package/src/lib/select.js +474 -0
- package/src/lib/tracing.js +62 -0
- package/src/lib/utils.js +85 -0
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Factory bridge — in-process path from the `mohdel()` factory API
|
|
3
|
+
* to the `/session` runtime.
|
|
4
|
+
*
|
|
5
|
+
* The factory surface is `mohdel().use(model).answer(prompt, options)`
|
|
6
|
+
* returning a `Promise<AnswerResult>`. This module takes those
|
|
7
|
+
* call arguments, builds a `CallEnvelope` (the wire shape
|
|
8
|
+
* documented in `PROTOCOL.md`), drives `run()` / `runImage()`
|
|
9
|
+
* in `js/session/`, and collapses the event stream back into a
|
|
10
|
+
* single result object.
|
|
11
|
+
*
|
|
12
|
+
* The bridge is what the `mo` CLI, library consumers doing
|
|
13
|
+
* `import mohdel from 'mohdel'`, and test harnesses all sit on top
|
|
14
|
+
* of. It's **in-process only** — `thin-gate` is not spawned. For
|
|
15
|
+
* the subprocess path use `mohdel/client` directly.
|
|
16
|
+
*
|
|
17
|
+
* Adapter error events are rethrown as `MohdelError` so caller
|
|
18
|
+
* catch blocks see a thrown exception rather than an event.
|
|
19
|
+
*
|
|
20
|
+
* @module factory/bridge
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import { run } from '../session/run.js'
|
|
24
|
+
import { runImage } from '../session/run_image.js'
|
|
25
|
+
import { MohdelError, Severity } from '../../src/lib/errors.js'
|
|
26
|
+
import { createRealtimeDeltaBuffer } from '../../src/lib/utils.js'
|
|
27
|
+
|
|
28
|
+
/**
|
|
29
|
+
* @typedef {object} BridgeDeps
|
|
30
|
+
* @property {object} [cooldown] Cooldown tracker injected into run().
|
|
31
|
+
* @property {object} [limiter] Rate limiter injected into run().
|
|
32
|
+
* @property {(provider: string) => any} [resolveProviderLimits]
|
|
33
|
+
* Hook that returns `{rpmLimit, tpmLimit}` for a given provider —
|
|
34
|
+
* lets the factory keep its own `providersConfig` as source of
|
|
35
|
+
* truth instead of the module-level `providers.json` loader.
|
|
36
|
+
* @property {AbortSignal} [signal]
|
|
37
|
+
*/
|
|
38
|
+
|
|
39
|
+
/**
 * Execute a factory `answer()` call on the in-process /session runtime.
 *
 * The call arguments are turned into a call envelope, the event stream
 * produced by `run()` is consumed until the first terminal event, and
 * the outcome is collapsed into a single value: the `result` of a
 * `done` event is returned, an `error` event is thrown as a
 * `MohdelError`.
 *
 * ## Option mapping
 *
 * | factory option                                   | where it ends up                               |
 * |--------------------------------------------------|------------------------------------------------|
 * | `outputBudget` / `outputType` / `outputStyle` / `outputEffort` | flat fields on the envelope      |
 * | `images` / `videos` / `cache`                    | flat fields on the envelope                    |
 * | `tools` / `toolChoice` / `parallelToolCalls`     | flat fields on the envelope                    |
 * | `identifier`                                     | `envelope.identifier`                          |
 * | `traceparent` / `baggage`                        | flat fields on the envelope                    |
 * | `callId` / `authId`                              | envelope ids (generated id / `'local'` if unset) |
 * | `providerOrder` / `providerAllow` / `providerDeny` | `envelope.providerOptions.openrouter`        |
 * | `realtimeHandler` / `bufferOpts`                 | not on the envelope; fed via `createRealtimeDeltaBuffer` |
 * | `configuration.apiKey`                           | `envelope.auth.key`                            |
 * | `configuration.baseURL`                          | `envelope.auth.baseURL`                        |
 * | any other `configuration` key                    | rejected with `CONFIGURATION_UNSUPPORTED`      |
 * | `parentSpan` / `maybeThrowHandler`               | never read here — effectively dropped          |
 *
 * NOTE(review): `configToAuth` currently accepts `baseURL`; confirm that
 * this is intended if adapters are supposed to own the base URL.
 *
 * @param {object} args
 * @param {string} args.provider      Resolved provider name (e.g. 'openai').
 * @param {string} args.model         Provider-native model id (no provider prefix).
 * @param {string} args.modelKey      Catalog key `<provider>/<model>`; only forwarded to the error converter.
 * @param {any} args.configuration    Per-call provider configuration (`apiKey`, `baseURL`).
 * @param {string | any[] | {system?: any, messages: any[]}} args.prompt
 * @param {any} [args.options]        Factory `answer()` options.
 * @param {BridgeDeps} [deps]         Passed to `run()` unchanged.
 * @returns {Promise<any>} The `result` carried by the terminal `done` event.
 * @throws {MohdelError} `SESSION_NO_TERMINAL` when the stream ends without
 *   a `done`/`error` event, or the converted adapter error.
 */
export async function runAnswer ({ provider, model, modelKey, configuration, prompt, options = {} }, deps = {}) {
  const envelope = toEnvelope({ provider, model, configuration, prompt, options })

  // Streaming callbacks only fire if deltas are pushed into a buffer
  // that invokes the handler. No handler (the common case) means no
  // buffer is allocated at all.
  const deltaBuffer = options.realtimeHandler
    ? createRealtimeDeltaBuffer(options.realtimeHandler, options.bufferOpts)
    : null

  let terminal
  try {
    for await (const event of run(envelope, deps)) {
      const isTerminal = event.type === 'done' || event.type === 'error'
      if (isTerminal) {
        // First terminal event wins; anything after it is never read.
        terminal = event
        break
      }
      if (deltaBuffer && event.type === 'delta' && event.delta) {
        deltaBuffer.push(event.delta.type, event.delta.delta)
      }
    }
  } finally {
    // Runs on success, error event and thrown exception alike, so the
    // handler always receives the buffered tail of the stream.
    deltaBuffer?.flush()
  }

  if (!terminal) {
    throw new MohdelError('SESSION_NO_TERMINAL', {
      severity: Severity.ERROR,
      detail: 'session run produced no terminal event',
      retryable: false
    })
  }

  if (terminal.type === 'error') {
    throw fromTypedError(terminal.error, { provider, model, modelKey })
  }
  return terminal.result
}
|
|
119
|
+
|
|
120
|
+
/**
 * Execute a factory `image()` call on the /session runtime.
 *
 * Builds a minimal image envelope, awaits `runImage()`, and unwraps its
 * `{ ok, result, error }` outcome: the result is returned on success,
 * the error is thrown as a `MohdelError` otherwise.
 *
 * @param {object} args
 * @param {string} args.provider
 * @param {string} args.model
 * @param {any} args.configuration  Per-call provider configuration, converted by `configToAuth`.
 * @param {string} args.prompt
 * @param {any} [args.options]      `callId`, `authId`, `size` and `seed` are read; nothing else.
 * @param {any} [args.spec]         Model spec forwarded to `runImage` as `{ spec }` when truthy.
 * @returns {Promise<any>}
 */
export async function runAnswerImage ({ provider, model, configuration, prompt, options = {}, spec }) {
  const envelope = {
    callId: options.callId || newCallId(),
    authId: options.authId || 'local',
    auth: configToAuth(configuration),
    provider,
    model,
    prompt
  }

  const { size, seed } = options
  if (size) envelope.size = size
  // `!= null` rather than truthiness: a seed of 0 must be forwarded.
  if (seed != null) envelope.seed = seed

  const imageDeps = spec ? { spec } : {}
  const outcome = await runImage(envelope, imageDeps)
  if (outcome.ok) return outcome.result
  throw fromTypedError(outcome.error, { provider, model })
}
|
|
151
|
+
|
|
152
|
+
/**
 * Build the call envelope for an `answer()` invocation.
 *
 * Identity fields are always present (`callId` falls back to a generated
 * id, `authId` to `'local'`). Optional fields are copied from `options`
 * only when they pass the per-field rule below, so an absent option never
 * appears on the envelope as an explicit `undefined`.
 *
 * @param {object} args
 * @param {string} args.provider
 * @param {string} args.model
 * @param {any} args.configuration
 * @param {string | any[] | {system?: any, messages: any[]}} args.prompt
 * @param {any} args.options
 * @returns {import('#core/envelope.js').CallEnvelope}
 */
function toEnvelope ({ provider, model, configuration, prompt, options }) {
  /** @type {import('#core/envelope.js').CallEnvelope} */
  const envelope = {
    callId: options.callId || newCallId(),
    authId: options.authId || 'local',
    auth: configToAuth(configuration),
    provider,
    model,
    prompt: toEnvelopePrompt(prompt)
  }

  const ifTruthy = (value) => Boolean(value)
  const ifDefined = (value) => value !== undefined
  const ifNonEmpty = (value) => Boolean(value?.length)
  const ifExplicitlyFalse = (value) => value === false

  // Listed in the order the keys are added to the envelope.
  const copyRules = [
    ['traceparent', ifTruthy],
    ['baggage', ifTruthy],
    ['outputBudget', ifDefined], // 0 is a meaningful budget
    ['outputType', ifTruthy],
    ['outputStyle', ifTruthy],
    ['outputEffort', ifTruthy],
    ['images', ifNonEmpty],
    ['videos', ifNonEmpty],
    ['cache', ifDefined], // `false` must be forwarded
    ['tools', ifNonEmpty],
    ['toolChoice', ifTruthy],
    ['parallelToolCalls', ifExplicitlyFalse], // only the opt-out is forwarded
    ['identifier', ifTruthy]
  ]
  for (const [field, shouldCopy] of copyRules) {
    const value = options[field]
    if (shouldCopy(value)) envelope[field] = value
  }

  // OpenRouter routing preferences travel in their own bag so the flat
  // envelope stays provider-neutral.
  const { providerOrder: order, providerAllow: allow, providerDeny: deny } = options
  if (order || allow || deny) {
    envelope.providerOptions = { openrouter: { order, allow, deny } }
  }

  return envelope
}
|
|
201
|
+
|
|
202
|
+
/**
 * Convert a session `TypedError` into a `MohdelError`.
 *
 * Factory-API callers inspect `.detail` / `.retryable` on thrown errors;
 * wrapping keeps them working without knowing the event-stream shape.
 *
 * @param {import('#core/errors.js').TypedError} err
 * @param {{provider: string, model: string, modelKey?: string}} ctx
 *   Only `provider` and `model` are copied into the error context;
 *   `modelKey` is accepted but not read here.
 * @returns {MohdelError}
 */
function fromTypedError (err, ctx) {
  const code = err.type || 'PROVIDER_ERROR'
  // `detail` is preferred over `message`: when present it is the more
  // specific text, so callers see what to fix rather than a generic label.
  const detail = err.detail || err.message
  const { provider, model } = ctx

  return new MohdelError(code, {
    severity: toSeveritySymbol(err.severity),
    detail,
    retryable: Boolean(err.retryable),
    context: { provider, model }
  })
}
|
|
222
|
+
|
|
223
|
+
/**
 * Map a wire-level severity name onto the matching `Severity` member.
 * Anything that is not one of the six known names (including
 * `undefined`) maps to `Severity.ERROR`.
 *
 * @param {string | undefined} s
 */
function toSeveritySymbol (s) {
  const knownNames = ['trace', 'debug', 'info', 'warn', 'error', 'fatal']
  if (!knownNames.includes(s)) return Severity.ERROR
  // Member names are the upper-cased wire names (TRACE … FATAL).
  return Severity[s.toUpperCase()]
}
|
|
235
|
+
|
|
236
|
+
/**
 * Generate a call id of the form `m86_<epoch-ms>_<6 base-36 chars>`.
 *
 * The random part is only meant to separate calls made in the same
 * millisecond; it is not cryptographically secure.
 *
 * @returns {string}
 */
function newCallId () {
  // `toString(36)` emits only as many digits as the value needs, so a
  // "round" random number (0.5 -> "0.i", 0 -> "0") would yield a short or
  // empty suffix. Pad so the id always carries six characters.
  const suffix = Math.random().toString(36).slice(2, 8).padEnd(6, '0')
  return `m86_${Date.now()}_${suffix}`
}
|
|
239
|
+
|
|
240
|
+
/** Keys a per-call `configuration` bag is allowed to carry. */
const ALLOWED_CONFIG_KEYS = new Set(['apiKey', 'baseURL'])

/**
 * Turn the factory's `configuration` bag into an envelope `auth` object.
 *
 * Only `apiKey` and `baseURL` are accepted. Any other key is rejected
 * with `CONFIGURATION_UNSUPPORTED` instead of being silently dropped —
 * otherwise a caller could believe a proxy, org header or timeout is in
 * effect when it is not, which in the worst case sends keys and prompts
 * to a provider they never intended to reach.
 *
 * @param {any} configuration  Falsy values yield `{ key: '' }`.
 * @returns {import('#core/envelope.js').Auth}
 *   `{ key }`, plus `baseURL` when one was supplied.
 * @throws {MohdelError} `CONFIGURATION_UNSUPPORTED` (non-retryable) when
 *   the bag contains a key outside the allow-list.
 */
function configToAuth (configuration) {
  if (!configuration) return { key: '' }

  const unsupported = Object.keys(configuration).filter(k => !ALLOWED_CONFIG_KEYS.has(k))
  if (unsupported.length > 0) {
    throw new MohdelError('CONFIGURATION_UNSUPPORTED', {
      severity: Severity.ERROR,
      detail:
        'per-call SDK configuration is limited to `apiKey` and `baseURL`. ' +
        `Unsupported keys: ${unsupported.join(', ')}. ` +
        'Move other fields to environment variables or pin them at factory construction.',
      retryable: false
    })
  }

  const auth = { key: configuration.apiKey || '' }
  if (configuration.baseURL) auth.baseURL = configuration.baseURL
  return auth
}
|
|
270
|
+
|
|
271
|
+
/**
 * Normalise a factory prompt into the envelope prompt.
 *
 * - A string is returned as-is.
 * - An array is assumed to be pre-shaped messages and returned as-is
 *   (same reference, no validation).
 * - `{ system?, messages: [...] }` is translated message by message:
 *   - a non-null `system` becomes a leading `system` message whose
 *     content is flattened to a string;
 *   - role `tool_result` becomes role `tool`; `toolCallId` and
 *     `toolName` are copied when present;
 *   - an `assistant` message with a non-empty `toolCalls` array keeps
 *     its tool calls, reduced to `id`, `name`, `arguments` (default `{}`)
 *     and `thoughtSignature` when set;
 *   - every other message is reduced to `{ role, content }`.
 *   Missing `content` becomes `''`.
 *
 * @param {unknown} prompt
 * @returns {string | import('#core/envelope.js').Message[]}
 * @throws {MohdelError} `SESSION_INVALID_PROMPT` for any other shape.
 */
function toEnvelopePrompt (prompt) {
  if (typeof prompt === 'string') return prompt
  if (Array.isArray(prompt)) return /** @type {any} */(prompt)

  const isStructured = Boolean(prompt) && typeof prompt === 'object' && Array.isArray(prompt.messages)
  if (!isStructured) {
    // Reject here with a clear error. Letting an unknown shape through
    // would put a raw non-iterable on the envelope and fail much later
    // with a confusing `prompt.map is not a function` inside an adapter.
    throw new MohdelError('SESSION_INVALID_PROMPT', {
      severity: Severity.ERROR,
      retryable: false,
      detail:
        'prompt must be a string, a Message[] array, ' +
        'or { system?, messages: [...] }; received ' +
        describePromptShape(prompt)
    })
  }

  const toToolCall = (call) => {
    const converted = {
      id: call.id,
      name: call.name,
      arguments: call.arguments ?? {}
    }
    // Provider-specific data must survive a replay: Gemini rejects a
    // tool-call replay whose original `thoughtSignature` was stripped.
    if (call.thoughtSignature) converted.thoughtSignature = call.thoughtSignature
    return converted
  }

  const toMessage = (m) => {
    const content = m.content ?? ''

    if (m.role === 'tool_result') {
      const toolMessage = { role: 'tool', content }
      if (m.toolCallId) toolMessage.toolCallId = m.toolCallId
      if (m.toolName) toolMessage.toolName = m.toolName
      return toolMessage
    }

    const hasToolCalls = m.role === 'assistant' && Array.isArray(m.toolCalls) && m.toolCalls.length
    if (hasToolCalls) {
      return { role: 'assistant', content, toolCalls: m.toolCalls.map(toToolCall) }
    }

    return { role: m.role, content }
  }

  const envelopeMessages = []
  if (prompt.system != null) {
    envelopeMessages.push({ role: 'system', content: flattenSystem(prompt.system) })
  }
  for (const m of prompt.messages) {
    envelopeMessages.push(toMessage(m))
  }
  return envelopeMessages
}
|
|
345
|
+
|
|
346
|
+
/**
 * Produce a short label for a rejected prompt value, used in the
 * invalid-prompt error text.
 *
 * @param {unknown} v
 * @returns {string} `'null'`, `'undefined'`, the `typeof` name for
 *   non-objects, or one of two object descriptions.
 */
function describePromptShape (v) {
  if (v == null) return String(v) // 'null' or 'undefined'

  const kind = typeof v
  if (kind !== 'object') return kind

  const hasNonArrayMessages = 'messages' in v && !Array.isArray(/** @type {any} */(v).messages)
  return hasNonArrayMessages
    ? 'object with non-array messages'
    : 'object without messages'
}
|
|
356
|
+
|
|
357
|
+
/**
 * Collapse a factory `system` value into a single string.
 *
 * `system` may be a plain string or an array of `{text, ...}` blocks.
 * For an array, the truthy `text` values are joined with newlines; any
 * other per-block data (such as cache hints) is discarded, so callers
 * that need those blocks preserved need a different path.
 *
 * @param {string | Array<{text?: string}>} system
 * @returns {string}
 */
function flattenSystem (system) {
  if (typeof system === 'string') return system

  if (!Array.isArray(system)) {
    return system == null ? '' : String(system)
  }

  const texts = []
  for (const block of system) {
    const text = block?.text ?? ''
    if (text) texts.push(text)
  }
  return texts.join('\n')
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Consecutive-failure cooldown tracker.
|
|
3
|
+
*
|
|
4
|
+
* - Auth failures (401/403) trigger immediate cooldown (1 failure).
|
|
5
|
+
* - Other retryable failures require `threshold` consecutive
|
|
6
|
+
* failures before cooldown triggers.
|
|
7
|
+
* - Success resets the failure counter.
|
|
8
|
+
* - `check`/`coolingDownError` expire the entry when the
|
|
9
|
+
* cooldown window passes.
|
|
10
|
+
*
|
|
11
|
+
* Configuration comes from env vars (session-level, global):
|
|
12
|
+
* - MOHDEL_COOLDOWN_THRESHOLD (default 3)
|
|
13
|
+
* - MOHDEL_COOLDOWN_DURATION_MS (default 60_000)
|
|
14
|
+
*
|
|
15
|
+
* State is session-process-local.
|
|
16
|
+
*
|
|
17
|
+
* @module session/cooldown
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
const DEFAULT_THRESHOLD = 3
|
|
21
|
+
const DEFAULT_DURATION_MS = 60_000
|
|
22
|
+
|
|
23
|
+
/**
 * Read a strictly positive integer from an environment variable.
 *
 * @param {string} name      Environment variable name.
 * @param {number} fallback  Returned when the variable is unset, empty,
 *                           not parseable as a base-10 integer, or <= 0.
 * @returns {number}
 */
function envInt (name, fallback) {
  const raw = process.env[name]
  if (!raw) return fallback

  const parsed = Number.parseInt(raw, 10)
  if (!Number.isFinite(parsed)) return fallback
  return parsed > 0 ? parsed : fallback
}
|
|
29
|
+
|
|
30
|
+
/**
 * Create an independent cooldown tracker.
 *
 * Each key owns an entry `{ failCount, until, reason }`. A cooldown
 * window opens when a failure is flagged `immediate` or when
 * `failCount` reaches `threshold`; it lasts `durationMs` and is never
 * extended while still active.
 *
 * @param {number} [threshold]   Failures needed to open a window
 *                               (default: MOHDEL_COOLDOWN_THRESHOLD or 3).
 * @param {number} [durationMs]  Window length in ms
 *                               (default: MOHDEL_COOLDOWN_DURATION_MS or 60000).
 */
export function createCooldownTracker (
  threshold = envInt('MOHDEL_COOLDOWN_THRESHOLD', DEFAULT_THRESHOLD),
  durationMs = envInt('MOHDEL_COOLDOWN_DURATION_MS', DEFAULT_DURATION_MS)
) {
  /** @type {Map<string, {failCount: number, until: number | null, reason: string | null}>} */
  const entries = new Map()

  /**
   * Look up the active cooldown entry for `key`. An entry whose window
   * has passed is deleted as a side effect.
   *
   * @param {string} key
   * @returns {{failCount: number, until: number | null, reason: string | null} | null}
   *   The live entry, or `null` when the key is not cooling down.
   */
  function check (key) {
    const entry = entries.get(key)
    if (!entry || !entry.until) return null
    if (Date.now() < entry.until) return entry
    entries.delete(key)
    return null
  }

  /**
   * Record one failure for `key`.
   *
   * Returns `true` only when this call opened a new cooldown window
   * (none existed, or the stored deadline had already passed). A failure
   * arriving while a window is active still bumps `failCount` but leaves
   * `until` untouched — otherwise concurrent failures that slipped past
   * the pre-dispatch check would keep pushing the deadline out.
   *
   * @param {string} key
   * @param {{immediate?: boolean}} [options]  `immediate` opens the window
   *   on the first failure and labels the reason `'auth'`.
   * @returns {boolean} true if this call freshly activated the cooldown
   */
  function recordFailure (key, { immediate = false } = {}) {
    let entry = entries.get(key)
    if (!entry) {
      entry = { failCount: 0, until: null, reason: null }
      entries.set(key, entry)
    }
    entry.failCount += 1

    const thresholdReached = immediate || entry.failCount >= threshold
    if (!thresholdReached) return false

    const now = Date.now()
    if (entry.until == null || now >= entry.until) {
      entry.until = now + durationMs
      entry.reason = immediate ? 'auth' : 'consecutive_failures'
      return true
    }
    return false
  }

  /**
   * Forget everything recorded for `key`.
   *
   * @param {string} key
   */
  function reset (key) {
    entries.delete(key)
  }

  /**
   * Describe the active cooldown for `key` as a TypedError-shaped object,
   * or return `undefined` when the key is not cooling down.
   *
   * @param {string} key
   * @returns {import('#core/errors.js').TypedError | undefined}
   */
  function coolingDownError (key) {
    const entry = check(key)
    if (!entry) return undefined

    const secsLeft = Math.ceil((entry.until - Date.now()) / 1000)
    return {
      message: 'provider in cooldown',
      detail: `${key} is in cooldown for ${secsLeft}s after ${entry.failCount} consecutive failures (${entry.reason})`,
      severity: 'warn',
      retryable: true,
      type: 'PROVIDER_COOLDOWN'
    }
  }

  return { check, recordFailure, reset, coolingDownError, threshold, durationMs }
}
|
|
106
|
+
|
|
107
|
+
// One tracker shared by the whole session process. Its operations are
// exported individually so callers can write
// `import * as cooldown from './_cooldown.js'` and then call
// `cooldown.reset(key)` / `cooldown.coolingDownError(key)`.
export const {
  check,
  recordFailure,
  reset,
  coolingDownError
} = createCooldownTracker()
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Session-process logger.
|
|
3
|
+
*
|
|
4
|
+
* Minimal pino-shape writer with no pino dependency. Writes one
|
|
5
|
+
* newline-delimited JSON object per call to **stderr** — stdout is
|
|
6
|
+
* reserved for the NDJSON event stream. Every line carries the
|
|
7
|
+
* log level, subsystem message, and any structured fields the caller
|
|
8
|
+
* included (including `traceId` / `spanId` for SigNoz / Jaeger /
|
|
9
|
+
* Honeycomb correlation).
|
|
10
|
+
*
|
|
11
|
+
* Verbosity tier (`MOHDEL_VERBOSITY`, 0/1/2) matches the LOGGING.md
|
|
12
|
+
* spec. Level gate (`MOHDEL_LOG_LEVEL`, one of trace/debug/info/warn/
|
|
13
|
+
* error/fatal/silent) is the standard severity filter applied on top
|
|
14
|
+
* of the tier.
|
|
15
|
+
*
|
|
16
|
+
* @module session/logger
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
// Numeric rank of each level name; a line is written when its rank is at
// least the configured threshold. `silent` outranks every real level, so
// selecting it suppresses all output.
const LEVELS = { trace: 10, debug: 20, info: 30, warn: 40, error: 50, fatal: 60, silent: 100 }

// Real sessions default to "warn" so anomalies stay visible. Test runs
// deliberately provoke those anomalies and would flood stderr, so under
// vitest the default is "silent" unless MOHDEL_LOG_LEVEL says otherwise.
const runningUnderVitest = Boolean(process.env.VITEST)
const DEFAULT_LEVEL = runningUnderVitest ? 'silent' : 'warn'
const DEFAULT_VERBOSITY = 1
|
|
35
|
+
|
|
36
|
+
/**
 * Read a base-10 integer from an environment variable. Zero and negative
 * values are accepted as-is.
 *
 * @param {string} name      Environment variable name.
 * @param {number} fallback  Returned when the variable is unset, empty,
 *                           or does not start with an integer.
 * @returns {number}
 */
function envInt (name, fallback) {
  const raw = process.env[name]
  if (!raw) return fallback

  const parsed = Number.parseInt(raw, 10)
  if (!Number.isFinite(parsed)) return fallback
  return parsed
}
|
|
42
|
+
|
|
43
|
+
/**
 * Read a log-level name from an environment variable, case-insensitively.
 *
 * @param {string} name      Environment variable name.
 * @param {string} fallback  Returned when the variable is unset, empty,
 *                           or not one of the names in `LEVELS`.
 * @returns {string} A lower-cased key of `LEVELS`, or `fallback`.
 */
function envLevel (name, fallback) {
  const raw = process.env[name]
  if (!raw) return fallback

  const candidate = String(raw).toLowerCase()
  // Own keys only — names inherited from Object.prototype (e.g.
  // "constructor") must not be accepted as levels.
  const isKnownLevel = Object.keys(LEVELS).includes(candidate)
  return isKnownLevel ? candidate : fallback
}
|
|
49
|
+
|
|
50
|
+
/**
 * Create a logger that writes one JSON object per line.
 *
 * Each level method accepts either `(message)` or `(fields, message?)`.
 * Lines below the configured level are dropped. `verbosity` is stored
 * and exposed on the logger but does not gate anything in this function.
 *
 * @param {{level?: string, verbosity?: number, stream?: NodeJS.WritableStream, context?: object}} [opts]
 *   - `level`: minimum level; defaults to MOHDEL_LOG_LEVEL, else the module default.
 *   - `verbosity`: defaults to MOHDEL_VERBOSITY, else the module default.
 *   - `stream`: destination; defaults to `process.stderr`.
 *   - `context`: fields merged into every line.
 * @returns A frozen object with `level`, `verbosity`, one method per
 *   level (`trace` … `fatal`) and `withContext(extra)`.
 */
export function createLogger (opts = {}) {
  const level = opts.level ?? envLevel('MOHDEL_LOG_LEVEL', DEFAULT_LEVEL)
  const verbosity = opts.verbosity ?? envInt('MOHDEL_VERBOSITY', DEFAULT_VERBOSITY)
  const stream = opts.stream ?? process.stderr
  const baseContext = { ...(opts.context ?? {}) }
  // An unrecognised level name falls back to the default threshold.
  const threshold = LEVELS[level] ?? LEVELS[DEFAULT_LEVEL]

  const writeLine = (lvl, firstArg, msg) => {
    if (LEVELS[lvl] < threshold) return

    const record = { level: lvl, time: Date.now(), ...baseContext }
    const argKind = typeof firstArg
    if (argKind === 'string') {
      record.msg = firstArg
    } else if (firstArg && argKind === 'object') {
      // Stack traces can carry sensitive data from some SDKs (tokens,
      // request bodies passed into Error constructors), so they are
      // kept only at trace/debug.
      const keepStacks = LEVELS[lvl] <= LEVELS.debug
      mergeFields(record, firstArg, keepStacks)
      if (msg !== undefined) record.msg = msg
    }

    try {
      stream.write(`${JSON.stringify(record)}\n`)
    } catch {
      // Deliberately ignored: a failing log write must not take down
      // the session.
    }
  }

  const api = { level, verbosity }
  for (const name of ['trace', 'debug', 'info', 'warn', 'error', 'fatal']) {
    api[name] = (fields, msg) => writeLine(name, fields, msg)
  }

  /**
   * Derive a logger with the same level, verbosity and stream whose
   * lines additionally carry `extra` (which wins over the base context).
   *
   * @param {object} extra
   */
  api.withContext = (extra) => createLogger({
    level,
    verbosity,
    stream,
    context: { ...baseContext, ...extra }
  })

  return Object.freeze(api)
}
|
|
109
|
+
|
|
110
|
+
/**
 * Copy the own enumerable fields of `source` onto `target`.
 *
 * `Error` values are replaced by a plain object, because
 * `JSON.stringify` would otherwise drop their message. The flattened
 * shape is `{ message, name }`, plus `stack` (only when `includeStack`
 * is set), `code` and `status` when those are truthy.
 *
 * @param {object} target        Mutated in place.
 * @param {object} source
 * @param {boolean} includeStack Whether error stacks are kept.
 */
function mergeFields (target, source, includeStack) {
  for (const key of Object.keys(source)) {
    const value = source[key]
    if (!(value instanceof Error)) {
      target[key] = value
      continue
    }

    const flattened = { message: value.message, name: value.name }
    if (includeStack && value.stack) flattened.stack = value.stack
    if (value.code) flattened.code = value.code
    if (value.status) flattened.status = value.status
    target[key] = flattened
  }
}
|
|
133
|
+
|
|
134
|
+
// Module-level default — used when a caller doesn't pass their own.
|
|
135
|
+
// Keeping the stream open lets adapters import the singleton
|
|
136
|
+
// directly; `withContext` spawns scoped children per call without
|
|
137
|
+
// allocating a new writer.
|
|
138
|
+
// Created once at module load with createLogger()'s defaults: level from
// MOHDEL_LOG_LEVEL, verbosity from MOHDEL_VERBOSITY, output to
// process.stderr, and no base context.
export const logger = createLogger()
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minute-bucket rate limiter (per-key: provider or provider/model).
|
|
3
|
+
*
|
|
4
|
+
* Tracks RPM and TPM. Returns ms to wait if over limit — throttles
|
|
5
|
+
* rather than rejecting, so the caller can absorb small bursts
|
|
6
|
+
* without a 429 round-trip.
|
|
7
|
+
*
|
|
8
|
+
* State is session-process-local. In a pool, each session has its
|
|
9
|
+
* own counters — total throughput ~= pool_size × limit. For strict
|
|
10
|
+
* cross-process enforcement, configure it at thin-gate level via
|
|
11
|
+
* the `QuotaPolicy` hook.
|
|
12
|
+
*
|
|
13
|
+
* @module session/rate-limiter
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/**
 * Create an independent minute-bucket rate limiter.
 *
 * Each key owns one bucket `{ count, tokens, minute }` for the current
 * wall-clock minute; a bucket is replaced by a fresh one the first time
 * its key is touched in a later minute.
 *
 * @returns {{
 *   check: (key: string, limits?: {rpmLimit?: number, tpmLimit?: number}) => number,
 *   recordRequest: (key: string) => void,
 *   recordTokens: (key: string, tokens: number) => void
 * }}
 */
export function createRateLimiter () {
  /** @type {Map<string, {count: number, tokens: number, minute: number}>} */
  const buckets = new Map()

  const currentMinute = () => Math.floor(Date.now() / 60000)

  /** @param {string} key */
  const getBucket = (key) => {
    const minute = currentMinute()
    const existing = buckets.get(key)
    if (existing && existing.minute === minute) return existing
    const fresh = { count: 0, tokens: 0, minute }
    buckets.set(key, fresh)
    return fresh
  }

  /** @param {number} minute */
  const msUntilNextMinute = (minute) => Math.max(0, (minute + 1) * 60000 - Date.now())

  /**
   * Returns ms to wait before sending. 0 means go ahead.
   *
   * Semantics per dimension:
   * - `undefined` / `null` → no limit configured, skipped.
   * - `0` → deny all (killswitch): the counter is always >= 0, so the
   *   time left in the current minute is returned.
   * - positive number → throttle once the counter reaches that value.
   *
   * @param {string} key
   * @param {{rpmLimit?: number, tpmLimit?: number}} limits
   * @returns {number}
   */
  const check = (key, { rpmLimit, tpmLimit } = {}) => {
    if (rpmLimit == null && tpmLimit == null) return 0
    const bucket = getBucket(key)
    if (rpmLimit != null && bucket.count >= rpmLimit) return msUntilNextMinute(bucket.minute)
    if (tpmLimit != null && bucket.tokens >= tpmLimit) return msUntilNextMinute(bucket.minute)
    return 0
  }

  /**
   * Count one request against the current minute.
   *
   * @param {string} key
   */
  const recordRequest = (key) => {
    getBucket(key).count++
  }

  /**
   * Add `tokens` to the current minute's token total.
   *
   * Counts that are not finite numbers (e.g. `undefined` when no usage
   * was reported, or `NaN`) are ignored. Adding them would turn the total
   * into `NaN`, and since every comparison against `NaN` is false that
   * would switch TPM throttling off for the rest of the minute.
   *
   * @param {string} key
   * @param {number} tokens
   */
  const recordTokens = (key, tokens) => {
    const amount = Number(tokens)
    if (!Number.isFinite(amount)) return
    getBucket(key).tokens += amount
  }

  return { check, recordRequest, recordTokens }
}
|
|
72
|
+
|
|
73
|
+
// One limiter shared by the whole session process; its operations are
// exported individually.
export const {
  check,
  recordRequest,
  recordTokens
} = createRateLimiter()
|