mohdel 0.90.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +377 -0
  3. package/config/benchmarks.json +39 -0
  4. package/js/client/call.js +75 -0
  5. package/js/client/call_image.js +82 -0
  6. package/js/client/gate-binary.js +72 -0
  7. package/js/client/index.js +16 -0
  8. package/js/client/ndjson.js +29 -0
  9. package/js/client/transport.js +48 -0
  10. package/js/core/envelope.js +141 -0
  11. package/js/core/errors.js +75 -0
  12. package/js/core/events.js +96 -0
  13. package/js/core/image.js +58 -0
  14. package/js/core/index.js +10 -0
  15. package/js/core/status.js +48 -0
  16. package/js/factory/bridge.js +372 -0
  17. package/js/session/_cooldown.js +114 -0
  18. package/js/session/_logger.js +138 -0
  19. package/js/session/_rate_limiter.js +77 -0
  20. package/js/session/_tracing.js +58 -0
  21. package/js/session/adapters/_cancelled.js +44 -0
  22. package/js/session/adapters/_catalog.js +58 -0
  23. package/js/session/adapters/_chat_completions.js +439 -0
  24. package/js/session/adapters/_errors.js +85 -0
  25. package/js/session/adapters/_images.js +60 -0
  26. package/js/session/adapters/_lazy_json_cache.js +76 -0
  27. package/js/session/adapters/_pricing.js +67 -0
  28. package/js/session/adapters/_providers.js +60 -0
  29. package/js/session/adapters/_tools.js +185 -0
  30. package/js/session/adapters/_videos.js +283 -0
  31. package/js/session/adapters/anthropic.js +397 -0
  32. package/js/session/adapters/cerebras.js +28 -0
  33. package/js/session/adapters/deepseek.js +32 -0
  34. package/js/session/adapters/echo.js +51 -0
  35. package/js/session/adapters/fake.js +262 -0
  36. package/js/session/adapters/fireworks.js +46 -0
  37. package/js/session/adapters/gemini.js +381 -0
  38. package/js/session/adapters/groq.js +23 -0
  39. package/js/session/adapters/image/fake.js +55 -0
  40. package/js/session/adapters/image/index.js +40 -0
  41. package/js/session/adapters/image/novita.js +135 -0
  42. package/js/session/adapters/image/openai.js +50 -0
  43. package/js/session/adapters/index.js +53 -0
  44. package/js/session/adapters/mistral.js +31 -0
  45. package/js/session/adapters/novita.js +29 -0
  46. package/js/session/adapters/openai.js +381 -0
  47. package/js/session/adapters/openrouter.js +66 -0
  48. package/js/session/adapters/xai.js +27 -0
  49. package/js/session/bin.js +54 -0
  50. package/js/session/driver.js +160 -0
  51. package/js/session/index.js +18 -0
  52. package/js/session/run.js +393 -0
  53. package/js/session/run_image.js +61 -0
  54. package/package.json +107 -0
  55. package/src/cli/ask.js +160 -0
  56. package/src/cli/backup.js +107 -0
  57. package/src/cli/bench.js +262 -0
  58. package/src/cli/check.js +123 -0
  59. package/src/cli/colored-logger.js +67 -0
  60. package/src/cli/colors.js +13 -0
  61. package/src/cli/default.js +39 -0
  62. package/src/cli/index.js +150 -0
  63. package/src/cli/json-output.js +60 -0
  64. package/src/cli/model.js +571 -0
  65. package/src/cli/onboard.js +232 -0
  66. package/src/cli/rank.js +176 -0
  67. package/src/cli/ratelimit.js +160 -0
  68. package/src/cli/tag.js +105 -0
  69. package/src/lib/assets/alibaba.svg +1 -0
  70. package/src/lib/assets/anthropic.svg +5 -0
  71. package/src/lib/assets/deepseek.svg +1 -0
  72. package/src/lib/assets/gemini.svg +1 -0
  73. package/src/lib/assets/google.svg +2 -0
  74. package/src/lib/assets/kwaipilot.svg +1 -0
  75. package/src/lib/assets/meta.svg +1 -0
  76. package/src/lib/assets/minimax.svg +9 -0
  77. package/src/lib/assets/moonshotai.svg +4 -0
  78. package/src/lib/assets/openai.svg +5 -0
  79. package/src/lib/assets/xai.svg +1 -0
  80. package/src/lib/assets/xiaomi.svg +2 -0
  81. package/src/lib/assets/zai.svg +219 -0
  82. package/src/lib/benchmark-score.js +215 -0
  83. package/src/lib/benchmark-truth.js +68 -0
  84. package/src/lib/cache.js +76 -0
  85. package/src/lib/common.js +208 -0
  86. package/src/lib/cooldown.js +63 -0
  87. package/src/lib/creators.js +71 -0
  88. package/src/lib/curated-cache.js +146 -0
  89. package/src/lib/errors.js +126 -0
  90. package/src/lib/index.js +726 -0
  91. package/src/lib/logger.js +29 -0
  92. package/src/lib/providers.js +87 -0
  93. package/src/lib/rank.js +390 -0
  94. package/src/lib/rate-limiter.js +50 -0
  95. package/src/lib/schema.js +150 -0
  96. package/src/lib/select.js +474 -0
  97. package/src/lib/tracing.js +62 -0
  98. package/src/lib/utils.js +85 -0
@@ -0,0 +1,372 @@
1
+ /**
2
+ * Factory bridge — in-process path from the `mohdel()` factory API
3
+ * to the `/session` runtime.
4
+ *
5
+ * The factory surface is `mohdel().use(model).answer(prompt, options)`
6
+ * returning a `Promise<AnswerResult>`. This module takes those
7
+ * call arguments, builds a `CallEnvelope` (the wire shape
8
+ * documented in `PROTOCOL.md`), drives `run()` / `runImage()`
9
+ * in `js/session/`, and collapses the event stream back into a
10
+ * single result object.
11
+ *
12
+ * The bridge is what the `mo` CLI, library consumers doing
13
+ * `import mohdel from 'mohdel'`, and test harnesses all sit on top
14
+ * of. It's **in-process only** — `thin-gate` is not spawned. For
15
+ * the subprocess path use `mohdel/client` directly.
16
+ *
17
+ * Adapter error events are rethrown as `MohdelError` so caller
18
+ * catch blocks see a thrown exception rather than an event.
19
+ *
20
+ * @module factory/bridge
21
+ */
22
+
23
+ import { run } from '../session/run.js'
24
+ import { runImage } from '../session/run_image.js'
25
+ import { MohdelError, Severity } from '../../src/lib/errors.js'
26
+ import { createRealtimeDeltaBuffer } from '../../src/lib/utils.js'
27
+
28
+ /**
29
+ * @typedef {object} BridgeDeps
30
+ * @property {object} [cooldown] Cooldown tracker injected into run().
31
+ * @property {object} [limiter] Rate limiter injected into run().
32
+ * @property {(provider: string) => any} [resolveProviderLimits]
33
+ * Hook that returns `{rpmLimit, tpmLimit}` for a given provider —
34
+ * lets the factory keep its own `providersConfig` as source of
35
+ * truth instead of the module-level `providers.json` loader.
36
+ * @property {AbortSignal} [signal]
37
+ */
38
+
39
/**
 * Drive a factory `answer()` call through the /session runtime and
 * collapse the resulting event stream into a single result.
 *
 * ## Option mapping
 *
 * Each factory `answer()` option is either copied onto the envelope
 * or deliberately not forwarded. Check this table before treating a
 * "missing option" report as a bug.
 *
 * | factory option                                            | envelope destination                                 |
 * |-----------------------------------------------------------|------------------------------------------------------|
 * | `outputBudget`/`outputType`/`outputStyle`/`outputEffort`  | flat fields on envelope                              |
 * | `images` / `videos` / `cache`                             | flat fields on envelope                              |
 * | `tools` / `toolChoice` / `parallelToolCalls`              | flat fields on envelope                              |
 * | `identifier`                                              | envelope.identifier (adapter maps to provider field) |
 * | `realtimeHandler` / `bufferOpts`                          | drained via createRealtimeDeltaBuffer                |
 * | `providerOrder` / `providerAllow` / `providerDeny`        | envelope.providerOptions.openrouter                  |
 * | `traceparent` / `baggage`                                 | envelope transport metadata                          |
 * | `callId` / `authId`                                       | envelope transport metadata                          |
 * | `configuration.apiKey`                                    | envelope.auth.key                                    |
 * | `configuration.baseURL`                                   | envelope.auth.baseURL                                |
 * | **`parentSpan`**                                          | **dropped** — use `traceparent` instead              |
 * | **`maybeThrowHandler`**                                   | **dropped** — no factory-side validation hook        |
 * | **other `configuration` keys (`defaultHeaders`, …)**      | **rejected** with `CONFIGURATION_UNSUPPORTED`        |
 *
 * @param {object} args
 * @param {string} args.provider       Resolved provider name (e.g. 'openai').
 * @param {string} args.model          Provider-native model id (no provider prefix).
 * @param {string} args.modelKey       Catalog key `<provider>/<model>`; forwarded to the error mapper.
 * @param {any}    args.configuration  Provider config. `apiKey` and `baseURL` are threaded; any other key is rejected.
 * @param {string | any[] | {system?: any, messages: any[]}} args.prompt
 * @param {any}    [args.options]      Factory `answer()` options.
 * @param {BridgeDeps} [deps]          Passed through to `run()` unchanged.
 * @returns {Promise<any>} The `result` carried by the terminal `done` event.
 * @throws {MohdelError} `SESSION_NO_TERMINAL` when the stream ends without a
 *   `done`/`error` event; otherwise the terminal `error` event converted by
 *   `fromTypedError`.
 */
export async function runAnswer ({ provider, model, modelKey, configuration, prompt, options = {} }, deps = {}) {
  const envelope = toEnvelope({ provider, model, configuration, prompt, options })

  // Streaming callbacks only fire if `delta` events are fed into a
  // buffer that batches them according to `bufferOpts`. F54: when no
  // handler was supplied (the common case) no buffer is allocated.
  const { realtimeHandler, bufferOpts } = options
  const buffer = realtimeHandler
    ? createRealtimeDeltaBuffer(realtimeHandler, bufferOpts)
    : null

  let last
  try {
    for await (const event of run(envelope, deps)) {
      if (event.type === 'delta' && event.delta) {
        buffer?.push(event.delta.type, event.delta.delta)
        continue
      }
      if (event.type === 'done' || event.type === 'error') {
        // First terminal event wins; anything after it is not consumed.
        last = event
        break
      }
    }
  } finally {
    // Success, error event, or thrown exception: the handler still
    // receives whatever is left in the buffer.
    if (buffer) buffer.flush()
  }

  if (!last) {
    throw new MohdelError('SESSION_NO_TERMINAL', {
      severity: Severity.ERROR,
      detail: 'session run produced no terminal event',
      retryable: false
    })
  }

  if (last.type === 'error') {
    throw fromTypedError(last.error, { provider, model, modelKey })
  }

  return last.result
}
119
+
120
/**
 * Drive a factory `image()` call through the /session runtime.
 *
 * Builds a flat image envelope, hands it to `runImage()`, and unwraps
 * the `{ ok, result, error }` outcome: `result` is returned when `ok`
 * is truthy, otherwise the error is converted with `fromTypedError`
 * and thrown.
 *
 * @param {object} args
 * @param {string} args.provider
 * @param {string} args.model
 * @param {any}    args.configuration  Converted to `envelope.auth` by `configToAuth`.
 * @param {string} args.prompt
 * @param {any}    [args.options]      `callId`, `authId`, `size` and `seed` are read.
 * @param {any}    [args.spec]         modelSpec passthrough so the image adapter
 *                                     picks up `imageEndpoint`, `imageDefaultSize`,
 *                                     etc. without re-reading the catalog.
 * @returns {Promise<any>}
 */
export async function runAnswerImage ({ provider, model, configuration, prompt, options = {}, spec }) {
  const { callId, authId, size, seed } = options

  const envelope = {
    callId: callId || newCallId(),
    authId: authId || 'local',
    auth: configToAuth(configuration),
    provider,
    model,
    prompt
  }
  if (size) envelope.size = size
  // `seed` uses a null-check rather than truthiness so that 0 is kept.
  if (seed != null) envelope.seed = seed

  const deps = spec ? { spec } : {}
  const outcome = await runImage(envelope, deps)
  if (outcome.ok) return outcome.result
  throw fromTypedError(outcome.error, { provider, model })
}
151
+
152
/**
 * Build the `CallEnvelope` for an `answer()` call from factory arguments.
 *
 * Transport fields (`callId`, `authId`, `auth`, `provider`, `model`,
 * `prompt`) are always present. Optional fields are copied only when
 * they pass the per-field test listed below, so absent options never
 * appear as `undefined` keys on the envelope.
 *
 * @param {object} args
 * @param {string} args.provider
 * @param {string} args.model
 * @param {any} args.configuration
 * @param {string | any[] | {system?: any, messages: any[]}} args.prompt
 * @param {any} args.options
 * @returns {import('#core/envelope.js').CallEnvelope}
 */
function toEnvelope ({ provider, model, configuration, prompt, options }) {
  /** @type {import('#core/envelope.js').CallEnvelope} */
  const envelope = {
    callId: options.callId || newCallId(),
    authId: options.authId || 'local',
    auth: configToAuth(configuration),
    provider,
    model,
    prompt: toEnvelopePrompt(prompt)
  }

  const isTruthy = (value) => Boolean(value)
  const isDefined = (value) => value !== undefined
  const isNonEmpty = (value) => Boolean(value?.length)

  // [option name, "should copy" test] — applied in this order so the
  // envelope's key order stays stable.
  const passthrough = [
    ['traceparent', isTruthy],
    ['baggage', isTruthy],
    ['outputBudget', isDefined],
    ['outputType', isTruthy],
    ['outputStyle', isTruthy],
    ['outputEffort', isTruthy],
    ['images', isNonEmpty],
    ['videos', isNonEmpty],
    ['cache', isDefined],
    ['tools', isNonEmpty],
    ['toolChoice', isTruthy]
  ]
  for (const [field, shouldCopy] of passthrough) {
    if (shouldCopy(options[field])) envelope[field] = options[field]
  }

  // Only an explicit `false` is forwarded; `true`/absent leave the
  // field off the envelope.
  if (options.parallelToolCalls === false) envelope.parallelToolCalls = false
  if (options.identifier) envelope.identifier = options.identifier

  // OpenRouter routing preferences travel in their own bag so the flat
  // envelope stays clean. The openrouter adapter reads this via
  // `config.mutateArgs`.
  const { providerOrder, providerAllow, providerDeny } = options
  if (providerOrder || providerAllow || providerDeny) {
    envelope.providerOptions = {
      openrouter: { order: providerOrder, allow: providerAllow, deny: providerDeny }
    }
  }

  return envelope
}
201
+
202
/**
 * Convert a session `TypedError` into a `MohdelError` so factory-API
 * catch blocks — which duck-type on `.detail` / `.retryable` — keep
 * working without knowing about the session event-stream shape.
 *
 * A missing payload (`null`/`undefined`, e.g. an `error` event or a
 * failed image outcome that carries no `error` field) is tolerated:
 * the caller still receives a `PROVIDER_ERROR` `MohdelError` instead
 * of a `TypeError` that would hide the original failure.
 *
 * @param {import('#core/errors.js').TypedError | null | undefined} err
 * @param {{provider: string, model: string, modelKey?: string}} ctx
 * @returns {MohdelError}
 */
function fromTypedError (err, ctx) {
  const typed = err ?? { detail: 'session reported a failure without an error payload' }

  // TypedError `message` is the machine-key label (e.g. "provider error
  // 400"); `detail` carries the provider's own rejection text when it
  // was safe to surface. Prefer the detail so callers see what to fix.
  return new MohdelError(typed.type || 'PROVIDER_ERROR', {
    severity: toSeveritySymbol(typed.severity),
    detail: typed.detail || typed.message,
    retryable: !!typed.retryable,
    context: { provider: ctx.provider, model: ctx.model }
  })
}
222
+
223
/**
 * Map a wire-level severity name to the matching `Severity` member.
 * Anything that is not one of the six lowercase names (including
 * `undefined`) maps to `Severity.ERROR`.
 *
 * @param {string | undefined} s
 */
function toSeveritySymbol (s) {
  const known = ['trace', 'debug', 'info', 'warn', 'error', 'fatal']
  // Membership is checked against the explicit list first, so only the
  // six expected `Severity` members can ever be read.
  return known.includes(s) ? Severity[s.toUpperCase()] : Severity.ERROR
}
235
+
236
/**
 * Generate a local call id of the form `m86_<epoch-ms>_<base36 suffix>`.
 * The suffix comes from `Math.random()` and is at most six characters
 * long; it is a convenience label, not a security token.
 *
 * @returns {string}
 */
function newCallId () {
  const stamp = Date.now()
  const suffix = Math.random().toString(36).slice(2, 8)
  return ['m86', stamp, suffix].join('_')
}
239
+
240
// The only `configuration` keys that may be supplied per call.
const ALLOWED_CONFIG_KEYS = new Set(['apiKey', 'baseURL'])

/**
 * Turn the factory's `configuration` bag into an envelope `auth` object.
 *
 * Accepts `apiKey` + `baseURL`; anything else is rejected with
 * `CONFIGURATION_UNSUPPORTED` rather than silently dropped — otherwise
 * a caller could believe their proxy / org header / timeout is in
 * effect when it isn't, which in the worst case leaks auth keys and
 * prompts to a provider they never intended to reach.
 *
 * A falsy `configuration` yields `{ key: '' }`. `baseURL` is only set
 * on the result when it is truthy.
 *
 * @param {any} configuration
 * @returns {import('#core/envelope.js').Auth}
 * @throws {MohdelError} `CONFIGURATION_UNSUPPORTED` listing every rejected key.
 */
function configToAuth (configuration) {
  if (!configuration) return { key: '' }

  const rejected = []
  for (const name of Object.keys(configuration)) {
    if (!ALLOWED_CONFIG_KEYS.has(name)) rejected.push(name)
  }
  if (rejected.length > 0) {
    throw new MohdelError('CONFIGURATION_UNSUPPORTED', {
      severity: Severity.ERROR,
      detail:
        'per-call SDK configuration is limited to `apiKey` and `baseURL`. ' +
        `Unsupported keys: ${rejected.join(', ')}. ` +
        'Move other fields to environment variables or pin them at factory construction.',
      retryable: false
    })
  }

  const { apiKey, baseURL } = configuration
  const key = apiKey || ''
  return baseURL ? { key, baseURL } : { key }
}
270
+
271
/**
 * Translate the factory's structured-input shape
 * (`{ system?, messages: [{role, content, toolCalls?, toolCallId?, toolName?}] }`)
 * into the envelope `Message[]` expected by session adapters. Plain
 * strings and arrays are returned untouched.
 *
 * Per-message mapping:
 *   - factory `tool_result` → envelope role `tool`, carrying `content`
 *     plus `toolCallId` / `toolName` when they are set.
 *   - `assistant` with a non-empty `toolCalls` array → each call is
 *     copied as `{id, name, arguments}` (plus `thoughtSignature` when
 *     present) so adapters can emit the provider-native tool_use.
 *   - everything else → `{role, content}`.
 * A missing `content` becomes `''`. A non-null `system` is flattened
 * with `flattenSystem` and emitted first as a `system` message.
 *
 * @param {unknown} prompt
 * @returns {string | import('#core/envelope.js').Message[]}
 * @throws {MohdelError} `SESSION_INVALID_PROMPT` for any other shape.
 */
function toEnvelopePrompt (prompt) {
  if (typeof prompt === 'string') return prompt
  if (Array.isArray(prompt)) return /** @type {any} */(prompt)

  const isStructured = prompt && typeof prompt === 'object' && Array.isArray(prompt.messages)
  if (!isStructured) {
    // Reject unknown shapes here with a clear error. Passing them on
    // would put a raw non-iterable in the envelope and surface as a
    // confusing `prompt.map is not a function` deep inside the adapter.
    throw new MohdelError('SESSION_INVALID_PROMPT', {
      severity: Severity.ERROR,
      retryable: false,
      detail:
        'prompt must be a string, a Message[] array, ' +
        'or { system?, messages: [...] }; received ' +
        describePromptShape(prompt)
    })
  }

  // Provider-specific extras are preserved so a later replay reaches
  // the adapter intact. Gemini rejects a tool-call replay whose
  // original `thoughtSignature` has been stripped.
  const convertToolCall = (call) => ({
    id: call.id,
    name: call.name,
    arguments: call.arguments ?? {},
    ...(call.thoughtSignature ? { thoughtSignature: call.thoughtSignature } : {})
  })

  const convertMessage = (source) => {
    const content = source.content ?? ''
    if (source.role === 'tool_result') {
      return {
        role: 'tool',
        content,
        ...(source.toolCallId ? { toolCallId: source.toolCallId } : {}),
        ...(source.toolName ? { toolName: source.toolName } : {})
      }
    }
    const hasToolCalls = Array.isArray(source.toolCalls) && source.toolCalls.length
    if (source.role === 'assistant' && hasToolCalls) {
      return { role: 'assistant', content, toolCalls: source.toolCalls.map(convertToolCall) }
    }
    return { role: source.role, content }
  }

  const converted = []
  if (prompt.system != null) {
    converted.push({ role: 'system', content: flattenSystem(prompt.system) })
  }
  for (const message of prompt.messages) {
    converted.push(convertMessage(message))
  }
  return converted
}
345
+
346
/**
 * Produce a short, value-free description of a rejected prompt for
 * use in an error message: `'null'`, `'undefined'`, the `typeof` name
 * for primitives and functions, or one of two object descriptions
 * depending on whether a non-array `messages` property is present.
 *
 * @param {unknown} v
 * @returns {string}
 */
function describePromptShape (v) {
  if (v == null) return String(v)

  const kind = typeof v
  if (kind !== 'object') return kind

  const hasBadMessages = 'messages' in v && !Array.isArray(/** @type {any} */(v).messages)
  return hasBadMessages ? 'object with non-array messages' : 'object without messages'
}
356
+
357
/**
 * Flatten the factory's `system` value to a single string.
 *
 * The factory accepts `system` as either a plain string or an array
 * of `{text, cache?}` blocks (for Anthropic prompt caching). The
 * envelope's `Message.content` is string-or-MessagePart[]; blocks are
 * joined here with newlines, skipping empty ones. Callers who need
 * Anthropic cache-control blocks preserved need a separate path.
 *
 * Two near-miss shapes are also handled so their text is not lost:
 *   - a string entry inside the array is used as-is (previously it
 *     was silently dropped);
 *   - a single block object with a string `text` yields that text
 *     (previously it stringified to "[object Object]").
 * Any other value is coerced with `String()`; `null`/`undefined`
 * become `''`.
 *
 * @param {string | {text?: string} | Array<string | {text?: string}>} system
 * @returns {string}
 */
function flattenSystem (system) {
  if (typeof system === 'string') return system
  if (Array.isArray(system)) {
    return system
      .map(block => (typeof block === 'string' ? block : block?.text ?? ''))
      .filter(Boolean)
      .join('\n')
  }
  if (system && typeof system === 'object' && typeof system.text === 'string') {
    return system.text
  }
  return String(system ?? '')
}
@@ -0,0 +1,114 @@
1
+ /**
2
+ * Consecutive-failure cooldown tracker.
3
+ *
4
+ * - Auth failures (401/403) trigger immediate cooldown (1 failure).
5
+ * - Other retryable failures require `threshold` consecutive
6
+ * failures before cooldown triggers.
7
+ * - Success resets the failure counter.
8
+ * - `check`/`coolingDownError` expire the entry when the
9
+ * cooldown window passes.
10
+ *
11
+ * Configuration comes from env vars (session-level, global):
12
+ * - MOHDEL_COOLDOWN_THRESHOLD (default 3)
13
+ * - MOHDEL_COOLDOWN_DURATION_MS (default 60_000)
14
+ *
15
+ * State is session-process-local.
16
+ *
17
+ * @module session/cooldown
18
+ */
19
+
20
// Fallbacks used when the corresponding env var is unset or unusable.
const DEFAULT_THRESHOLD = 3
const DEFAULT_DURATION_MS = 60_000

/**
 * Read a strictly positive integer from `process.env[name]`.
 *
 * The value is parsed with `Number.parseInt(…, 10)`, so trailing
 * non-digits are ignored ("7px" → 7). Unset, empty, unparsable,
 * non-finite, zero and negative values all yield `fallback`.
 *
 * @param {string} name
 * @param {number} fallback
 * @returns {number}
 */
function envInt (name, fallback) {
  const raw = process.env[name]
  if (!raw) return fallback

  const parsed = Number.parseInt(raw, 10)
  const usable = Number.isFinite(parsed) && parsed > 0
  return usable ? parsed : fallback
}
29
+
30
/**
 * Create an independent consecutive-failure cooldown tracker.
 *
 * @param {number} [threshold]   Failures needed before a non-immediate
 *   cooldown starts. Defaults to `MOHDEL_COOLDOWN_THRESHOLD` or 3.
 * @param {number} [durationMs]  Length of a cooldown window. Defaults to
 *   `MOHDEL_COOLDOWN_DURATION_MS` or 60 000.
 * @returns {{check: Function, recordFailure: Function, reset: Function,
 *   coolingDownError: Function, threshold: number, durationMs: number}}
 */
export function createCooldownTracker (
  threshold = envInt('MOHDEL_COOLDOWN_THRESHOLD', DEFAULT_THRESHOLD),
  durationMs = envInt('MOHDEL_COOLDOWN_DURATION_MS', DEFAULT_DURATION_MS)
) {
  /** @type {Map<string, {failCount: number, until: number | null, reason: string | null}>} */
  const entries = new Map()

  /**
   * Return the live entry for `key` while its cooldown window is
   * active, otherwise `null`. An entry whose window has passed is
   * removed as a side effect.
   *
   * @param {string} key
   */
  function check (key) {
    const entry = entries.get(key)
    if (!entry?.until) return null

    const expired = Date.now() >= entry.until
    if (expired) {
      entries.delete(key)
      return null
    }
    return entry
  }

  /**
   * Record one failure for `key`.
   *
   * Returns `true` only when this call **freshly** activates a
   * cooldown — i.e. no window was set, or the previous one had
   * already passed. Failures arriving during an active window still
   * bump `failCount` (for diagnostics) but never move `until`
   * forward: the window is set once, waited out, and cleared on
   * success. If it slid instead, concurrent failures racing past the
   * pre-dispatch check would keep extending the deadline and the
   * user would effectively never recover.
   *
   * @param {string} key
   * @param {{immediate?: boolean}} [options]
   * @returns {boolean} true if this call freshly activated the cooldown
   */
  function recordFailure (key, { immediate = false } = {}) {
    let entry = entries.get(key)
    if (!entry) {
      entry = { failCount: 0, until: null, reason: null }
      entries.set(key, entry)
    }
    entry.failCount += 1

    const tripped = immediate || entry.failCount >= threshold
    if (!tripped) return false

    const now = Date.now()
    const canActivate = entry.until == null || now >= entry.until
    if (!canActivate) return false

    entry.until = now + durationMs
    entry.reason = immediate ? 'auth' : 'consecutive_failures'
    return true
  }

  /**
   * Forget everything recorded for `key`.
   *
   * @param {string} key
   */
  function reset (key) {
    entries.delete(key)
  }

  /**
   * Build a TypedError-shaped object when `key` is cooling down,
   * otherwise return `undefined`. The caller yields it as a
   * `call.error` event.
   *
   * @param {string} key
   * @returns {import('#core/errors.js').TypedError | undefined}
   */
  function coolingDownError (key) {
    const active = check(key)
    if (!active) return undefined

    const remainingMs = active.until - Date.now()
    const secsLeft = Math.ceil(remainingMs / 1000)
    return {
      message: 'provider in cooldown',
      detail: `${key} is in cooldown for ${secsLeft}s after ${active.failCount} consecutive failures (${active.reason})`,
      severity: 'warn',
      retryable: true,
      type: 'PROVIDER_COOLDOWN'
    }
  }

  return { check, recordFailure, reset, coolingDownError, threshold, durationMs }
}
106
+
107
// One tracker per session process. Its methods are exported as named
// members so callers can `import * as cooldown from './_cooldown.js'`
// and use `cooldown.reset(key)` / `cooldown.coolingDownError(key)`.
export const { check, recordFailure, reset, coolingDownError } = createCooldownTracker()
@@ -0,0 +1,138 @@
1
+ /**
2
+ * Session-process logger.
3
+ *
4
+ * Minimal pino-shape writer with no pino dependency. Writes one
5
+ * newline-delimited JSON object per call to **stderr** — stdout is
6
+ * reserved for the NDJSON event stream. Every line carries the
7
+ * log level, subsystem message, and any structured fields the caller
8
+ * included (including `traceId` / `spanId` for SigNoz / Jaeger /
9
+ * Honeycomb correlation).
10
+ *
11
+ * Verbosity tier (`MOHDEL_VERBOSITY`, 0/1/2) matches the LOGGING.md
12
+ * spec. Level gate (`MOHDEL_LOG_LEVEL`, one of trace/debug/info/warn/
13
+ * error/fatal/silent) is the standard severity filter applied on top
14
+ * of the tier.
15
+ *
16
+ * @module session/logger
17
+ */
18
+
19
// Numeric rank per level name; a line is written when its rank is at
// or above the logger's threshold. `silent` outranks every real level.
const LEVELS = {
  trace: 10,
  debug: 20,
  info: 30,
  warn: 40,
  error: 50,
  fatal: 60,
  silent: 100
}

const DEFAULT_VERBOSITY = 1

// Real sessions want "warn" so anomalies are visible. Tests induce
// those anomalies on purpose and would flood stderr, so under vitest
// the default is silent unless MOHDEL_LOG_LEVEL is set explicitly.
const DEFAULT_LEVEL = process.env.VITEST ? 'silent' : 'warn'
35
+
36
/**
 * Read an integer from `process.env[name]` via `Number.parseInt(…, 10)`.
 * Unlike a positivity-checked reader, zero and negative values are
 * accepted; only unset, empty, or non-finite results yield `fallback`.
 *
 * @param {string} name
 * @param {number} fallback
 * @returns {number}
 */
function envInt (name, fallback) {
  const raw = process.env[name]
  if (!raw) return fallback

  const parsed = Number.parseInt(raw, 10)
  if (!Number.isFinite(parsed)) return fallback
  return parsed
}
42
+
43
/**
 * Read a log-level name from `process.env[name]`, case-insensitively.
 * Returns the lowercased name when it is one of the keys of `LEVELS`,
 * otherwise `fallback`.
 *
 * @param {string} name
 * @param {string} fallback
 * @returns {string}
 */
function envLevel (name, fallback) {
  const raw = process.env[name]
  if (!raw) return fallback

  const candidate = String(raw).toLowerCase()
  // Own keys only, so names such as "constructor" are never accepted.
  const isKnown = Object.keys(LEVELS).includes(candidate)
  return isKnown ? candidate : fallback
}
49
+
50
/**
 * Create a logger that writes one JSON object per line to `stream`.
 *
 * Each line starts as `{ level, time, ...context }`. A string first
 * argument becomes `msg`; an object first argument is merged in via
 * `mergeFields` and the optional second argument becomes `msg`.
 * Lines whose level ranks below the configured level are skipped.
 *
 * The requested level is validated against the own keys of `LEVELS`.
 * An unknown name — including inherited keys such as `"constructor"`,
 * which would otherwise produce a non-numeric threshold and let every
 * line through — falls back to `DEFAULT_LEVEL`, and the returned
 * `level` property reports the level actually in effect.
 *
 * `verbosity` is stored, exposed, and inherited by child loggers; it
 * is not consulted when deciding whether to write a line.
 *
 * @param {{level?: string, verbosity?: number, stream?: NodeJS.WritableStream, context?: object}} [opts]
 * @returns {Readonly<{level: string, verbosity: number, trace: Function,
 *   debug: Function, info: Function, warn: Function, error: Function,
 *   fatal: Function, withContext: Function}>}
 */
export function createLogger (opts = {}) {
  const requested = opts.level ?? envLevel('MOHDEL_LOG_LEVEL', DEFAULT_LEVEL)
  const level = Object.prototype.hasOwnProperty.call(LEVELS, requested)
    ? requested
    : DEFAULT_LEVEL
  const verbosity = opts.verbosity ?? envInt('MOHDEL_VERBOSITY', DEFAULT_VERBOSITY)
  const stream = opts.stream ?? process.stderr
  const baseContext = { ...(opts.context ?? {}) }
  const threshold = LEVELS[level]

  const emit = (lvl, firstArg, msg) => {
    if (LEVELS[lvl] < threshold) return
    const line = { level: lvl, time: Date.now(), ...baseContext }
    if (typeof firstArg === 'string') {
      line.msg = firstArg
    } else if (firstArg && typeof firstArg === 'object') {
      // Stack traces can harbor sensitive data from some SDKs (tokens,
      // request bodies passed into Error constructors). Keep them only
      // at the verbose levels (trace/debug), which are off by default
      // in production.
      mergeFields(line, firstArg, LEVELS[lvl] <= LEVELS.debug)
      if (msg !== undefined) line.msg = msg
    }
    try {
      stream.write(JSON.stringify(line) + '\n')
    } catch {
      // Deliberate best-effort: a failed write (or an unserializable
      // field) must not take down the session.
    }
  }

  const trace = (fields, msg) => emit('trace', fields, msg)
  const debug = (fields, msg) => emit('debug', fields, msg)
  const info = (fields, msg) => emit('info', fields, msg)
  const warn = (fields, msg) => emit('warn', fields, msg)
  const error = (fields, msg) => emit('error', fields, msg)
  const fatal = (fields, msg) => emit('fatal', fields, msg)

  /**
   * Derive a child logger that shares level, verbosity and stream and
   * adds `extra` on top of this logger's context.
   *
   * @param {object} extra
   */
  const withContext = (extra) => createLogger({
    level,
    verbosity,
    stream,
    context: { ...baseContext, ...extra }
  })

  return Object.freeze({
    level,
    verbosity,
    trace,
    debug,
    info,
    warn,
    error,
    fatal,
    withContext
  })
}
109
+
110
/**
 * Copy the own enumerable fields of `source` onto `target`.
 *
 * `Error` values are replaced by a plain `{message, name}` object so
 * `JSON.stringify` does not drop the message. `stack` is added only
 * when `includeStack` is set; `code` and `status` are added when they
 * are truthy on the error. All other values are copied by reference.
 *
 * @param {object} target
 * @param {object} source
 * @param {boolean} includeStack
 */
function mergeFields (target, source, includeStack) {
  for (const key of Object.keys(source)) {
    const value = source[key]
    if (!(value instanceof Error)) {
      target[key] = value
      continue
    }

    const flat = { message: value.message, name: value.name }
    if (includeStack && value.stack) flat.stack = value.stack
    if (value.code) flat.code = value.code
    if (value.status) flat.status = value.status
    target[key] = flat
  }
}
133
+
134
// Shared default logger for callers that do not build their own.
// Adapters import this singleton directly; `withContext` derives a
// scoped child per call that reuses the same stream.
const logger = createLogger()

export { logger }
@@ -0,0 +1,77 @@
1
+ /**
2
+ * Minute-bucket rate limiter (per-key: provider or provider/model).
3
+ *
4
+ * Tracks RPM and TPM. Returns ms to wait if over limit — throttles
5
+ * rather than rejecting, so the caller can absorb small bursts
6
+ * without a 429 round-trip.
7
+ *
8
+ * State is session-process-local. In a pool, each session has its
9
+ * own counters — total throughput ~= pool_size × limit. For strict
10
+ * cross-process enforcement, configure it at thin-gate level via
11
+ * the `QuotaPolicy` hook.
12
+ *
13
+ * @module session/rate-limiter
14
+ */
15
+
16
/**
 * Create an independent minute-bucket rate limiter.
 *
 * Each key has one bucket holding the request count and token total
 * for the current wall-clock minute (`floor(Date.now() / 60000)`).
 * A bucket is replaced by a fresh one the first time it is touched
 * in a later minute.
 *
 * @returns {{
 *   check: (key: string, limits?: {rpmLimit?: number, tpmLimit?: number}) => number,
 *   recordRequest: (key: string) => void,
 *   recordTokens: (key: string, tokens: number) => void
 * }}
 */
export function createRateLimiter () {
  const MINUTE_MS = 60000

  /** @type {Map<string, {count: number, tokens: number, minute: number}>} */
  const buckets = new Map()

  /**
   * Fetch (or start) the bucket for the minute containing `now`.
   *
   * @param {string} key
   * @param {number} [now] Epoch ms; defaults to the current time.
   */
  const getBucket = (key, now = Date.now()) => {
    const minute = Math.floor(now / MINUTE_MS)
    const existing = buckets.get(key)
    if (existing && existing.minute === minute) return existing
    const fresh = { count: 0, tokens: 0, minute }
    buckets.set(key, fresh)
    return fresh
  }

  /**
   * Returns ms to wait before sending. 0 means go ahead.
   *
   * Semantics:
   *   - `undefined` / `null` on a dimension → no limit configured,
   *     skipped.
   *   - `0` → **deny all** (killswitch). The time until the next
   *     minute is returned regardless of the current bucket.
   *   - positive number → throttle at that value.
   *
   * The clock is read once and used both to pick the bucket and to
   * compute the wait. Reading it twice allowed a minute rollover
   * between the reads to yield `0` for an exhausted bucket (or the
   * killswitch); with one reading an over-limit result is always
   * strictly positive.
   *
   * @param {string} key
   * @param {{rpmLimit?: number, tpmLimit?: number}} limits
   * @returns {number}
   */
  const check = (key, { rpmLimit, tpmLimit } = {}) => {
    if (rpmLimit == null && tpmLimit == null) return 0

    const now = Date.now()
    const bucket = getBucket(key, now)
    const overRpm = rpmLimit != null && bucket.count >= rpmLimit
    const overTpm = tpmLimit != null && bucket.tokens >= tpmLimit
    if (!overRpm && !overTpm) return 0

    return (bucket.minute + 1) * MINUTE_MS - now
  }

  /** @param {string} key */
  const recordRequest = (key) => {
    getBucket(key).count++
  }

  /**
   * Add `tokens` to the current bucket. Values that are not finite
   * numbers (e.g. `undefined` when a provider reports no usage) are
   * ignored: adding them would turn the counter into `NaN`, and every
   * later `tokens >= tpmLimit` comparison in that minute would then
   * be false, silently disabling the TPM limit.
   *
   * @param {string} key
   * @param {number} tokens
   */
  const recordTokens = (key, tokens) => {
    if (typeof tokens !== 'number' || !Number.isFinite(tokens)) return
    getBucket(key).tokens += tokens
  }

  return { check, recordRequest, recordTokens }
}
72
+
73
// One limiter per session process; its methods are exported as named
// members.
export const { check, recordRequest, recordTokens } = createRateLimiter()