mohdel 0.90.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +377 -0
  3. package/config/benchmarks.json +39 -0
  4. package/js/client/call.js +75 -0
  5. package/js/client/call_image.js +82 -0
  6. package/js/client/gate-binary.js +72 -0
  7. package/js/client/index.js +16 -0
  8. package/js/client/ndjson.js +29 -0
  9. package/js/client/transport.js +48 -0
  10. package/js/core/envelope.js +141 -0
  11. package/js/core/errors.js +75 -0
  12. package/js/core/events.js +96 -0
  13. package/js/core/image.js +58 -0
  14. package/js/core/index.js +10 -0
  15. package/js/core/status.js +48 -0
  16. package/js/factory/bridge.js +372 -0
  17. package/js/session/_cooldown.js +114 -0
  18. package/js/session/_logger.js +138 -0
  19. package/js/session/_rate_limiter.js +77 -0
  20. package/js/session/_tracing.js +58 -0
  21. package/js/session/adapters/_cancelled.js +44 -0
  22. package/js/session/adapters/_catalog.js +58 -0
  23. package/js/session/adapters/_chat_completions.js +439 -0
  24. package/js/session/adapters/_errors.js +85 -0
  25. package/js/session/adapters/_images.js +60 -0
  26. package/js/session/adapters/_lazy_json_cache.js +76 -0
  27. package/js/session/adapters/_pricing.js +67 -0
  28. package/js/session/adapters/_providers.js +60 -0
  29. package/js/session/adapters/_tools.js +185 -0
  30. package/js/session/adapters/_videos.js +283 -0
  31. package/js/session/adapters/anthropic.js +397 -0
  32. package/js/session/adapters/cerebras.js +28 -0
  33. package/js/session/adapters/deepseek.js +32 -0
  34. package/js/session/adapters/echo.js +51 -0
  35. package/js/session/adapters/fake.js +262 -0
  36. package/js/session/adapters/fireworks.js +46 -0
  37. package/js/session/adapters/gemini.js +381 -0
  38. package/js/session/adapters/groq.js +23 -0
  39. package/js/session/adapters/image/fake.js +55 -0
  40. package/js/session/adapters/image/index.js +40 -0
  41. package/js/session/adapters/image/novita.js +135 -0
  42. package/js/session/adapters/image/openai.js +50 -0
  43. package/js/session/adapters/index.js +53 -0
  44. package/js/session/adapters/mistral.js +31 -0
  45. package/js/session/adapters/novita.js +29 -0
  46. package/js/session/adapters/openai.js +381 -0
  47. package/js/session/adapters/openrouter.js +66 -0
  48. package/js/session/adapters/xai.js +27 -0
  49. package/js/session/bin.js +54 -0
  50. package/js/session/driver.js +160 -0
  51. package/js/session/index.js +18 -0
  52. package/js/session/run.js +393 -0
  53. package/js/session/run_image.js +61 -0
  54. package/package.json +107 -0
  55. package/src/cli/ask.js +160 -0
  56. package/src/cli/backup.js +107 -0
  57. package/src/cli/bench.js +262 -0
  58. package/src/cli/check.js +123 -0
  59. package/src/cli/colored-logger.js +67 -0
  60. package/src/cli/colors.js +13 -0
  61. package/src/cli/default.js +39 -0
  62. package/src/cli/index.js +150 -0
  63. package/src/cli/json-output.js +60 -0
  64. package/src/cli/model.js +571 -0
  65. package/src/cli/onboard.js +232 -0
  66. package/src/cli/rank.js +176 -0
  67. package/src/cli/ratelimit.js +160 -0
  68. package/src/cli/tag.js +105 -0
  69. package/src/lib/assets/alibaba.svg +1 -0
  70. package/src/lib/assets/anthropic.svg +5 -0
  71. package/src/lib/assets/deepseek.svg +1 -0
  72. package/src/lib/assets/gemini.svg +1 -0
  73. package/src/lib/assets/google.svg +2 -0
  74. package/src/lib/assets/kwaipilot.svg +1 -0
  75. package/src/lib/assets/meta.svg +1 -0
  76. package/src/lib/assets/minimax.svg +9 -0
  77. package/src/lib/assets/moonshotai.svg +4 -0
  78. package/src/lib/assets/openai.svg +5 -0
  79. package/src/lib/assets/xai.svg +1 -0
  80. package/src/lib/assets/xiaomi.svg +2 -0
  81. package/src/lib/assets/zai.svg +219 -0
  82. package/src/lib/benchmark-score.js +215 -0
  83. package/src/lib/benchmark-truth.js +68 -0
  84. package/src/lib/cache.js +76 -0
  85. package/src/lib/common.js +208 -0
  86. package/src/lib/cooldown.js +63 -0
  87. package/src/lib/creators.js +71 -0
  88. package/src/lib/curated-cache.js +146 -0
  89. package/src/lib/errors.js +126 -0
  90. package/src/lib/index.js +726 -0
  91. package/src/lib/logger.js +29 -0
  92. package/src/lib/providers.js +87 -0
  93. package/src/lib/rank.js +390 -0
  94. package/src/lib/rate-limiter.js +50 -0
  95. package/src/lib/schema.js +150 -0
  96. package/src/lib/select.js +474 -0
  97. package/src/lib/tracing.js +62 -0
  98. package/src/lib/utils.js +85 -0
@@ -0,0 +1,726 @@
1
+ // TODO: split into factory.js / model-proxy.js / answer-call.js.
2
+ // This file is the default-export surface (`import mohdel from 'mohdel'`);
3
+ // any split must preserve that resolution to a factory function.
4
+ // Behaviour-preserving refactor only.
5
+
6
+ import providers from './providers.js'
7
+ import { getAPIKey, loadDefaultEnv, getProvidersConfig, saveProvidersConfig, setLogger as setCommonLogger } from './common.js'
8
+ import {
9
+ loadCuratedCache,
10
+ getCuratedCacheSnapshot,
11
+ expandModelAliasSync,
12
+ suggestModels,
13
+ persistCuratedCache
14
+ } from './curated-cache.js'
15
+ import { createRateLimiter } from '../../js/session/_rate_limiter.js'
16
+ import { createCooldownTracker } from '../../js/session/_cooldown.js'
17
+ import { runAnswer, runAnswerImage } from '../../js/factory/bridge.js'
18
+ import { startSpan, endSpanOk, endSpanError } from './tracing.js'
19
+ import { isValidTag } from './schema.js'
20
+ import { silent } from './logger.js'
21
+ import { createRequire } from 'node:module'
22
+
23
+ export const version = createRequire(import.meta.url)('../../package.json').version
24
+
25
+ const noop = () => {}
26
+
27
+ // Verbosity tiers — controls which mohdel internal log lines fire.
28
+ //
29
+ // 0 Anomaly-only. Failures, throttling, deprecation, server lifecycle.
30
+ // Recommended for production where you want to see only what needs attention.
31
+ // 1 Default. Adds per-call `start` (debug), `done` (debug), and a basic
32
+ // `result` envelope (trace). Useful in dev to see every model call without
33
+ // drowning in payload detail.
34
+ // 2 Verbose. Adds request preview (trace), tool call expansion (trace),
35
+ // and full output preview (trace). Use when debugging mohdel internals
36
+ // or specific model behavior.
37
+ //
38
+ // Selection precedence: factory opt `verbosity` > env `MOHDEL_VERBOSITY` > default 1.
39
+ // Captured once at factory init; restart your process to change at runtime.
40
const VERBOSITY_DEFAULT = 1
const VERBOSITY_MIN = 0
const VERBOSITY_MAX = 2

/**
 * Normalise a raw verbosity setting (factory option or env var) to an
 * integer tier in [VERBOSITY_MIN, VERBOSITY_MAX].
 *
 * - `null` / `undefined` / `''`            → VERBOSITY_DEFAULT
 * - non-numeric, non-finite or negative    → VERBOSITY_DEFAULT
 * - above VERBOSITY_MAX                    → VERBOSITY_MAX
 * - fractional numbers                     → truncated (1.9 → 1), matching
 *   what `parseInt` already does for the string form `'1.9'`
 *
 * @param {number|string|null|undefined} raw — raw verbosity value
 * @returns {number} integer verbosity tier
 */
const parseVerbosity = (raw) => {
  if (raw == null || raw === '') return VERBOSITY_DEFAULT
  const v = typeof raw === 'number' ? raw : parseInt(raw, 10)
  if (!Number.isFinite(v) || v < VERBOSITY_MIN) return VERBOSITY_DEFAULT
  if (v > VERBOSITY_MAX) return VERBOSITY_MAX
  // Numbers bypass parseInt, so truncate here to keep the tier an integer
  // regardless of whether the caller passed 1.9 or '1.9'.
  return Math.trunc(v)
}
51
+
52
+ // @internal — exported under an underscore-prefixed alias for unit tests only.
53
+ // Not part of the public package surface.
54
+ export { parseVerbosity as _parseVerbosityForTests }
55
+
56
+ const TOOL_SDKS = new Set(['anthropic', 'openai', 'gemini', 'cerebras', 'fireworks', 'openrouter'])
57
+
58
/**
 * Resolve a price entry to a single number for a given input size.
 *
 * A plain number is returned as-is. An object may carry a `default` value
 * plus tier keys of the form `'>N'`; the tier with the highest `N` that
 * `inputTokens` strictly exceeds wins. Anything else resolves to 0.
 *
 * @param {number|object|null|undefined} price — flat price or tiered price table
 * @param {number} inputTokens — input token count used to pick a tier
 * @returns {*} the flat price, the winning tier's value, `price.default`, or 0
 */
const resolvePrice = (price, inputTokens) => {
  if (typeof price === 'number') return price
  if (price == null || typeof price !== 'object') return 0

  const start = { threshold: -1, value: price.default ?? 0 }
  const winner = Object.keys(price)
    .filter((key) => key.startsWith('>'))
    .reduce((best, key) => {
      // Unparseable thresholds become NaN and fail both comparisons below.
      const threshold = parseInt(key.slice(1), 10)
      const beatsCurrent = inputTokens > threshold && threshold > best.threshold
      return beatsCurrent ? { threshold, value: price[key] } : best
    }, start)

  return winner.value
}
74
+
75
+ // @internal — exported for unit tests only.
76
+ export { resolvePrice as _resolvePriceForTests }
77
+
78
/**
 * Return a shallow copy of `modelSpec` with missing routing fields filled in.
 *
 * - `provider` falls back to the first `/`-separated segment of the model id
 * - `sdk` falls back to `providerConfig.sdk`
 * - `api` falls back to `providerConfig.api` (only when the provider has one)
 * - `supportsTools`, when left `undefined`, is derived from TOOL_SDKS
 *
 * The input spec is never modified.
 *
 * @param {string} resolvedModelId — `<provider>/<model>` identifier
 * @param {object} modelSpec — catalog (or fallback) metadata for the model
 * @param {object} providerConfig — provider definition supplying `sdk` / `api`
 * @returns {object} normalized copy of the spec
 */
const normalizeModelSpec = (resolvedModelId, modelSpec, providerConfig) => {
  const spec = { ...modelSpec }

  if (!spec.provider) spec.provider = resolvedModelId.split('/')[0]
  if (!spec.sdk) spec.sdk = providerConfig.sdk
  if (!spec.api && providerConfig.api) spec.api = providerConfig.api

  // Only an explicit `undefined` is derived; `false` / `null` are kept as given.
  if (spec.supportsTools === undefined) spec.supportsTools = TOOL_SDKS.has(spec.sdk)

  return spec
}
94
+
95
/**
 * Build a spec for a model that is absent from the catalog by delegating to
 * the provider's own `createFallbackModelSpec` hook, when it has one.
 *
 * @param {string} resolvedModelId — `<provider>/<model>` identifier
 * @returns {object|null} normalized fallback spec, or `null` when the id has
 *   no provider/model parts, the provider is unknown or has no hook, or the
 *   hook itself returns nothing
 */
const createFallbackModelSpec = (resolvedModelId) => {
  const segments = resolvedModelId.split('/')
  const providerName = segments[0]
  if (!providerName || segments.length < 2) return null

  const providerConfig = providers[providerName]
  if (!providerConfig?.createFallbackModelSpec) return null

  // Everything after the provider segment is the provider-side model name
  // (it may itself contain slashes).
  const providerModelName = segments.slice(1).join('/')
  const fallback = providerConfig.createFallbackModelSpec(resolvedModelId, providerModelName)

  return fallback ? normalizeModelSpec(resolvedModelId, fallback, providerConfig) : null
}
107
+
108
/**
 * Produce the runtime configuration for a provider definition.
 *
 * Resolution order:
 *   1. `provider.resolveConfiguration()` when the provider defines it
 *   2. `provider.createConfiguration(apiKey)` when `apiKeyEnv` is set
 *   3. `provider.createConfiguration()` when only that hook exists
 *   4. an empty object
 *
 * @param {object} provider — provider definition
 * @param {string} providerName — provider name, used in the error message
 * @returns {Promise<*>} whatever the provider hook returns, or `{}`
 * @throws {Error} when `apiKeyEnv` is set but no key is available
 */
const resolveProviderConfiguration = async (provider, providerName) => {
  if (provider.resolveConfiguration) return provider.resolveConfiguration()

  if (!provider.apiKeyEnv) {
    // Key-less providers: use their factory if present, otherwise nothing to configure.
    return provider.createConfiguration ? provider.createConfiguration() : {}
  }

  const apiKey = getAPIKey(provider.apiKeyEnv)
  if (apiKey) return provider.createConfiguration(apiKey)

  throw new Error(`API key not found for ${providerName}. Set ${provider.apiKeyEnv} environment variable.`)
}
127
+
128
+ // Build the handlers bag from a caller-provided logger.
129
+ //
130
+ // The logger is the single source of log routing. Each level becomes a closure
131
+ // that captures the original `logger` reference and forwards calls through it.
132
+ // The closure preserves `this` binding so stateful loggers (pino, winston,
133
+ // bunyan, or any class-based logger that stores instance state via `this` —
134
+ // pino uses Symbol-keyed properties like `Symbol(pino.msgPrefix)`) work
135
+ // correctly even after the methods are stored on a different object.
136
+ //
137
+ // Canonical logger shape — the minimum interface mohdel calls into:
138
+ //
139
+ // {
140
+ // trace: (firstArg, ...rest) => void,
141
+ // debug: (firstArg, ...rest) => void,
142
+ // info: (firstArg, ...rest) => void,
143
+ // warn: (firstArg, ...rest) => void,
144
+ // error: (firstArg, ...rest) => void,
145
+ // fatal: (firstArg, ...rest) => void
146
+ // }
147
+ //
148
+ // `firstArg` may be a string (the message) or an object (structured fields like
149
+ // `{ span }` or `{ span, err }`) followed by a format string in `rest`. This
150
+ // matches pino's call convention; other loggers can adapt with a thin wrapper.
151
+ //
152
+ // If a method is missing from the logger (partial logger), that level is silent
153
+ // — never console-logged. Default when no logger is provided is `silent` from
154
+ // `./logger.js`. Pass `silent` explicitly to be intentional about wanting no output.
155
+ //
156
+ // `onSuccess` and `onFailure` are independent of the logger and pass through
157
+ // unchanged.
158
/**
 * Build the handlers bag from a caller-provided logger.
 *
 * Each of the six log levels becomes a closure that calls the method on the
 * original logger object (so `this` inside the logger stays intact). Levels
 * the logger does not implement map to `noop`. `onSuccess` / `onFailure` are
 * copied through untouched.
 *
 * @param {object} args
 * @param {object} [args.logger] — logger; `silent` is used when falsy
 * @param {Function} [args.onSuccess] — passed through unchanged
 * @param {Function} [args.onFailure] — passed through unchanged
 * @returns {object} `{ trace, debug, info, warn, error, fatal, onSuccess, onFailure }`
 */
const buildHandlers = ({ logger, onSuccess, onFailure }) => {
  const sink = logger || silent
  const handlers = {}

  for (const level of ['trace', 'debug', 'info', 'warn', 'error', 'fatal']) {
    // Presence is decided once, here; the method itself is looked up on the
    // logger at call time so it is always invoked as `sink.<level>(...)`.
    handlers[level] = typeof sink[level] === 'function'
      ? (...args) => sink[level](...args)
      : noop
  }

  handlers.onSuccess = onSuccess
  handlers.onFailure = onFailure
  return handlers
}
171
+
172
+ // @internal — exported under an underscore-prefixed alias for unit tests only.
173
+ // Not part of the public package surface.
174
+ export { buildHandlers as _buildHandlersForTests }
175
+
176
/**
 * mohdel factory.
 *
 * @param {object} [opts]
 * @param {object} [opts.logger] — logger object with `{trace, debug, info, warn, error, fatal}`
 *   methods. Defaults to `silent` from './logger.js'. Methods are invoked
 *   through `this`-preserving closures (see `buildHandlers`), so stateful
 *   loggers (pino, winston) are supported.
 * @param {number} [opts.verbosity] — internal log verbosity tier (0/1/2). Defaults to env
 *   `MOHDEL_VERBOSITY` if set, otherwise 1 (see `parseVerbosity`).
 * @param {Function} [opts.onSuccess] — fired after every successful answer call
 * @param {Function} [opts.onFailure] — fired after every failed answer call
 * @param {number} [opts.cooldownThreshold=3] — consecutive provider failures before cooldown
 * @param {number} [opts.cooldownDuration=60000] — cooldown duration in ms
 * @param {object} [opts.models] — model catalog (library mode — skips disk init)
 * @param {object} [opts.configurations] — provider configurations (library mode)
 * @returns {Promise<object>} proxy exposing `list`, `use`, `getProviderRateLimit`,
 *   `setProviderRateLimit`, `clearProviderRateLimit` and `close`; every other
 *   property reads as `undefined`.
 */
const mohdel = async ({ logger, verbosity: verbosityOpt, onSuccess, onFailure, cooldownThreshold, cooldownDuration, models, configurations } = {}) => {
  // When consumer provides models + configurations, skip disk-based init (library mode).
  // Otherwise load from ~/.config/mohdel/ (CLI / standalone mode).
  const libraryMode = !!(models && configurations)

  if (!libraryMode) {
    loadDefaultEnv()
    await loadCuratedCache()
  }

  // Provider config: user-specific rate limits per provider (~/.config/mohdel/providers.json)
  const providersConfig = libraryMode ? {} : await getProvidersConfig()

  // Resolve verbosity tier once at init. Factory opt > env var > default.
  const verbosity = parseVerbosity(verbosityOpt ?? process.env.MOHDEL_VERBOSITY)

  const handlers = buildHandlers({ logger, onSuccess, onFailure })
  // Expose verbosity so anything reading from `handlers` can gate its own logs.
  handlers.verbosity = verbosity

  // Per-provider runtime cache: providerName → Promise<{ provider, configuration, specs }>.
  // Models from the same provider share one entry.
  const sdkCache = new Map()

  // Rate limiter and cooldown tracker are owned by this factory instance and
  // handed to every model proxy it creates.
  const rateLimiter = createRateLimiter()
  const cooldown = createCooldownTracker(cooldownThreshold, cooldownDuration)

  // Resolve per-provider rate limits from the factory's local providersConfig —
  // keeps `setProviderRateLimit` / etc. the source of truth inside this instance.
  const resolveProviderLimits = (provider) => providersConfig[provider]

  // Wire common.js module-level logger so file I/O errors route to consumer
  setCommonLogger(handlers)

  // Catalog used by `list` / `use`: caller-supplied in library mode, curated cache otherwise.
  const currentCatalog = () => (libraryMode ? models : getCuratedCacheSnapshot())

  return new Proxy({}, {
    get: (target, prop) => {
      if (prop === 'list') {
        // Sync. Returns `{ value, label }` for every non-deprecated model,
        // optionally restricted to those carrying `tag`.
        return (tag) => {
          let modelEntries = Object.entries(currentCatalog())
            .filter(([, metadata]) => !metadata.deprecated)

          if (tag) {
            modelEntries = modelEntries.filter(([, metadata]) =>
              Array.isArray(metadata.tags) && metadata.tags.includes(tag)
            )
          }

          return modelEntries.map(([value, metadata]) => ({
            value,
            label: metadata.label || metadata.displayName || metadata.model || value.split('/').pop()
          }))
        }
      }

      if (prop === 'use') {
        // Sync. Resolves a model id (optionally `id:effort`) to a model proxy.
        return (modelId) => {
          const catalog = currentCatalog()

          // Parse optional :outputEffort suffix (e.g. "claude-opus:max").
          // Effort levels vary per spec (`thinkingEffortLevels` keys), so no
          // keyword list is hardcoded. Instead: tentatively split and look up
          // the base. If the base resolves to a spec, the suffix is an effort
          // candidate validated further down. If not, the full id (colon
          // included) goes through the normal lookup + "not found" path.
          let aliasOutputEffort
          const colonIdx = modelId.lastIndexOf(':')
          if (colonIdx > 0) {
            const candidate = modelId.slice(colonIdx + 1)
            const base = modelId.slice(0, colonIdx)
            const baseResolved = libraryMode ? base : expandModelAliasSync(base)
            const baseSpec = catalog[baseResolved] || createFallbackModelSpec(baseResolved)
            if (baseSpec) {
              aliasOutputEffort = candidate
              modelId = base
            }
          }

          let resolvedModelId = libraryMode ? modelId : expandModelAliasSync(modelId)
          let modelSpec = catalog[resolvedModelId]

          if (!modelSpec) {
            modelSpec = createFallbackModelSpec(resolvedModelId)
            if (!modelSpec) {
              if (libraryMode) {
                throw new Error(`Model '${modelId}' not found in provided models.`)
              }
              const suggestions = suggestModels(modelId)
              let msg = `Model '${modelId}' not found in curated models.`
              if (suggestions.length) {
                msg += ' Did you mean?\n' + suggestions.map(s => ` ${s.id} ${s.label}`).join('\n')
              }
              throw new Error(msg)
            }
          }

          // Follow the deprecation chain to its end. The `seen` set rejects
          // cycles, which also bounds the walk by the catalog size.
          const seen = new Set()
          while (modelSpec.deprecated) {
            if (seen.has(resolvedModelId)) {
              throw new Error(`Circular deprecation chain detected at '${resolvedModelId}'.`)
            }
            seen.add(resolvedModelId)
            const replacement = modelSpec.deprecated
            handlers.warn(`[mohdel:catalog] model '${resolvedModelId}' is deprecated, using '${replacement}' instead`)
            const replacementSpec = catalog[replacement]
            if (!replacementSpec) {
              throw new Error(`Model '${resolvedModelId}' is deprecated (replacement: '${replacement}'), but replacement not found.`)
            }
            resolvedModelId = replacement
            modelSpec = replacementSpec
          }

          // Derive the provider without writing to the catalog entry: in
          // library mode that entry belongs to the caller, and
          // normalizeModelSpec fills `provider` on its own copy anyway.
          const providerName = modelSpec.provider || resolvedModelId.split('/')[0]
          const providerConfig = providers[providerName]
          if (!providerConfig) {
            throw new Error(`Provider configuration for '${providerName}' not found while preparing model '${resolvedModelId}'.`)
          }
          modelSpec = normalizeModelSpec(resolvedModelId, modelSpec, providerConfig)

          // Validate outputEffort alias against model capabilities
          if (aliasOutputEffort) {
            const levels = modelSpec.thinkingEffortLevels
            if (!levels) {
              throw new Error(`Model '${resolvedModelId}' does not support output effort (no thinkingEffortLevels). Cannot use ':${aliasOutputEffort}' suffix.`)
            }
            // Own-property check: a bare `levels[suffix]` would accept inherited
            // names such as 'constructor' or 'toString' as valid effort levels.
            const isKnownLevel = Object.prototype.hasOwnProperty.call(levels, aliasOutputEffort) && !!levels[aliasOutputEffort]
            if (aliasOutputEffort !== 'none' && !isKnownLevel) {
              throw new Error(`Model '${resolvedModelId}' does not support output effort level '${aliasOutputEffort}'. Available: ${Object.keys(levels).join(', ')}`)
            }
          }

          return createModelProxy(resolvedModelId, modelSpec, handlers, aliasOutputEffort, sdkCache, rateLimiter, providersConfig, cooldown, configurations, resolveProviderLimits)
        }
      }

      if (prop === 'getProviderRateLimit') {
        // Returns `{ rpmLimit, tpmLimit }`, or null when neither limit is set.
        return (providerName) => {
          const entry = providersConfig[providerName]
          if (!entry) return null
          const { rpmLimit, tpmLimit } = entry
          return (rpmLimit || tpmLimit) ? { rpmLimit, tpmLimit } : null
        }
      }

      if (prop === 'setProviderRateLimit') {
        // Updates the in-memory entry (creating it if needed), then saves.
        return async (providerName, { rpm, tpm } = {}) => {
          const entry = providersConfig[providerName] || (providersConfig[providerName] = {})
          if (rpm != null) entry.rpmLimit = rpm
          if (tpm != null) entry.tpmLimit = tpm
          await saveProvidersConfig(providersConfig)
          return entry
        }
      }

      if (prop === 'clearProviderRateLimit') {
        // Removes both limits; drops the provider entry entirely if that empties it.
        return async (providerName) => {
          const entry = providersConfig[providerName]
          if (entry) {
            delete entry.rpmLimit
            delete entry.tpmLimit
            if (Object.keys(entry).length === 0) delete providersConfig[providerName]
            await saveProvidersConfig(providersConfig)
          }
        }
      }

      if (prop === 'close') {
        return () => {
          sdkCache.clear()
        }
      }

      return target[prop]
    }
  })
}
385
+
386
/**
 * Build the per-model proxy returned by `mohdel().use(...)`.
 *
 * The proxy exposes `id`, `label`, `supportsTools`, `answer`, `image`,
 * `setRateLimit`, `clearRateLimit`, `addTag`, `removeTag` / `delTag`,
 * `listTags` / `tags` and `info`; every other property reads as `undefined`.
 *
 * @param {string} resolvedModelId — `<provider>/<model>` identifier
 * @param {object} modelSpec — full (normalized) metadata object for the model
 * @param {object} handlers — log-level functions plus `onSuccess` / `onFailure` / `verbosity`
 * @param {string} [aliasOutputEffort] — default output effort taken from an `id:effort` suffix
 * @param {Map} sdkCache — providerName → Promise<{ provider, configuration, specs }>
 * @param {object} rateLimiter — passed to `runAnswer` as `limiter`
 * @param {object} providersConfig — per-provider settings (not read in this function)
 * @param {object} cooldown — passed to `runAnswer` as `cooldown`
 * @param {object} [externalConfigurations] — caller-supplied provider configurations
 * @param {Function} resolveProviderLimits — passed to `runAnswer`
 * @returns {object} model proxy
 */
const createModelProxy = (resolvedModelId, modelSpec, handlers, aliasOutputEffort, sdkCache, rateLimiter, providersConfig, cooldown, externalConfigurations, resolveProviderLimits) => {
  let runtimePromise = null

  // Lazily resolve (and memoize) this model's provider runtime. A failed
  // resolution clears the memo so the next call retries.
  const getRuntime = async () => {
    if (runtimePromise) return runtimePromise

    runtimePromise = (async () => {
      const providerName = modelSpec.provider
      const provider = providers[providerName]
      if (!provider) {
        throw new Error(`Provider ${providerName} not supported for model ${resolvedModelId}`)
      }

      // Reuse cached configuration for this provider
      if (sdkCache.has(providerName)) {
        const cached = await sdkCache.get(providerName)
        cached.specs[resolvedModelId] = modelSpec
        return cached
      }

      // Cache the promise (not the value) so concurrent callers share one
      // in-flight resolution.
      const initPromise = (async () => {
        const configuration = externalConfigurations?.[providerName] ?? await resolveProviderConfiguration(provider, providerName)
        const specs = { [resolvedModelId]: modelSpec }
        return { provider, configuration, specs }
      })()

      sdkCache.set(providerName, initPromise)

      try {
        return await initPromise
      } catch (err) {
        // Evict only our own entry: after `close()` another proxy may have
        // stored a newer promise under the same provider name.
        if (sdkCache.get(providerName) === initPromise) sdkCache.delete(providerName)
        throw err
      }
    })().catch(err => {
      runtimePromise = null
      throw err
    })

    return runtimePromise
  }

  // Time-to-first-token in ms from the result timestamps (divides the
  // difference by 1e6), or undefined when it cannot be computed.
  // BigInt() throws on undefined / non-integer input; this is telemetry
  // only, so a malformed timestamp must never fail a successful call.
  const timeToFirstTokenMs = (timestamps) => {
    if (timestamps?.start == null || timestamps?.first == null) return undefined
    try {
      const start = BigInt(timestamps.start)
      const first = BigInt(timestamps.first)
      return first > start ? Number(first - start) / 1e6 : undefined
    } catch {
      return undefined
    }
  }

  return new Proxy({}, {
    get: (target, prop) => {
      if (prop === 'id') {
        return resolvedModelId
      }
      if (prop === 'label') {
        return modelSpec.label || modelSpec.displayName || resolvedModelId.split('/').pop()
      }
      if (prop === 'supportsTools') {
        return !!modelSpec.supportsTools
      }

      if (prop === 'answer') {
        // answer(prompt, options): run one call through `runAnswer`, wrapped
        // in a tracing span, per-call log lines and onSuccess/onFailure hooks.
        // `options.parentSpan` and `options.configuration` are consumed here;
        // the remaining options are forwarded.
        return async (prompt, options = {}) => {
          // Work on a copy so the caller's options object is never modified.
          const { parentSpan, ...sdkOptions } = options
          if (aliasOutputEffort && !sdkOptions.outputEffort) {
            sdkOptions.outputEffort = aliasOutputEffort
          }

          // Verbosity tier, read once per call to gate the log lines below.
          const verbosity = handlers.verbosity ?? 1

          // Span for this call, parented to the caller's span when given.
          const spanAttrs = {
            'gen_ai.request.model': resolvedModelId,
            'gen_ai.system': modelSpec.provider
          }
          if (sdkOptions.outputBudget) spanAttrs['gen_ai.request.max_tokens'] = sdkOptions.outputBudget
          if (sdkOptions.outputEffort) spanAttrs['mohdel.output_effort'] = sdkOptions.outputEffort
          const span = startSpan('mohdel.answer', spanAttrs, parentSpan)

          // Dev visibility: log call entry at debug level (verbosity >= 1).
          const startedAt = Date.now()
          if (verbosity >= 1) {
            handlers.debug({ span, provider: modelSpec.provider, model: resolvedModelId, effort: sdkOptions.outputEffort || 'default', budget: sdkOptions.outputBudget || '?', tools: sdkOptions.tools?.length || 0, images: sdkOptions.images?.length || 0 }, '[mohdel:answer] start')
          }

          // Verbose-only request preview at trace level (verbosity >= 2).
          // Wrapped in try/catch because prompts have wildly varying shapes
          // (string vs structured vs multimodal arrays).
          if (verbosity >= 2) {
            try {
              const reqMeta = {}
              if (typeof prompt === 'string') {
                reqMeta.input = prompt.slice(0, 500)
                reqMeta.inputLength = prompt.length
              } else if (prompt && typeof prompt === 'object') {
                if (prompt.system) {
                  const sysText = Array.isArray(prompt.system)
                    ? prompt.system.map(s => s?.text || '').join('\n')
                    : String(prompt.system)
                  reqMeta.systemPreview = sysText.slice(0, 500)
                  reqMeta.systemLength = sysText.length
                }
                if (Array.isArray(prompt.messages)) {
                  reqMeta.messageCount = prompt.messages.length
                  const last = prompt.messages[prompt.messages.length - 1]
                  if (last) {
                    reqMeta.lastRole = last.role
                    reqMeta.lastContentPreview = typeof last.content === 'string'
                      ? last.content.slice(0, 300)
                      : '[multimodal]'
                  }
                }
              }
              if (sdkOptions.tools?.length) reqMeta.toolNames = sdkOptions.tools.map(t => t.name)
              if (sdkOptions.images?.length) reqMeta.images = sdkOptions.images.length
              handlers.trace({ span, ...reqMeta }, '[mohdel:answer] request')
            } catch (previewErr) {
              handlers.trace({ span, err: previewErr }, '[mohdel:answer] request preview unavailable')
            }
          }

          try {
            // Factory-owned trackers are threaded in so `setProviderRateLimit`
            // and factory-scoped cooldown settings remain the source of truth
            // for this factory instance.
            const { configuration: defaultConfiguration } = await getRuntime()
            const effectiveConfiguration = sdkOptions.configuration || defaultConfiguration
            delete sdkOptions.configuration

            const result = await runAnswer({
              provider: modelSpec.provider,
              model: modelSpec.model ?? resolvedModelId.split('/').pop(),
              modelKey: resolvedModelId,
              configuration: effectiveConfiguration,
              prompt,
              options: sdkOptions
            }, { cooldown, limiter: rateLimiter, resolveProviderLimits })

            // End span with result attributes
            const endAttrs = {
              'gen_ai.usage.input_tokens': result.inputTokens || 0,
              'gen_ai.usage.output_tokens': result.outputTokens || 0,
              'mohdel.thinking_tokens': result.thinkingTokens || 0,
              'mohdel.status': result.status
            }
            if (result.cost != null) endAttrs['mohdel.cost'] = result.cost
            if (result.warning) endAttrs['mohdel.warning'] = result.warning
            const ttftMs = timeToFirstTokenMs(result.timestamps)
            if (ttftMs !== undefined) endAttrs['mohdel.time_to_first_token_ms'] = ttftMs
            endSpanOk(span, endAttrs)

            // Dev visibility: per-call summary at debug level, mirroring the
            // span attributes. Gated at verbosity >= 1 (default).
            const totalMs = Date.now() - startedAt
            if (verbosity >= 1) {
              handlers.debug({
                span,
                status: result.status,
                in: result.inputTokens || 0,
                out: result.outputTokens || 0,
                think: result.thinkingTokens || 0,
                cost: result.cost != null ? result.cost : undefined,
                ttf: ttftMs != null ? Math.round(ttftMs) : undefined,
                total: totalMs,
                warning: result.warning || undefined
              }, '[mohdel:answer] done')
            }

            // Trace-level basic envelope. Trace output may include
            // user-visible content, so consumers must treat it as sensitive.
            // Output preview truncated to 200 chars. Gated at verbosity >= 1.
            if (verbosity >= 1) {
              handlers.trace({
                span,
                status: result.status,
                inputTokens: result.inputTokens,
                outputTokens: result.outputTokens,
                thinkingTokens: result.thinkingTokens,
                toolCallCount: result.toolCalls?.length || 0,
                outputPreview: typeof result.output === 'string' ? result.output.slice(0, 200) : null
              }, '[mohdel:answer] result')
            }

            // Verbose-only deep envelope at trace level (verbosity >= 2):
            // tool call expansion and a longer (2000 char) output preview,
            // as separate lines so they can be filtered independently.
            if (verbosity >= 2) {
              if (result.toolCalls?.length) {
                try {
                  handlers.trace({ span, toolCalls: result.toolCalls.map(tc => ({ id: tc.id, name: tc.name, arguments: tc.arguments })) }, '[mohdel:answer] tool_calls')
                } catch (toolErr) {
                  handlers.trace({ span, err: toolErr }, '[mohdel:answer] tool_calls serialization failed')
                }
              }
              if (typeof result.output === 'string' && result.output.length > 0) {
                handlers.trace({ span, output: result.output.slice(0, 2000) }, '[mohdel:answer] output')
              }
            }

            // A throwing consumer callback is logged, never propagated.
            if (handlers.onSuccess) {
              try { handlers.onSuccess(result, { model: resolvedModelId, provider: modelSpec.provider }) } catch (e) { handlers.warn({ span, err: e }, '[mohdel:answer] onSuccess callback error') }
            }

            return result
          } catch (err) {
            // Cooldown-rejected calls are the expected fast-fail path when the
            // provider is in backoff; annotate the span so operators can
            // distinguish them from real adapter errors. This runs BEFORE
            // endSpanError — presumably that helper ends the span, and
            // OpenTelemetry ignores attributes set on an ended span
            // (NOTE(review): confirm against ./tracing.js). Both accesses are
            // guarded so a missing span or a non-Error rejection cannot mask
            // the original failure.
            if (err?.message === 'PROVIDER_COOLDOWN') {
              span?.setAttribute?.('mohdel.cooldown', true)
            }
            endSpanError(span, err)

            // Consolidated boundary line: provider/model + total latency.
            const totalMs = Date.now() - startedAt
            handlers.warn({ span, err, provider: modelSpec.provider, model: resolvedModelId, totalMs }, '[mohdel:answer] failed')

            if (handlers.onFailure) {
              try { handlers.onFailure(err, { model: resolvedModelId, provider: modelSpec.provider }) } catch (e) { handlers.warn({ span, err: e }, '[mohdel:answer] onFailure callback error') }
            }

            throw err
          }
        }
      }

      if (prop === 'image') {
        // image(prompt, options): forwards to `runAnswerImage` with the
        // provider's default configuration. No span, logging or callbacks
        // are applied on this path.
        return async (prompt, options = {}) => {
          const { configuration } = await getRuntime()
          return runAnswerImage({
            provider: modelSpec.provider,
            model: modelSpec.model ?? resolvedModelId.split('/').pop(),
            configuration,
            prompt,
            options,
            spec: modelSpec
          })
        }
      }

      if (prop === 'setRateLimit') {
        // Stores model-scoped limits on the curated-cache entry (created from
        // modelSpec if absent) and persists the cache.
        return async ({ rpm, tpm } = {}) => {
          const curatedCache = getCuratedCacheSnapshot()
          const model = curatedCache[resolvedModelId] || (curatedCache[resolvedModelId] = { ...modelSpec })
          if (rpm != null) model.rpmLimit = rpm
          if (tpm != null) model.tpmLimit = tpm
          model.rateLimitScope = 'model'
          await persistCuratedCache()
          return { rpmLimit: model.rpmLimit, tpmLimit: model.tpmLimit }
        }
      }

      if (prop === 'clearRateLimit') {
        return async () => {
          const curatedCache = getCuratedCacheSnapshot()
          const model = curatedCache[resolvedModelId]
          if (model) {
            delete model.rpmLimit
            delete model.tpmLimit
            delete model.rateLimitScope
            await persistCuratedCache()
          }
        }
      }

      if (prop === 'addTag') {
        // Validates the tag, appends it if new (persisting only then), and
        // returns the model's tag list.
        return async (tag) => {
          if (!isValidTag(tag)) throw new Error(`Invalid tag "${tag}". Tags must match /^[a-zA-Z][a-zA-Z0-9._-]{0,31}$/.`)
          const curatedCache = getCuratedCacheSnapshot()
          const model = curatedCache[resolvedModelId] || (curatedCache[resolvedModelId] = { ...modelSpec })

          model.tags = model.tags || []

          if (!model.tags.includes(tag)) {
            model.tags.push(tag)
            await persistCuratedCache()
          }

          return model.tags
        }
      }

      if (prop === 'removeTag' || prop === 'delTag') {
        return async (tag) => {
          const curatedCache = getCuratedCacheSnapshot()
          const model = curatedCache[resolvedModelId]
          if (!model) return modelSpec.tags || []

          if (model.tags && Array.isArray(model.tags)) {
            model.tags = model.tags.filter(t => t !== tag)
            await persistCuratedCache()
          }

          return model.tags || []
        }
      }

      if (prop === 'listTags' || prop === 'tags') {
        return () => { // Sync
          const curatedCache = getCuratedCacheSnapshot()
          const model = curatedCache[resolvedModelId] || modelSpec
          return model?.tags || []
        }
      }

      if (prop === 'info') {
        // Sync. Returns a shallow copy of the curated-cache entry when one
        // exists, otherwise of modelSpec.
        // NOTE(review): the curated cache is skipped whenever external
        // configurations were supplied — presumably a stand-in for library
        // mode; confirm that is intended when `models` was not also given.
        return () => {
          const catalog = externalConfigurations ? null : getCuratedCacheSnapshot()
          return catalog?.[resolvedModelId] ? { ...catalog[resolvedModelId] } : { ...modelSpec }
        }
      }

      return target[prop]
    }
  })
}
723
+
724
+ export { silent } from './logger.js'
725
+ export { loadCuratedCache, getCuratedCacheSnapshot } from './curated-cache.js'
726
+ export default mohdel