mohdel 0.90.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +377 -0
- package/config/benchmarks.json +39 -0
- package/js/client/call.js +75 -0
- package/js/client/call_image.js +82 -0
- package/js/client/gate-binary.js +72 -0
- package/js/client/index.js +16 -0
- package/js/client/ndjson.js +29 -0
- package/js/client/transport.js +48 -0
- package/js/core/envelope.js +141 -0
- package/js/core/errors.js +75 -0
- package/js/core/events.js +96 -0
- package/js/core/image.js +58 -0
- package/js/core/index.js +10 -0
- package/js/core/status.js +48 -0
- package/js/factory/bridge.js +372 -0
- package/js/session/_cooldown.js +114 -0
- package/js/session/_logger.js +138 -0
- package/js/session/_rate_limiter.js +77 -0
- package/js/session/_tracing.js +58 -0
- package/js/session/adapters/_cancelled.js +44 -0
- package/js/session/adapters/_catalog.js +58 -0
- package/js/session/adapters/_chat_completions.js +439 -0
- package/js/session/adapters/_errors.js +85 -0
- package/js/session/adapters/_images.js +60 -0
- package/js/session/adapters/_lazy_json_cache.js +76 -0
- package/js/session/adapters/_pricing.js +67 -0
- package/js/session/adapters/_providers.js +60 -0
- package/js/session/adapters/_tools.js +185 -0
- package/js/session/adapters/_videos.js +283 -0
- package/js/session/adapters/anthropic.js +397 -0
- package/js/session/adapters/cerebras.js +28 -0
- package/js/session/adapters/deepseek.js +32 -0
- package/js/session/adapters/echo.js +51 -0
- package/js/session/adapters/fake.js +262 -0
- package/js/session/adapters/fireworks.js +46 -0
- package/js/session/adapters/gemini.js +381 -0
- package/js/session/adapters/groq.js +23 -0
- package/js/session/adapters/image/fake.js +55 -0
- package/js/session/adapters/image/index.js +40 -0
- package/js/session/adapters/image/novita.js +135 -0
- package/js/session/adapters/image/openai.js +50 -0
- package/js/session/adapters/index.js +53 -0
- package/js/session/adapters/mistral.js +31 -0
- package/js/session/adapters/novita.js +29 -0
- package/js/session/adapters/openai.js +381 -0
- package/js/session/adapters/openrouter.js +66 -0
- package/js/session/adapters/xai.js +27 -0
- package/js/session/bin.js +54 -0
- package/js/session/driver.js +160 -0
- package/js/session/index.js +18 -0
- package/js/session/run.js +393 -0
- package/js/session/run_image.js +61 -0
- package/package.json +107 -0
- package/src/cli/ask.js +160 -0
- package/src/cli/backup.js +107 -0
- package/src/cli/bench.js +262 -0
- package/src/cli/check.js +123 -0
- package/src/cli/colored-logger.js +67 -0
- package/src/cli/colors.js +13 -0
- package/src/cli/default.js +39 -0
- package/src/cli/index.js +150 -0
- package/src/cli/json-output.js +60 -0
- package/src/cli/model.js +571 -0
- package/src/cli/onboard.js +232 -0
- package/src/cli/rank.js +176 -0
- package/src/cli/ratelimit.js +160 -0
- package/src/cli/tag.js +105 -0
- package/src/lib/assets/alibaba.svg +1 -0
- package/src/lib/assets/anthropic.svg +5 -0
- package/src/lib/assets/deepseek.svg +1 -0
- package/src/lib/assets/gemini.svg +1 -0
- package/src/lib/assets/google.svg +2 -0
- package/src/lib/assets/kwaipilot.svg +1 -0
- package/src/lib/assets/meta.svg +1 -0
- package/src/lib/assets/minimax.svg +9 -0
- package/src/lib/assets/moonshotai.svg +4 -0
- package/src/lib/assets/openai.svg +5 -0
- package/src/lib/assets/xai.svg +1 -0
- package/src/lib/assets/xiaomi.svg +2 -0
- package/src/lib/assets/zai.svg +219 -0
- package/src/lib/benchmark-score.js +215 -0
- package/src/lib/benchmark-truth.js +68 -0
- package/src/lib/cache.js +76 -0
- package/src/lib/common.js +208 -0
- package/src/lib/cooldown.js +63 -0
- package/src/lib/creators.js +71 -0
- package/src/lib/curated-cache.js +146 -0
- package/src/lib/errors.js +126 -0
- package/src/lib/index.js +726 -0
- package/src/lib/logger.js +29 -0
- package/src/lib/providers.js +87 -0
- package/src/lib/rank.js +390 -0
- package/src/lib/rate-limiter.js +50 -0
- package/src/lib/schema.js +150 -0
- package/src/lib/select.js +474 -0
- package/src/lib/tracing.js +62 -0
- package/src/lib/utils.js +85 -0
package/src/lib/index.js
ADDED
|
@@ -0,0 +1,726 @@
|
|
|
1
|
+
// TODO: split into factory.js / model-proxy.js / answer-call.js.
|
|
2
|
+
// This file is the default-export surface (`import mohdel from 'mohdel'`);
|
|
3
|
+
// any split must preserve that resolution to a factory function.
|
|
4
|
+
// Behaviour-preserving refactor only.
|
|
5
|
+
|
|
6
|
+
import providers from './providers.js'
|
|
7
|
+
import { getAPIKey, loadDefaultEnv, getProvidersConfig, saveProvidersConfig, setLogger as setCommonLogger } from './common.js'
|
|
8
|
+
import {
|
|
9
|
+
loadCuratedCache,
|
|
10
|
+
getCuratedCacheSnapshot,
|
|
11
|
+
expandModelAliasSync,
|
|
12
|
+
suggestModels,
|
|
13
|
+
persistCuratedCache
|
|
14
|
+
} from './curated-cache.js'
|
|
15
|
+
import { createRateLimiter } from '../../js/session/_rate_limiter.js'
|
|
16
|
+
import { createCooldownTracker } from '../../js/session/_cooldown.js'
|
|
17
|
+
import { runAnswer, runAnswerImage } from '../../js/factory/bridge.js'
|
|
18
|
+
import { startSpan, endSpanOk, endSpanError } from './tracing.js'
|
|
19
|
+
import { isValidTag } from './schema.js'
|
|
20
|
+
import { silent } from './logger.js'
|
|
21
|
+
import { createRequire } from 'node:module'
|
|
22
|
+
|
|
23
|
+
// Package version string, read from ../../package.json through a CommonJS-style
// require created for this module's URL.
export const version = createRequire(import.meta.url)('../../package.json').version
|
|
24
|
+
|
|
25
|
+
// Shared do-nothing function; buildHandlers uses it for log levels the logger lacks.
const noop = () => {}
|
|
26
|
+
|
|
27
|
+
// Verbosity tiers — controls which mohdel internal log lines fire.
|
|
28
|
+
//
|
|
29
|
+
// 0 Anomaly-only. Failures, throttling, deprecation, server lifecycle.
|
|
30
|
+
// Recommended for production where you want to see only what needs attention.
|
|
31
|
+
// 1 Default. Adds per-call `start` (debug), `done` (debug), and a basic
|
|
32
|
+
// `result` envelope (trace). Useful in dev to see every model call without
|
|
33
|
+
// drowning in payload detail.
|
|
34
|
+
// 2 Verbose. Adds request preview (trace), tool call expansion (trace),
|
|
35
|
+
// and full output preview (trace). Use when debugging mohdel internals
|
|
36
|
+
// or specific model behavior.
|
|
37
|
+
//
|
|
38
|
+
// Selection precedence: factory opt `verbosity` > env `MOHDEL_VERBOSITY` > default 1.
|
|
39
|
+
// Captured once at factory init; create a new factory (or restart the process) to change it.
|
|
40
|
+
const VERBOSITY_DEFAULT = 1
const VERBOSITY_MIN = 0
const VERBOSITY_MAX = 2

/**
 * Normalise a raw verbosity setting into an integer tier.
 *
 * @param {number|string|null|undefined} raw — factory option or env-var value
 * @returns {number} an integer in [VERBOSITY_MIN, VERBOSITY_MAX]
 *
 * Rules:
 *   - `null`, `undefined` and `''` → VERBOSITY_DEFAULT
 *   - unparsable, non-finite or below-minimum values → VERBOSITY_DEFAULT
 *   - values above the maximum → VERBOSITY_MAX
 *   - fractional numbers are truncated, so `1.5` and `'1.5'` both yield 1
 */
const parseVerbosity = (raw) => {
  if (raw == null || raw === '') return VERBOSITY_DEFAULT
  const v = typeof raw === 'number' ? raw : parseInt(raw, 10)
  if (!Number.isFinite(v) || v < VERBOSITY_MIN) return VERBOSITY_DEFAULT
  if (v > VERBOSITY_MAX) return VERBOSITY_MAX
  // parseInt already truncates strings; truncate numeric input as well so a
  // tier is always an integer. Done after the range checks so that negative
  // fractions (e.g. -0.5) still fall back to the default.
  return Math.trunc(v)
}
|
|
51
|
+
|
|
52
|
+
// @internal — exported under an underscore-prefixed alias for unit tests only.
|
|
53
|
+
// Not part of the public package surface.
|
|
54
|
+
export { parseVerbosity as _parseVerbosityForTests }
|
|
55
|
+
|
|
56
|
+
// SDK names for which normalizeModelSpec defaults `supportsTools` to true when
// a model spec leaves that field undefined.
const TOOL_SDKS = new Set(['anthropic', 'openai', 'gemini', 'cerebras', 'fireworks', 'openrouter'])
|
|
57
|
+
|
|
58
|
+
/**
 * Pick the price that applies to a call with `inputTokens` input tokens.
 *
 * @param {number|object|null|undefined} price — a flat number, or an object
 *        with an optional `default` entry plus tier keys of the form `'>N'`
 * @param {number} inputTokens — input size used to select a tier
 * @returns {number} the flat price, the value of the highest tier whose
 *          threshold is strictly below `inputTokens`, the `default` entry,
 *          or 0 when nothing applies
 */
const resolvePrice = (price, inputTokens) => {
  if (typeof price === 'number') return price
  if (price == null || typeof price !== 'object') return 0

  let bestThreshold = -1
  let bestPrice = price.default ?? 0

  for (const tierKey of Object.keys(price)) {
    if (!tierKey.startsWith('>')) continue

    // Keys whose suffix is not numeric parse to NaN and never win a comparison.
    const limit = parseInt(tierKey.slice(1), 10)
    const applies = inputTokens > limit
    const isHigher = limit > bestThreshold
    if (applies && isHigher) {
      bestThreshold = limit
      bestPrice = price[tierKey]
    }
  }

  return bestPrice
}
|
|
74
|
+
|
|
75
|
+
// @internal — exported for unit tests only.
|
|
76
|
+
export { resolvePrice as _resolvePriceForTests }
|
|
77
|
+
|
|
78
|
+
/**
 * Return a shallow copy of `modelSpec` with provider-level defaults filled in.
 * The input spec is not modified.
 *
 * @param {string} resolvedModelId — model id; its first `/` segment is used as
 *        the provider when the spec has none
 * @param {object} modelSpec — model metadata
 * @param {object} providerConfig — provider entry supplying `sdk` and optional `api`
 * @returns {object} the completed copy
 */
const normalizeModelSpec = (resolvedModelId, modelSpec, providerConfig) => {
  const spec = { ...modelSpec }

  spec.provider = spec.provider || resolvedModelId.split('/')[0]
  spec.sdk = spec.sdk || providerConfig.sdk

  // `api` is only added when the provider actually declares one.
  const needsApi = !spec.api && providerConfig.api
  if (needsApi) spec.api = providerConfig.api

  // An explicit true/false on the spec wins; only `undefined` is defaulted.
  if (spec.supportsTools === undefined) spec.supportsTools = TOOL_SDKS.has(spec.sdk)

  return spec
}
|
|
94
|
+
|
|
95
|
+
/**
 * Build a model spec for an id that is absent from the catalog by delegating
 * to the provider's own `createFallbackModelSpec` hook.
 *
 * @param {string} resolvedModelId — id of the form `provider/model[/more]`
 * @returns {object|null} a normalized spec, or null when the id has no
 *          provider/model split, the provider is unknown or has no hook, or
 *          the hook returns nothing
 */
const createFallbackModelSpec = (resolvedModelId) => {
  const segments = resolvedModelId.split('/')
  const providerName = segments[0]
  const modelSegments = segments.slice(1)
  if (!providerName || modelSegments.length === 0) return null

  const providerConfig = providers[providerName]
  if (!providerConfig?.createFallbackModelSpec) return null

  // Invoked as a method so the hook keeps `providerConfig` as its receiver.
  const fallbackSpec = providerConfig.createFallbackModelSpec(resolvedModelId, modelSegments.join('/'))
  return fallbackSpec
    ? normalizeModelSpec(resolvedModelId, fallbackSpec, providerConfig)
    : null
}
|
|
107
|
+
|
|
108
|
+
/**
 * Produce the configuration object for a provider.
 *
 * Resolution order:
 *   1. `provider.resolveConfiguration()` when the provider defines it
 *   2. `provider.createConfiguration(apiKey)` when `apiKeyEnv` is set
 *   3. `provider.createConfiguration()` when only that hook exists
 *   4. an empty object
 *
 * @param {object} provider — provider definition
 * @param {string} providerName — used in the missing-key error message
 * @returns {Promise<object>} the provider configuration
 * @throws {Error} when `apiKeyEnv` is set but no key is found
 */
const resolveProviderConfiguration = async (provider, providerName) => {
  if (provider.resolveConfiguration) return provider.resolveConfiguration()

  if (!provider.apiKeyEnv) {
    return provider.createConfiguration ? provider.createConfiguration() : {}
  }

  const apiKey = getAPIKey(provider.apiKeyEnv)
  if (apiKey) return provider.createConfiguration(apiKey)

  throw new Error(`API key not found for ${providerName}. Set ${provider.apiKeyEnv} environment variable.`)
}
|
|
127
|
+
|
|
128
|
+
// Build the handlers bag from a caller-provided logger.
|
|
129
|
+
//
|
|
130
|
+
// The logger is the single source of log routing. Each level becomes a closure
|
|
131
|
+
// that captures the original `logger` reference and forwards calls through it.
|
|
132
|
+
// The closure preserves `this` binding so stateful loggers (pino, winston,
|
|
133
|
+
// bunyan, or any class-based logger that stores instance state via `this` —
|
|
134
|
+
// pino uses Symbol-keyed properties like `Symbol(pino.msgPrefix)`) work
|
|
135
|
+
// correctly even after the methods are stored on a different object.
|
|
136
|
+
//
|
|
137
|
+
// Canonical logger shape — the minimum interface mohdel calls into:
|
|
138
|
+
//
|
|
139
|
+
// {
|
|
140
|
+
// trace: (firstArg, ...rest) => void,
|
|
141
|
+
// debug: (firstArg, ...rest) => void,
|
|
142
|
+
// info: (firstArg, ...rest) => void,
|
|
143
|
+
// warn: (firstArg, ...rest) => void,
|
|
144
|
+
// error: (firstArg, ...rest) => void,
|
|
145
|
+
// fatal: (firstArg, ...rest) => void
|
|
146
|
+
// }
|
|
147
|
+
//
|
|
148
|
+
// `firstArg` may be a string (the message) or an object (structured fields like
|
|
149
|
+
// `{ span }` or `{ span, err }`) followed by a format string in `rest`. This
|
|
150
|
+
// matches pino's call convention; other loggers can adapt with a thin wrapper.
|
|
151
|
+
//
|
|
152
|
+
// If a method is missing from the logger (partial logger), that level is silent
|
|
153
|
+
// — never console-logged. Default when no logger is provided is `silent` from
|
|
154
|
+
// `./logger.js`. Pass `silent` explicitly to be intentional about wanting no output.
|
|
155
|
+
//
|
|
156
|
+
// `onSuccess` and `onFailure` are independent of the logger and pass through
|
|
157
|
+
// unchanged.
|
|
158
|
+
/**
 * Wrap a logger into the handlers bag used throughout the factory.
 *
 * Every log level becomes a closure that calls the method on the logger
 * object itself, so the logger remains the receiver. Levels the logger does
 * not implement as functions map to a no-op. `onSuccess` / `onFailure` are
 * copied through untouched.
 *
 * @param {object} args
 * @param {object} [args.logger] — falls back to `silent` when falsy
 * @param {Function} [args.onSuccess]
 * @param {Function} [args.onFailure]
 * @returns {object} `{ trace, debug, info, warn, error, fatal, onSuccess, onFailure }`
 */
const buildHandlers = ({ logger, onSuccess, onFailure }) => {
  const sink = logger || silent

  // Availability is decided once, here; the method itself is looked up on each call.
  const forward = (level) =>
    typeof sink[level] === 'function'
      ? (...args) => sink[level](...args)
      : noop

  const handlers = {}
  for (const level of ['trace', 'debug', 'info', 'warn', 'error', 'fatal']) {
    handlers[level] = forward(level)
  }
  handlers.onSuccess = onSuccess
  handlers.onFailure = onFailure
  return handlers
}
|
|
171
|
+
|
|
172
|
+
// @internal — exported under an underscore-prefixed alias for unit tests only.
|
|
173
|
+
// Not part of the public package surface.
|
|
174
|
+
export { buildHandlers as _buildHandlersForTests }
|
|
175
|
+
|
|
176
|
+
/**
 * mohdel factory.
 *
 * @param {object} [opts]
 * @param {object} [opts.logger] — logger object with `{trace, debug, info, warn, error, fatal}`
 *                                 methods. Defaults to `silent` from './logger.js'. See the
 *                                 `buildHandlers` comment above for the canonical shape and how
 *                                 stateful loggers (pino, winston) are supported via
 *                                 `this`-preserving closures.
 * @param {number} [opts.verbosity] — internal log verbosity tier (0/1/2). Defaults to env
 *                                    `MOHDEL_VERBOSITY` if set, otherwise 1. See the verbosity
 *                                    comment near `parseVerbosity` for what each tier emits.
 * @param {Function} [opts.onSuccess] — fired after every successful answer call
 * @param {Function} [opts.onFailure] — fired after every failed answer call
 * @param {number} [opts.cooldownThreshold=3] — consecutive provider failures before cooldown
 * @param {number} [opts.cooldownDuration=60000] — cooldown duration in ms
 * @param {object} [opts.models] — model catalog (library mode — skips disk init)
 * @param {object} [opts.configurations] — provider configurations (library mode)
 * @returns {Promise<object>} a proxy exposing `list(tag)`, `use(modelId)`,
 *                            `getProviderRateLimit(name)`, `setProviderRateLimit(name, {rpm, tpm})`,
 *                            `clearProviderRateLimit(name)` and `close()`
 */
const mohdel = async ({ logger, verbosity: verbosityOpt, onSuccess, onFailure, cooldownThreshold, cooldownDuration, models, configurations } = {}) => {
  // Library mode requires BOTH `models` and `configurations`; it skips all
  // disk-based init. Otherwise the env and the curated cache are loaded first.
  const libraryMode = !!(models && configurations)

  if (!libraryMode) {
    loadDefaultEnv()
    await loadCuratedCache()
  }

  // Per-provider rate limits. Starts empty in library mode, otherwise loaded
  // through getProvidersConfig().
  const providersConfig = libraryMode ? {} : await getProvidersConfig()

  // Resolve the verbosity tier once per factory. Factory opt > env var > default.
  const verbosity = parseVerbosity(verbosityOpt ?? process.env.MOHDEL_VERBOSITY)

  const handlers = buildHandlers({ logger, onSuccess, onFailure })
  // Exposed on the handlers bag so code that only receives `handlers` can gate
  // its own log lines.
  handlers.verbosity = verbosity

  // Per-provider runtime cache shared by every model proxy created by this factory.
  const sdkCache = new Map()

  const rateLimiter = createRateLimiter()

  // Cooldown tracker for this factory; handed to each model proxy.
  const cooldown = createCooldownTracker(cooldownThreshold, cooldownDuration)

  // Reads live from providersConfig so setProviderRateLimit /
  // clearProviderRateLimit take effect for later lookups.
  const resolveProviderLimits = (provider) => providersConfig[provider]

  // Route common.js module-level logging through the same handlers.
  setCommonLogger(handlers)

  return new Proxy({}, {
    get: (target, prop) => {
      if (prop === 'list') {
        return (tag) => { // Sync
          const catalog = libraryMode ? models : getCuratedCacheSnapshot()
          let modelEntries = Object.entries(catalog)
            .filter(([, metadata]) => !metadata.deprecated)

          if (tag) {
            modelEntries = modelEntries.filter(([, metadata]) =>
              metadata.tags && Array.isArray(metadata.tags) && metadata.tags.includes(tag)
            )
          }

          return modelEntries.map(([value, metadata]) => ({
            value,
            label: metadata.label || metadata.displayName || metadata.model || value.split('/').pop()
          }))
        }
      }

      if (prop === 'use') {
        return (modelId) => { // Sync
          const catalog = libraryMode ? models : getCuratedCacheSnapshot()

          // Parse optional :outputEffort suffix (e.g. "claude-opus:max").
          // Effort levels vary per spec (`thinkingEffortLevels` keys), so no
          // keyword list is hardcoded. Instead: tentatively split at the last
          // colon and look up `base`. If that resolves to a spec, treat the
          // suffix as an effort candidate and validate it against the spec
          // further down. If `base` doesn't resolve, the full `modelId`
          // (colon included) goes through the normal lookup and "not found"
          // error path.
          let aliasOutputEffort
          const colonIdx = modelId.lastIndexOf(':')
          if (colonIdx > 0) {
            const candidate = modelId.slice(colonIdx + 1)
            const base = modelId.slice(0, colonIdx)
            const baseResolved = libraryMode ? base : expandModelAliasSync(base)
            const baseSpec = catalog[baseResolved] || createFallbackModelSpec(baseResolved)
            if (baseSpec) {
              aliasOutputEffort = candidate
              modelId = base
            }
          }

          let resolvedModelId = libraryMode ? modelId : expandModelAliasSync(modelId)
          let modelSpec = catalog[resolvedModelId]

          if (!modelSpec) {
            modelSpec = createFallbackModelSpec(resolvedModelId)
            if (!modelSpec) {
              if (libraryMode) {
                throw new Error(`Model '${modelId}' not found in provided models.`)
              }
              const suggestions = suggestModels(modelId)
              let msg = `Model '${modelId}' not found in curated models.`
              if (suggestions.length) {
                msg += ' Did you mean?\n' + suggestions.map(s => `  ${s.id}  ${s.label}`).join('\n')
              }
              throw new Error(msg)
            }
          }

          // Follow `deprecated` → replacement links until a non-deprecated
          // spec is reached. There is no depth limit; the `seen` set turns a
          // cycle into an error instead of an infinite loop.
          const seen = new Set()
          while (modelSpec.deprecated) {
            if (seen.has(resolvedModelId)) {
              throw new Error(`Circular deprecation chain detected at '${resolvedModelId}'.`)
            }
            seen.add(resolvedModelId)
            const replacement = modelSpec.deprecated
            handlers.warn(`[mohdel:catalog] model '${resolvedModelId}' is deprecated, using '${replacement}' instead`)
            const replacementSpec = catalog[replacement]
            if (!replacementSpec) {
              throw new Error(`Model '${resolvedModelId}' is deprecated (replacement: '${replacement}'), but replacement not found.`)
            }
            resolvedModelId = replacement
            modelSpec = replacementSpec
          }

          // Derive the provider name without writing to `modelSpec`: at this
          // point it can be an entry of the catalog (the caller's `models`
          // object in library mode), which must not be mutated and may be
          // frozen. normalizeModelSpec fills `provider` on its own copy using
          // the same first-segment rule.
          const providerName = modelSpec.provider || resolvedModelId.split('/')[0]
          const providerConfig = providers[providerName]
          if (!providerConfig) {
            throw new Error(`Provider configuration for '${providerName}' not found while preparing model '${resolvedModelId}'.`)
          }
          modelSpec = normalizeModelSpec(resolvedModelId, modelSpec, providerConfig)

          // Validate outputEffort alias against model capabilities
          if (aliasOutputEffort) {
            const levels = modelSpec.thinkingEffortLevels
            if (!levels) {
              throw new Error(`Model '${resolvedModelId}' does not support output effort (no thinkingEffortLevels). Cannot use ':${aliasOutputEffort}' suffix.`)
            }
            // Own-property check so inherited names such as 'constructor' or
            // 'toString' are not accepted as effort levels.
            const supported = Object.prototype.hasOwnProperty.call(levels, aliasOutputEffort) && levels[aliasOutputEffort]
            if (aliasOutputEffort !== 'none' && !supported) {
              throw new Error(`Model '${resolvedModelId}' does not support output effort level '${aliasOutputEffort}'. Available: ${Object.keys(levels).join(', ')}`)
            }
          }

          return createModelProxy(resolvedModelId, modelSpec, handlers, aliasOutputEffort, sdkCache, rateLimiter, providersConfig, cooldown, configurations, resolveProviderLimits)
        }
      }

      if (prop === 'getProviderRateLimit') {
        return (providerName) => {
          const entry = providersConfig[providerName]
          if (!entry) return null
          const { rpmLimit, tpmLimit } = entry
          return (rpmLimit || tpmLimit) ? { rpmLimit, tpmLimit } : null
        }
      }

      if (prop === 'setProviderRateLimit') {
        return async (providerName, { rpm, tpm } = {}) => {
          const entry = providersConfig[providerName] || (providersConfig[providerName] = {})
          if (rpm != null) entry.rpmLimit = rpm
          if (tpm != null) entry.tpmLimit = tpm
          await saveProvidersConfig(providersConfig)
          return entry
        }
      }

      if (prop === 'clearProviderRateLimit') {
        return async (providerName) => {
          const entry = providersConfig[providerName]
          if (entry) {
            delete entry.rpmLimit
            delete entry.tpmLimit
            // Drop the provider entry entirely once nothing else is stored on it.
            if (Object.keys(entry).length === 0) delete providersConfig[providerName]
            await saveProvidersConfig(providersConfig)
          }
        }
      }

      if (prop === 'close') {
        return () => {
          sdkCache.clear()
        }
      }

      // Anything else (including `then`, probed when the factory promise
      // resolves) comes from the empty target and is therefore undefined.
      return target[prop]
    }
  })
}
|
|
385
|
+
|
|
386
|
+
const createModelProxy = (resolvedModelId, modelSpec, handlers, aliasOutputEffort, sdkCache, rateLimiter, providersConfig, cooldown, externalConfigurations, resolveProviderLimits) => {
|
|
387
|
+
// modelSpec is the full metadata object for resolvedModelId
|
|
388
|
+
let runtimePromise = null
|
|
389
|
+
|
|
390
|
+
const getRuntime = async () => {
|
|
391
|
+
if (runtimePromise) return runtimePromise
|
|
392
|
+
|
|
393
|
+
runtimePromise = (async () => {
|
|
394
|
+
const providerName = modelSpec.provider
|
|
395
|
+
const provider = providers[providerName]
|
|
396
|
+
if (!provider) {
|
|
397
|
+
throw new Error(`Provider ${providerName} not supported for model ${resolvedModelId}`)
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
// Reuse cached configuration for this provider
|
|
401
|
+
if (sdkCache.has(providerName)) {
|
|
402
|
+
const cached = await sdkCache.get(providerName)
|
|
403
|
+
cached.specs[resolvedModelId] = modelSpec
|
|
404
|
+
return cached
|
|
405
|
+
}
|
|
406
|
+
|
|
407
|
+
// Resolve configuration and cache the promise to handle
|
|
408
|
+
// concurrent use() calls.
|
|
409
|
+
const initPromise = (async () => {
|
|
410
|
+
const configuration = externalConfigurations?.[providerName] ?? await resolveProviderConfiguration(provider, providerName)
|
|
411
|
+
const specs = { [resolvedModelId]: modelSpec }
|
|
412
|
+
return { provider, configuration, specs }
|
|
413
|
+
})()
|
|
414
|
+
|
|
415
|
+
sdkCache.set(providerName, initPromise)
|
|
416
|
+
|
|
417
|
+
try {
|
|
418
|
+
return await initPromise
|
|
419
|
+
} catch (err) {
|
|
420
|
+
sdkCache.delete(providerName)
|
|
421
|
+
throw err
|
|
422
|
+
}
|
|
423
|
+
})().catch(err => {
|
|
424
|
+
runtimePromise = null
|
|
425
|
+
throw err
|
|
426
|
+
})
|
|
427
|
+
|
|
428
|
+
return runtimePromise
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
return new Proxy({}, {
|
|
432
|
+
get: (target, prop) => {
|
|
433
|
+
if (prop === 'id') {
|
|
434
|
+
return resolvedModelId
|
|
435
|
+
}
|
|
436
|
+
if (prop === 'label') {
|
|
437
|
+
return modelSpec.label || modelSpec.displayName || resolvedModelId.split('/').pop()
|
|
438
|
+
}
|
|
439
|
+
if (prop === 'supportsTools') {
|
|
440
|
+
return !!modelSpec.supportsTools
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
if (prop === 'answer') {
|
|
444
|
+
return async (prompt, options = {}) => {
|
|
445
|
+
if (aliasOutputEffort && !options.outputEffort) {
|
|
446
|
+
options.outputEffort = aliasOutputEffort
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
// Verbosity tier — captured from handlers (set by the mohdel factory). Used
|
|
450
|
+
// to gate per-call log lines. Read once per call so the closure doesn't have
|
|
451
|
+
// to dereference handlers.verbosity at each gate.
|
|
452
|
+
const verbosity = handlers.verbosity ?? 1
|
|
453
|
+
|
|
454
|
+
// OTEL span: child of caller's span (engine inference → mohdel answer)
|
|
455
|
+
const { parentSpan, ...sdkOptions } = options
|
|
456
|
+
const spanAttrs = {
|
|
457
|
+
'gen_ai.request.model': resolvedModelId,
|
|
458
|
+
'gen_ai.system': modelSpec.provider
|
|
459
|
+
}
|
|
460
|
+
if (sdkOptions.outputBudget) spanAttrs['gen_ai.request.max_tokens'] = sdkOptions.outputBudget
|
|
461
|
+
if (sdkOptions.outputEffort) spanAttrs['mohdel.output_effort'] = sdkOptions.outputEffort
|
|
462
|
+
const span = startSpan('mohdel.answer', spanAttrs, parentSpan)
|
|
463
|
+
|
|
464
|
+
// Dev visibility: log call entry at debug level. The OTel span carries the
|
|
465
|
+
// same dimensions but is invisible without a configured exporter; this line
|
|
466
|
+
// lets developers see per-call activity directly in their pino output.
|
|
467
|
+
// Gated at verbosity >= 1 (default).
|
|
468
|
+
const startedAt = Date.now()
|
|
469
|
+
if (verbosity >= 1) {
|
|
470
|
+
handlers.debug({ span, provider: modelSpec.provider, model: resolvedModelId, effort: sdkOptions.outputEffort || 'default', budget: sdkOptions.outputBudget || '?', tools: sdkOptions.tools?.length || 0, images: sdkOptions.images?.length || 0 }, '[mohdel:answer] start')
|
|
471
|
+
}
|
|
472
|
+
|
|
473
|
+
// Verbose-only request preview at trace level (verbosity >= 2). Compact
|
|
474
|
+
// summary of what's being sent: input shape, system block presence,
|
|
475
|
+
// message count, last user message preview, tool list.
|
|
476
|
+
// Wrapped in try/catch because prompts have wildly varying shapes
|
|
477
|
+
// (string vs structured vs multimodal arrays).
|
|
478
|
+
if (verbosity >= 2) {
|
|
479
|
+
try {
|
|
480
|
+
const reqMeta = {}
|
|
481
|
+
if (typeof prompt === 'string') {
|
|
482
|
+
reqMeta.input = prompt.slice(0, 500)
|
|
483
|
+
reqMeta.inputLength = prompt.length
|
|
484
|
+
} else if (prompt && typeof prompt === 'object') {
|
|
485
|
+
if (prompt.system) {
|
|
486
|
+
const sysText = Array.isArray(prompt.system)
|
|
487
|
+
? prompt.system.map(s => s?.text || '').join('\n')
|
|
488
|
+
: String(prompt.system)
|
|
489
|
+
reqMeta.systemPreview = sysText.slice(0, 500)
|
|
490
|
+
reqMeta.systemLength = sysText.length
|
|
491
|
+
}
|
|
492
|
+
if (Array.isArray(prompt.messages)) {
|
|
493
|
+
reqMeta.messageCount = prompt.messages.length
|
|
494
|
+
const last = prompt.messages[prompt.messages.length - 1]
|
|
495
|
+
if (last) {
|
|
496
|
+
reqMeta.lastRole = last.role
|
|
497
|
+
reqMeta.lastContentPreview = typeof last.content === 'string'
|
|
498
|
+
? last.content.slice(0, 300)
|
|
499
|
+
: '[multimodal]'
|
|
500
|
+
}
|
|
501
|
+
}
|
|
502
|
+
}
|
|
503
|
+
if (sdkOptions.tools?.length) reqMeta.toolNames = sdkOptions.tools.map(t => t.name)
|
|
504
|
+
if (sdkOptions.images?.length) reqMeta.images = sdkOptions.images.length
|
|
505
|
+
handlers.trace({ span, ...reqMeta }, '[mohdel:answer] request')
|
|
506
|
+
} catch (previewErr) {
|
|
507
|
+
handlers.trace({ span, err: previewErr }, '[mohdel:answer] request preview unavailable')
|
|
508
|
+
}
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
try {
|
|
512
|
+
// 0.90 session handles cooldown + rate-limit + cost inline;
|
|
513
|
+
// factory-owned trackers are threaded in so
|
|
514
|
+
// `setProviderRateLimit` / factory-scoped cooldown settings
|
|
515
|
+
// remain the source of truth for this factory instance.
|
|
516
|
+
const { configuration: defaultConfiguration } = await getRuntime()
|
|
517
|
+
const effectiveConfiguration = sdkOptions.configuration || defaultConfiguration
|
|
518
|
+
delete sdkOptions.configuration
|
|
519
|
+
|
|
520
|
+
const result = await runAnswer({
|
|
521
|
+
provider: modelSpec.provider,
|
|
522
|
+
model: modelSpec.model ?? resolvedModelId.split('/').pop(),
|
|
523
|
+
modelKey: resolvedModelId,
|
|
524
|
+
configuration: effectiveConfiguration,
|
|
525
|
+
prompt,
|
|
526
|
+
options: sdkOptions
|
|
527
|
+
}, { cooldown, limiter: rateLimiter, resolveProviderLimits })
|
|
528
|
+
|
|
529
|
+
// End span with result attributes
|
|
530
|
+
const endAttrs = {
|
|
531
|
+
'gen_ai.usage.input_tokens': result.inputTokens || 0,
|
|
532
|
+
'gen_ai.usage.output_tokens': result.outputTokens || 0,
|
|
533
|
+
'mohdel.thinking_tokens': result.thinkingTokens || 0,
|
|
534
|
+
'mohdel.status': result.status
|
|
535
|
+
}
|
|
536
|
+
if (result.cost != null) endAttrs['mohdel.cost'] = result.cost
|
|
537
|
+
if (result.warning) endAttrs['mohdel.warning'] = result.warning
|
|
538
|
+
if (result.timestamps) {
|
|
539
|
+
const start = BigInt(result.timestamps.start)
|
|
540
|
+
const first = BigInt(result.timestamps.first)
|
|
541
|
+
if (first > start) {
|
|
542
|
+
endAttrs['mohdel.time_to_first_token_ms'] = Number(first - start) / 1e6
|
|
543
|
+
}
|
|
544
|
+
}
|
|
545
|
+
endSpanOk(span, endAttrs)
|
|
546
|
+
|
|
547
|
+
// Dev visibility: per-call summary at debug level. Mirrors what the OTel
|
|
548
|
+
// span captures so developers see the same data in their pino stream
|
|
549
|
+
// without needing an exporter. Cost is formatted in dollars to 5 decimals
|
|
550
|
+
// (per-call cost is typically sub-cent for short prompts).
|
|
551
|
+
// Gated at verbosity >= 1 (default).
|
|
552
|
+
const totalMs = Date.now() - startedAt
|
|
553
|
+
const ttftMs = endAttrs['mohdel.time_to_first_token_ms']
|
|
554
|
+
if (verbosity >= 1) {
|
|
555
|
+
handlers.debug({
|
|
556
|
+
span,
|
|
557
|
+
status: result.status,
|
|
558
|
+
in: result.inputTokens || 0,
|
|
559
|
+
out: result.outputTokens || 0,
|
|
560
|
+
think: result.thinkingTokens || 0,
|
|
561
|
+
cost: result.cost != null ? result.cost : undefined,
|
|
562
|
+
ttf: ttftMs != null ? Math.round(ttftMs) : undefined,
|
|
563
|
+
total: totalMs,
|
|
564
|
+
warning: result.warning || undefined
|
|
565
|
+
}, '[mohdel:answer] done')
|
|
566
|
+
}
|
|
567
|
+
|
|
568
|
+
// Trace-level basic envelope for deep debugging. See
|
|
569
|
+
// LOGGING.md: trace may include user-visible content
|
|
570
|
+
// (prompts, outputs, tool args), so the consumer's log
|
|
571
|
+
// pipeline must treat trace output as sensitive. Output
|
|
572
|
+
// preview truncated to 200 chars. Gated at verbosity >= 1.
|
|
573
|
+
if (verbosity >= 1) {
|
|
574
|
+
handlers.trace({
|
|
575
|
+
span,
|
|
576
|
+
status: result.status,
|
|
577
|
+
inputTokens: result.inputTokens,
|
|
578
|
+
outputTokens: result.outputTokens,
|
|
579
|
+
thinkingTokens: result.thinkingTokens,
|
|
580
|
+
toolCallCount: result.toolCalls?.length || 0,
|
|
581
|
+
outputPreview: typeof result.output === 'string' ? result.output.slice(0, 200) : null
|
|
582
|
+
}, '[mohdel:answer] result')
|
|
583
|
+
}
|
|
584
|
+
|
|
585
|
+
// Verbose-only deep envelope at trace level (verbosity >= 2):
|
|
586
|
+
// - Tool call expansion (id, name, full args) when present
|
|
587
|
+
// - Full output preview (truncated to 2000 chars instead of 200)
|
|
588
|
+
// These are separate lines so they can be filtered independently.
|
|
589
|
+
if (verbosity >= 2) {
|
|
590
|
+
if (result.toolCalls?.length) {
|
|
591
|
+
try {
|
|
592
|
+
handlers.trace({ span, toolCalls: result.toolCalls.map(tc => ({ id: tc.id, name: tc.name, arguments: tc.arguments })) }, '[mohdel:answer] tool_calls')
|
|
593
|
+
} catch (toolErr) {
|
|
594
|
+
handlers.trace({ span, err: toolErr }, '[mohdel:answer] tool_calls serialization failed')
|
|
595
|
+
}
|
|
596
|
+
}
|
|
597
|
+
if (typeof result.output === 'string' && result.output.length > 0) {
|
|
598
|
+
handlers.trace({ span, output: result.output.slice(0, 2000) }, '[mohdel:answer] output')
|
|
599
|
+
}
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
if (handlers.onSuccess) {
|
|
603
|
+
try { handlers.onSuccess(result, { model: resolvedModelId, provider: modelSpec.provider }) } catch (e) { handlers.warn({ span, err: e }, '[mohdel:answer] onSuccess callback error') }
|
|
604
|
+
}
|
|
605
|
+
|
|
606
|
+
return result
|
|
607
|
+
} catch (err) {
|
|
608
|
+
endSpanError(span, err)
|
|
609
|
+
|
|
610
|
+
// Cooldown-rejected calls are the expected fast-fail path
|
|
611
|
+
// when the provider is in backoff; annotate the span so
|
|
612
|
+
// operators can distinguish these from real adapter errors.
|
|
613
|
+
if (err.message === 'PROVIDER_COOLDOWN') {
|
|
614
|
+
span.setAttribute('mohdel.cooldown', true)
|
|
615
|
+
}
|
|
616
|
+
|
|
617
|
+
// Dev visibility: surface failure with provider/model + total latency.
|
|
618
|
+
// Provider SDK files (anthropic.js etc.) emit their own [mohdel:<provider>]
|
|
619
|
+
// warn lines on the underlying error; this is the consolidated boundary
|
|
620
|
+
// line so callers can spot failures without grepping per-provider prefixes.
|
|
621
|
+
const totalMs = Date.now() - startedAt
|
|
622
|
+
handlers.warn({ span, err, provider: modelSpec.provider, model: resolvedModelId, totalMs }, '[mohdel:answer] failed')
|
|
623
|
+
|
|
624
|
+
if (handlers.onFailure) {
|
|
625
|
+
try { handlers.onFailure(err, { model: resolvedModelId, provider: modelSpec.provider }) } catch (e) { handlers.warn({ span, err: e }, '[mohdel:answer] onFailure callback error') }
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
throw err
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
if (prop === 'image') {
|
|
634
|
+
return async (prompt, options = {}) => {
|
|
635
|
+
const { configuration } = await getRuntime()
|
|
636
|
+
return runAnswerImage({
|
|
637
|
+
provider: modelSpec.provider,
|
|
638
|
+
model: modelSpec.model ?? resolvedModelId.split('/').pop(),
|
|
639
|
+
configuration,
|
|
640
|
+
prompt,
|
|
641
|
+
options,
|
|
642
|
+
spec: modelSpec
|
|
643
|
+
})
|
|
644
|
+
}
|
|
645
|
+
}
|
|
646
|
+
|
|
647
|
+
if (prop === 'setRateLimit') {
|
|
648
|
+
return async ({ rpm, tpm } = {}) => {
|
|
649
|
+
const curatedCache = getCuratedCacheSnapshot()
|
|
650
|
+
const model = curatedCache[resolvedModelId] || (curatedCache[resolvedModelId] = { ...modelSpec })
|
|
651
|
+
if (rpm != null) model.rpmLimit = rpm
|
|
652
|
+
if (tpm != null) model.tpmLimit = tpm
|
|
653
|
+
model.rateLimitScope = 'model'
|
|
654
|
+
await persistCuratedCache()
|
|
655
|
+
return { rpmLimit: model.rpmLimit, tpmLimit: model.tpmLimit }
|
|
656
|
+
}
|
|
657
|
+
}
|
|
658
|
+
|
|
659
|
+
if (prop === 'clearRateLimit') {
|
|
660
|
+
return async () => {
|
|
661
|
+
const curatedCache = getCuratedCacheSnapshot()
|
|
662
|
+
const model = curatedCache[resolvedModelId]
|
|
663
|
+
if (model) {
|
|
664
|
+
delete model.rpmLimit
|
|
665
|
+
delete model.tpmLimit
|
|
666
|
+
delete model.rateLimitScope
|
|
667
|
+
await persistCuratedCache()
|
|
668
|
+
}
|
|
669
|
+
}
|
|
670
|
+
}
|
|
671
|
+
|
|
672
|
+
if (prop === 'addTag') {
|
|
673
|
+
return async (tag) => {
|
|
674
|
+
if (!isValidTag(tag)) throw new Error(`Invalid tag "${tag}". Tags must match /^[a-zA-Z][a-zA-Z0-9._-]{0,31}$/.`)
|
|
675
|
+
const curatedCache = getCuratedCacheSnapshot()
|
|
676
|
+
const model = curatedCache[resolvedModelId] || (curatedCache[resolvedModelId] = { ...modelSpec })
|
|
677
|
+
|
|
678
|
+
model.tags = model.tags || []
|
|
679
|
+
|
|
680
|
+
if (!model.tags.includes(tag)) {
|
|
681
|
+
model.tags.push(tag)
|
|
682
|
+
await persistCuratedCache()
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
return model.tags
|
|
686
|
+
}
|
|
687
|
+
}
|
|
688
|
+
|
|
689
|
+
if (prop === 'removeTag' || prop === 'delTag') {
|
|
690
|
+
return async (tag) => {
|
|
691
|
+
const curatedCache = getCuratedCacheSnapshot()
|
|
692
|
+
const model = curatedCache[resolvedModelId]
|
|
693
|
+
if (!model) return modelSpec.tags || []
|
|
694
|
+
|
|
695
|
+
if (model.tags && Array.isArray(model.tags)) {
|
|
696
|
+
model.tags = model.tags.filter(t => t !== tag)
|
|
697
|
+
await persistCuratedCache()
|
|
698
|
+
}
|
|
699
|
+
|
|
700
|
+
return model.tags || []
|
|
701
|
+
}
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
if (prop === 'listTags' || prop === 'tags') {
|
|
705
|
+
return () => { // Sync
|
|
706
|
+
const curatedCache = getCuratedCacheSnapshot()
|
|
707
|
+
const model = curatedCache[resolvedModelId] || modelSpec
|
|
708
|
+
return model?.tags || []
|
|
709
|
+
}
|
|
710
|
+
}
|
|
711
|
+
|
|
712
|
+
if (prop === 'info') {
|
|
713
|
+
return () => { // Sync
|
|
714
|
+
const catalog = externalConfigurations ? null : getCuratedCacheSnapshot()
|
|
715
|
+
return catalog?.[resolvedModelId] ? { ...catalog[resolvedModelId] } : { ...modelSpec }
|
|
716
|
+
}
|
|
717
|
+
}
|
|
718
|
+
|
|
719
|
+
return target[prop]
|
|
720
|
+
}
|
|
721
|
+
})
|
|
722
|
+
}
|
|
723
|
+
|
|
724
|
+
export { silent } from './logger.js'
|
|
725
|
+
export { loadCuratedCache, getCuratedCacheSnapshot } from './curated-cache.js'
|
|
726
|
+
export default mohdel
|