mohdel 0.93.0 → 0.95.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/js/core/envelope.js +5 -3
- package/js/core/image.js +4 -3
- package/js/core/model-id.js +92 -0
- package/js/factory/bridge.js +2 -4
- package/js/session/adapters/_cancelled.js +2 -1
- package/js/session/adapters/_chat_completions.js +5 -4
- package/js/session/adapters/_pricing.js +51 -5
- package/js/session/adapters/anthropic.js +4 -3
- package/js/session/adapters/gemini.js +7 -5
- package/js/session/adapters/image/novita.js +2 -1
- package/js/session/adapters/image/openai.js +3 -2
- package/js/session/adapters/index.js +1 -1
- package/js/session/adapters/openai.js +7 -5
- package/js/session/adapters/xai.js +1 -1
- package/js/session/run.js +42 -28
- package/js/session/run_image.js +2 -1
- package/package.json +3 -3
package/js/core/envelope.js
CHANGED
|
@@ -23,8 +23,11 @@
|
|
|
23
23
|
* @property {string} [traceparent] W3C tracecontext header.
|
|
24
24
|
* @property {string} [baggage] W3C baggage header.
|
|
25
25
|
*
|
|
26
|
-
* @property {
|
|
27
|
-
*
|
|
26
|
+
* @property {import('./model-id.js').ModelId} model
|
|
27
|
+
* Full mohdel id — `"<provider>/<bare>[:<effort>]"`. Same shape
|
|
28
|
+
* on the wire and in-process. See PROTOCOL §3. No separate
|
|
29
|
+
* `provider` field exists at any layer; callers that need the
|
|
30
|
+
* provider or bare part use the helpers in `core/model-id.js`.
|
|
28
31
|
*
|
|
29
32
|
* @property {(string|Message[])} prompt
|
|
30
33
|
* Either a plain string or a structured array of messages.
|
|
@@ -123,7 +126,6 @@ export const ENVELOPE_FIELDS = Object.freeze([
|
|
|
123
126
|
'auth',
|
|
124
127
|
'traceparent',
|
|
125
128
|
'baggage',
|
|
126
|
-
'provider',
|
|
127
129
|
'model',
|
|
128
130
|
'prompt',
|
|
129
131
|
'outputBudget',
|
package/js/core/image.js
CHANGED
|
@@ -17,8 +17,10 @@
|
|
|
17
17
|
* @property {string} [traceparent]
|
|
18
18
|
* @property {string} [baggage]
|
|
19
19
|
*
|
|
20
|
-
* @property {
|
|
21
|
-
*
|
|
20
|
+
* @property {import('./model-id.js').ModelId} model
|
|
21
|
+
* Full mohdel id — `"<provider>/<bare>"`. Same shape as
|
|
22
|
+
* `CallEnvelope.model` (see `envelope.js`). No separate `provider`
|
|
23
|
+
* field.
|
|
22
24
|
* @property {string} prompt
|
|
23
25
|
*
|
|
24
26
|
* @property {string} [size] e.g. "1024x1024". Provider-specific.
|
|
@@ -50,7 +52,6 @@ export const IMAGE_ENVELOPE_FIELDS = Object.freeze([
|
|
|
50
52
|
'auth',
|
|
51
53
|
'traceparent',
|
|
52
54
|
'baggage',
|
|
53
|
-
'provider',
|
|
54
55
|
'model',
|
|
55
56
|
'prompt',
|
|
56
57
|
'size',
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Model-id helpers.
|
|
3
|
+
*
|
|
4
|
+
* A mohdel model id is a single string of shape
|
|
5
|
+
* `"<provider>/<bare>[:<effort>]"` — same on the wire and in-process.
|
|
6
|
+
* See PROTOCOL §3. Nothing in mohdel ever holds the id in a split
|
|
7
|
+
* object form; when the provider or bare part is needed, these
|
|
8
|
+
* helpers return it as a substring.
|
|
9
|
+
*
|
|
10
|
+
* `parseModelId` validates + brands at the boundary (factory input,
|
|
11
|
+
* wire deserialize, admin endpoints). After that every `ModelId` in
|
|
12
|
+
* memory is known-valid; adapters and core code call the accessors
|
|
13
|
+
* freely without re-validating.
|
|
14
|
+
*
|
|
15
|
+
* @module core/model-id
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
/**
|
|
19
|
+
* Branded string type. Only `parseModelId` produces one.
|
|
20
|
+
* @typedef {string & { __brand: 'ModelId' }} ModelId
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
// Shape check for `"<provider>/<bare>[:<effort>]"`, case-insensitive:
//   provider — alphanumeric first char, then alphanumerics or `-`
//   bare     — alphanumeric first char, then alphanumerics, `.`, `_` or `-`
//   effort   — optional `:` followed by one or more letters
const MODEL_ID_RE = /^[a-z0-9][a-z0-9-]*\/[a-z0-9][a-z0-9._-]*(?::[a-z]+)?$/i

/**
 * Boundary validator: checks that `raw` is a string matching the
 * model-id shape and hands it back typed as a `ModelId`. The value
 * itself is returned untouched — branding is a type-level cast only.
 *
 * @param {string} raw Candidate id.
 * @returns {ModelId} The same string, branded.
 * @throws {TypeError} When `raw` is not a string or does not match
 *   `MODEL_ID_RE`.
 */
export function parseModelId (raw) {
  const wellFormed = typeof raw === 'string' && MODEL_ID_RE.test(raw)
  if (wellFormed) {
    return /** @type {ModelId} */ (raw)
  }
  throw new TypeError(`invalid model id: ${JSON.stringify(raw)} (expected "<provider>/<bare>[:<effort>]")`)
}
|
|
39
|
+
|
|
40
|
+
/**
 * Provider part of a model id: the text in front of the first `/`.
 *
 * Returns the empty string when the id contains no `/` or when the
 * `/` is the very first character (i.e. the provider would be empty).
 *
 * @param {ModelId | string} model
 * @returns {string}
 */
export function providerOf (model) {
  const cut = model.indexOf('/')
  if (cut <= 0) return ''
  return model.substring(0, cut)
}
|
|
49
|
+
|
|
50
|
+
/**
 * Bare part of a model id: everything that follows the first `/`.
 * An `:effort` suffix, if present, is kept — use `catalogKey()` when
 * the suffix must be dropped.
 *
 * An id with no `/` at all is returned unchanged.
 *
 * @param {ModelId | string} model
 * @returns {string}
 */
export function bareOf (model) {
  const cut = model.indexOf('/')
  if (cut === -1) return model
  return model.substring(cut + 1)
}
|
|
62
|
+
|
|
63
|
+
/**
 * Catalog lookup key for a model id: the id with its trailing
 * `:effort` suffix cut off, i.e. `<provider>/<bare>`. Effort variants
 * share the entry of their base model, so prices, thinking levels,
 * output limits etc. are all looked up under this key.
 *
 * @param {ModelId | string} model
 * @returns {string}
 */
export function catalogKey (model) {
  const firstSlash = model.indexOf('/')
  const lastColon = model.lastIndexOf(':')
  // The last `:` is taken as the effort separator only when it sits
  // after the first `/`; a `:` inside the provider segment is left
  // alone. Note that with no `/` at all `firstSlash` is -1, so any
  // `:` in such a string still counts as the separator.
  if (lastColon <= firstSlash) return model
  return model.substring(0, lastColon)
}
|
|
80
|
+
|
|
81
|
+
/**
 * Effort suffix of a model id — the text after the last `:` — with
 * the `:` itself removed. Returns `undefined` when the id has no `:`
 * positioned after its first `/`.
 *
 * @param {ModelId | string} model
 * @returns {string | undefined}
 */
export function effortOf (model) {
  const firstSlash = model.indexOf('/')
  const lastColon = model.lastIndexOf(':')
  // Same separator rule as the catalog key: the colon must come after
  // the first slash to count.
  return lastColon > firstSlash ? model.substring(lastColon + 1) : undefined
}
|
package/js/factory/bridge.js
CHANGED
|
@@ -137,8 +137,7 @@ export async function runAnswerImage ({ provider, model, configuration, prompt,
|
|
|
137
137
|
callId: options.callId || newCallId(),
|
|
138
138
|
authId: options.authId || 'local',
|
|
139
139
|
auth: configToAuth(configuration),
|
|
140
|
-
provider
|
|
141
|
-
model,
|
|
140
|
+
model: `${provider}/${model}`,
|
|
142
141
|
prompt
|
|
143
142
|
}
|
|
144
143
|
if (options.size) envelope.size = options.size
|
|
@@ -164,8 +163,7 @@ function toEnvelope ({ provider, model, configuration, prompt, options }) {
|
|
|
164
163
|
callId: options.callId || newCallId(),
|
|
165
164
|
authId: options.authId || 'local',
|
|
166
165
|
auth: configToAuth(configuration),
|
|
167
|
-
provider,
|
|
168
|
-
model,
|
|
166
|
+
model: /** @type {import('#core/model-id.js').ModelId} */ (`${provider}/${model}`),
|
|
169
167
|
prompt: toEnvelopePrompt(prompt)
|
|
170
168
|
}
|
|
171
169
|
|
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
|
|
14
14
|
import { STATUS_INCOMPLETE, WARNING_CANCELLED } from '#core/status.js'
|
|
15
15
|
import { costFor } from './_pricing.js'
|
|
16
|
+
import { catalogKey } from '#core/model-id.js'
|
|
16
17
|
|
|
17
18
|
/**
|
|
18
19
|
* @param {string} start hrtime-bigint-as-string at call entry
|
|
@@ -34,7 +35,7 @@ export function cancelledDone (start, first, envelope, output, inputTokens, outp
|
|
|
34
35
|
outputTokens,
|
|
35
36
|
thinkingTokens: 0,
|
|
36
37
|
cost: costFor(
|
|
37
|
-
|
|
38
|
+
catalogKey(envelope.model),
|
|
38
39
|
{ inputTokens, outputTokens, thinkingTokens: 0 }
|
|
39
40
|
),
|
|
40
41
|
timestamps: { start, first: first ?? end, end },
|
|
@@ -19,6 +19,7 @@
|
|
|
19
19
|
import { getSpec } from './_catalog.js'
|
|
20
20
|
import { classifyProviderError } from './_errors.js'
|
|
21
21
|
import { costFor } from './_pricing.js'
|
|
22
|
+
import { catalogKey, bareOf } from '#core/model-id.js'
|
|
22
23
|
import {
|
|
23
24
|
STATUS_COMPLETED,
|
|
24
25
|
STATUS_INCOMPLETE,
|
|
@@ -78,7 +79,7 @@ const DSML_PARAM_RE = /<\uFF5CDSML\uFF5Cparameter\s+name="([^"]+)"(?:\s+string="
|
|
|
78
79
|
* @returns {AsyncGenerator<import('#core/events.js').Event>}
|
|
79
80
|
*/
|
|
80
81
|
export async function * runChatCompletions (envelope, client, config, deps = {}) {
|
|
81
|
-
const spec = getSpec(
|
|
82
|
+
const spec = getSpec(catalogKey(envelope.model)) || {}
|
|
82
83
|
const start = String(process.hrtime.bigint())
|
|
83
84
|
|
|
84
85
|
const args = buildRequest(envelope, spec, config)
|
|
@@ -263,7 +264,7 @@ function finalize ({ envelope, content, toolCalls, usage, finishReason, start, f
|
|
|
263
264
|
outputTokens: visibleOutputTokens,
|
|
264
265
|
thinkingTokens,
|
|
265
266
|
cost: costFor(
|
|
266
|
-
|
|
267
|
+
catalogKey(envelope.model),
|
|
267
268
|
{ inputTokens, outputTokens: visibleOutputTokens, thinkingTokens }
|
|
268
269
|
),
|
|
269
270
|
timestamps: { start, first: first ?? end, end }
|
|
@@ -284,7 +285,7 @@ function finalize ({ envelope, content, toolCalls, usage, finishReason, start, f
|
|
|
284
285
|
function buildRequest (envelope, spec, config) {
|
|
285
286
|
/** @type {Record<string, any>} */
|
|
286
287
|
const args = {
|
|
287
|
-
model: envelope.model,
|
|
288
|
+
model: spec?.model ?? bareOf(envelope.model),
|
|
288
289
|
temperature: 0,
|
|
289
290
|
messages: toChatMessages(envelope.prompt)
|
|
290
291
|
}
|
|
@@ -319,7 +320,7 @@ function buildRequest (envelope, spec, config) {
|
|
|
319
320
|
args.max_tokens += headroom
|
|
320
321
|
}
|
|
321
322
|
delete args.temperature
|
|
322
|
-
if (config.reasoningField === 'cerebras_zai' && /zai/i.test(envelope.model)) {
|
|
323
|
+
if (config.reasoningField === 'cerebras_zai' && /zai/i.test(bareOf(envelope.model))) {
|
|
323
324
|
args.disable_reasoning = false
|
|
324
325
|
} else {
|
|
325
326
|
args.reasoning_effort = effort
|
|
@@ -14,6 +14,19 @@ import { getSpec, setCatalog } from './_catalog.js'
|
|
|
14
14
|
/**
|
|
15
15
|
* Pure cost computation from spec + usage.
|
|
16
16
|
*
|
|
17
|
+
* Each price field (`inputPrice` / `outputPrice` / `thinkingPrice`) is
|
|
18
|
+
* one of:
|
|
19
|
+
*
|
|
20
|
+
* - a `number` — flat per-million rate; or
|
|
21
|
+
* - an object `{">N": number, ..., "default": number}` — tiered.
|
|
22
|
+
* The active rate is the one under the highest `>N` key that the
|
|
23
|
+
* call's `inputTokens` exceeds; falls back to `"default"` when
|
|
24
|
+
* nothing matches. Keys that aren't `">N"` or `"default"` are
|
|
25
|
+
* ignored. `>` is strict — at exactly N, the default is used.
|
|
26
|
+
*
|
|
27
|
+
* `thinkingPrice` is optional and falls back to the resolved
|
|
28
|
+
* `outputPrice` when absent.
|
|
29
|
+
*
|
|
17
30
|
* @param {any} spec Catalog entry (with `inputPrice`/`outputPrice`/`thinkingPrice`),
|
|
18
31
|
* or `undefined`.
|
|
19
32
|
* @param {{inputTokens?: number, outputTokens?: number, thinkingTokens?: number}} usage
|
|
@@ -21,17 +34,50 @@ import { getSpec, setCatalog } from './_catalog.js'
|
|
|
21
34
|
*/
|
|
22
35
|
export function computeCost (spec, usage) {
|
|
23
36
|
if (!spec) return 0
|
|
24
|
-
const ip = spec.inputPrice
|
|
25
|
-
const op = spec.outputPrice
|
|
26
|
-
if (typeof ip !== 'number' || typeof op !== 'number') return 0
|
|
27
37
|
const i = usage.inputTokens ?? 0
|
|
28
38
|
const o = usage.outputTokens ?? 0
|
|
29
39
|
const t = usage.thinkingTokens ?? 0
|
|
30
|
-
const
|
|
31
|
-
const
|
|
40
|
+
const ip = resolveTier(spec.inputPrice, i)
|
|
41
|
+
const op = resolveTier(spec.outputPrice, i)
|
|
42
|
+
if (typeof ip !== 'number' || typeof op !== 'number') return 0
|
|
43
|
+
const tp = resolveTier(spec.thinkingPrice, i)
|
|
44
|
+
const tpFinal = typeof tp === 'number' ? tp : op
|
|
45
|
+
const total = (i * ip + o * op + t * tpFinal) / 1_000_000
|
|
32
46
|
return round(total)
|
|
33
47
|
}
|
|
34
48
|
|
|
49
|
+
/**
 * Resolve a price field against a token count.
 *
 * - A `number` is a flat rate and is returned as-is.
 * - An object is a tier map `{">N": number, ..., "default": number}`:
 *   the rate under the highest `>N` key that `tokens` strictly exceeds
 *   is returned, falling back to `default` when no tier applies.
 * - Anything else (absent, `null`, non-object) yields `null`.
 *
 * Keys that are neither `">N"` nor `"default"` are ignored, and so are
 * tiers whose rate is not a number. A malformed tier never takes part
 * in the "highest threshold" selection, so the outcome does not depend
 * on the order in which keys were inserted into the map.
 *
 * Callers decide what `null` means: "no price" (cost = 0) or a
 * fallback to another field (thinkingPrice → outputPrice).
 *
 * @param {unknown} price Flat rate, tier map, or absent.
 * @param {number} tokens Token count compared against each `>N`.
 * @returns {number | null}
 */
function resolveTier (price, tokens) {
  if (typeof price === 'number') return price
  if (!price || typeof price !== 'object') return null

  const tiers = /** @type {Record<string, unknown>} */ (price)
  let best = null
  let bestThreshold = -1
  for (const [key, rate] of Object.entries(tiers)) {
    // Skip malformed tiers up front so they cannot claim the
    // "highest threshold" slot without supplying a usable rate.
    if (typeof rate !== 'number') continue
    // `default` (and any other non-`>N` key) fails this match.
    const m = /^>(\d+)$/.exec(key)
    if (!m) continue
    const threshold = Number(m[1])
    // `>` is strict: at exactly N tokens the tier does not apply.
    if (tokens > threshold && threshold > bestThreshold) {
      bestThreshold = threshold
      best = rate
    }
  }
  if (best !== null) return best
  return typeof tiers.default === 'number' ? tiers.default : null
}
|
|
80
|
+
|
|
35
81
|
/**
|
|
36
82
|
* @param {string} model Fully-qualified `<provider>/<model>`.
|
|
37
83
|
* @param {{inputTokens?: number, outputTokens?: number, thinkingTokens?: number}} usage
|
|
@@ -28,6 +28,7 @@ import { getSpec } from './_catalog.js'
|
|
|
28
28
|
import { classifyProviderError } from './_errors.js'
|
|
29
29
|
import { loadImages } from './_images.js'
|
|
30
30
|
import { costFor } from './_pricing.js'
|
|
31
|
+
import { catalogKey, bareOf } from '#core/model-id.js'
|
|
31
32
|
import {
|
|
32
33
|
toAnthropicTools,
|
|
33
34
|
fromAnthropicToolCalls,
|
|
@@ -216,7 +217,7 @@ export async function * anthropic (envelope, deps = {}) {
|
|
|
216
217
|
outputTokens: messageOutputTokens,
|
|
217
218
|
thinkingTokens: estimatedThinkingTokens,
|
|
218
219
|
cost: costFor(
|
|
219
|
-
|
|
220
|
+
catalogKey(envelope.model),
|
|
220
221
|
{ inputTokens, outputTokens: messageOutputTokens, thinkingTokens: estimatedThinkingTokens }
|
|
221
222
|
),
|
|
222
223
|
timestamps: { start, first: first ?? end, end }
|
|
@@ -253,12 +254,12 @@ function safeParseJson (s) {
|
|
|
253
254
|
* @param {string} system
|
|
254
255
|
*/
|
|
255
256
|
function buildRequest (envelope, conversation, system) {
|
|
256
|
-
const spec = getSpec(
|
|
257
|
+
const spec = getSpec(catalogKey(envelope.model))
|
|
257
258
|
const outputTokenLimit = spec?.outputTokenLimit
|
|
258
259
|
|
|
259
260
|
/** @type {Record<string, any>} */
|
|
260
261
|
const request = {
|
|
261
|
-
model: envelope.model,
|
|
262
|
+
model: spec?.model ?? bareOf(envelope.model),
|
|
262
263
|
max_tokens: envelope.outputBudget ?? outputTokenLimit ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
|
|
263
264
|
messages: conversation
|
|
264
265
|
}
|
|
@@ -30,6 +30,7 @@ import { classifyProviderError } from './_errors.js'
|
|
|
30
30
|
import { loadImages } from './_images.js'
|
|
31
31
|
import { loadVideos } from './_videos.js'
|
|
32
32
|
import { costFor } from './_pricing.js'
|
|
33
|
+
import { catalogKey, bareOf } from '#core/model-id.js'
|
|
33
34
|
import {
|
|
34
35
|
toGeminiTools,
|
|
35
36
|
fromGeminiToolCalls,
|
|
@@ -195,7 +196,7 @@ export async function * gemini (envelope, deps = {}) {
|
|
|
195
196
|
outputTokens,
|
|
196
197
|
thinkingTokens,
|
|
197
198
|
cost: costFor(
|
|
198
|
-
|
|
199
|
+
catalogKey(envelope.model),
|
|
199
200
|
{ inputTokens, outputTokens, thinkingTokens }
|
|
200
201
|
),
|
|
201
202
|
timestamps: { start, first: first ?? end, end }
|
|
@@ -214,7 +215,7 @@ export async function * gemini (envelope, deps = {}) {
|
|
|
214
215
|
* @param {string} systemInstruction
|
|
215
216
|
*/
|
|
216
217
|
function buildRequest (envelope, contents, systemInstruction) {
|
|
217
|
-
const spec = getSpec(
|
|
218
|
+
const spec = getSpec(catalogKey(envelope.model))
|
|
218
219
|
|
|
219
220
|
/** @type {Record<string, any>} */
|
|
220
221
|
const config = {}
|
|
@@ -235,9 +236,10 @@ function buildRequest (envelope, contents, systemInstruction) {
|
|
|
235
236
|
const effort = envelope.outputEffort ?? spec?.defaultThinkingEffort
|
|
236
237
|
if (spec?.thinkingEffortLevels && effort && effort !== 'none') {
|
|
237
238
|
const budget = spec.thinkingEffortLevels[effort]
|
|
238
|
-
|
|
239
|
+
const bare = bareOf(envelope.model)
|
|
240
|
+
if (/^gemini-3/.test(bare)) {
|
|
239
241
|
config.thinkingConfig = { includeThoughts: true, thinkingLevel: effort }
|
|
240
|
-
} else if (/gemini-2/.test(
|
|
242
|
+
} else if (/gemini-2/.test(bare)) {
|
|
241
243
|
if (typeof budget === 'number') {
|
|
242
244
|
config.thinkingConfig = { thinkingBudget: budget }
|
|
243
245
|
}
|
|
@@ -253,7 +255,7 @@ function buildRequest (envelope, contents, systemInstruction) {
|
|
|
253
255
|
|
|
254
256
|
/** @type {Record<string, any>} */
|
|
255
257
|
const request = {
|
|
256
|
-
model: envelope.model,
|
|
258
|
+
model: spec?.model ?? bareOf(envelope.model),
|
|
257
259
|
contents
|
|
258
260
|
}
|
|
259
261
|
if (Object.keys(config).length > 0) request.config = config
|
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
|
|
13
13
|
import { getSpec } from '../_catalog.js'
|
|
14
14
|
import { classifyProviderError } from '../_errors.js'
|
|
15
|
+
import { catalogKey } from '#core/model-id.js'
|
|
15
16
|
|
|
16
17
|
const BASE_URL = 'https://api.novita.ai'
|
|
17
18
|
const NOVITA_TASK_POLL_INTERVAL_MS = 1000
|
|
@@ -32,7 +33,7 @@ export async function novitaImage (envelope, deps = {}) {
|
|
|
32
33
|
const sleep = deps.sleep ?? defaultSleep
|
|
33
34
|
const now = deps.now ?? Date.now
|
|
34
35
|
|
|
35
|
-
const spec = deps.spec ?? getSpec(
|
|
36
|
+
const spec = deps.spec ?? getSpec(catalogKey(envelope.model)) ?? {}
|
|
36
37
|
const endpoint = spec.imageEndpoint
|
|
37
38
|
if (!endpoint) {
|
|
38
39
|
throw typedError('image endpoint not configured', 'PROVIDER_ERROR', false)
|
|
@@ -10,6 +10,7 @@ import OpenAI from 'openai'
|
|
|
10
10
|
|
|
11
11
|
import { getSpec } from '../_catalog.js'
|
|
12
12
|
import { classifyProviderError } from '../_errors.js'
|
|
13
|
+
import { catalogKey, bareOf } from '#core/model-id.js'
|
|
13
14
|
|
|
14
15
|
/**
|
|
15
16
|
* @param {import('#core/image.js').ImageEnvelope} envelope
|
|
@@ -18,10 +19,10 @@ import { classifyProviderError } from '../_errors.js'
|
|
|
18
19
|
*/
|
|
19
20
|
export async function openaiImage (envelope, deps = {}) {
|
|
20
21
|
const client = deps.client ?? new OpenAI({ apiKey: envelope.auth.key })
|
|
21
|
-
const spec = deps.spec ?? getSpec(
|
|
22
|
+
const spec = deps.spec ?? getSpec(catalogKey(envelope.model)) ?? {}
|
|
22
23
|
const start = String(process.hrtime.bigint())
|
|
23
24
|
|
|
24
|
-
const args = { model: envelope.model, prompt: envelope.prompt }
|
|
25
|
+
const args = { model: spec.model ?? bareOf(envelope.model), prompt: envelope.prompt }
|
|
25
26
|
const size = envelope.size || spec.imageDefaultSize
|
|
26
27
|
if (size) args.size = size
|
|
27
28
|
|
|
@@ -29,6 +29,7 @@ import { getSpec } from './_catalog.js'
|
|
|
29
29
|
import { classifyProviderError } from './_errors.js'
|
|
30
30
|
import { loadImages } from './_images.js'
|
|
31
31
|
import { costFor } from './_pricing.js'
|
|
32
|
+
import { catalogKey, providerOf, bareOf } from '#core/model-id.js'
|
|
32
33
|
import {
|
|
33
34
|
toOpenAITools,
|
|
34
35
|
fromOpenAIToolCalls,
|
|
@@ -181,7 +182,7 @@ export async function * openai (envelope, deps = {}) {
|
|
|
181
182
|
outputTokens: messageOutputTokens,
|
|
182
183
|
thinkingTokens,
|
|
183
184
|
cost: costFor(
|
|
184
|
-
|
|
185
|
+
catalogKey(envelope.model),
|
|
185
186
|
{ inputTokens, outputTokens: messageOutputTokens, thinkingTokens }
|
|
186
187
|
),
|
|
187
188
|
timestamps: { start, first: first ?? end, end }
|
|
@@ -200,11 +201,12 @@ export async function * openai (envelope, deps = {}) {
|
|
|
200
201
|
* @param {string} instructions
|
|
201
202
|
*/
|
|
202
203
|
function buildRequest (envelope, input, instructions) {
|
|
203
|
-
const spec = getSpec(
|
|
204
|
+
const spec = getSpec(catalogKey(envelope.model))
|
|
205
|
+
const provider = providerOf(envelope.model)
|
|
204
206
|
|
|
205
207
|
/** @type {Record<string, any>} */
|
|
206
208
|
const request = {
|
|
207
|
-
model: envelope.model,
|
|
209
|
+
model: spec?.model ?? bareOf(envelope.model),
|
|
208
210
|
input
|
|
209
211
|
}
|
|
210
212
|
if (instructions) request.instructions = instructions
|
|
@@ -231,7 +233,7 @@ function buildRequest (envelope, input, instructions) {
|
|
|
231
233
|
if (request.max_output_tokens && typeof headroom === 'number') {
|
|
232
234
|
request.max_output_tokens += headroom
|
|
233
235
|
}
|
|
234
|
-
if (
|
|
236
|
+
if (provider === 'openai') {
|
|
235
237
|
request.reasoning = { effort }
|
|
236
238
|
}
|
|
237
239
|
}
|
|
@@ -253,7 +255,7 @@ function buildRequest (envelope, input, instructions) {
|
|
|
253
255
|
// Per-user identifier — openai uses `safety_identifier`; other
|
|
254
256
|
// Responses-API providers (xai) use the legacy `user` field.
|
|
255
257
|
if (envelope.identifier) {
|
|
256
|
-
if (
|
|
258
|
+
if (provider === 'openai') {
|
|
257
259
|
request.safety_identifier = envelope.identifier
|
|
258
260
|
} else {
|
|
259
261
|
request.user = envelope.identifier
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* xAI adapter — OpenAI Responses API over x.ai/v1. Delegates to the
|
|
3
3
|
* `openai` adapter with a baseURL-configured client; the openai
|
|
4
|
-
* adapter branches on `envelope.
|
|
4
|
+
* adapter branches on `providerOf(envelope.model) === 'openai'` for fields
|
|
5
5
|
* that differ between vendors (reasoning param, safety_identifier).
|
|
6
6
|
*
|
|
7
7
|
* @module session/adapters/xai
|
package/js/session/run.js
CHANGED
|
@@ -26,6 +26,7 @@ import { getAdapter } from './adapters/index.js'
|
|
|
26
26
|
import { isImageProvider } from './adapters/image/index.js'
|
|
27
27
|
import { getSpec } from './adapters/_catalog.js'
|
|
28
28
|
import { getProviderLimits } from './adapters/_providers.js'
|
|
29
|
+
import { providerOf, catalogKey, effortOf } from '#core/model-id.js'
|
|
29
30
|
import * as defaultCooldown from './_cooldown.js'
|
|
30
31
|
import * as defaultLimiter from './_rate_limiter.js'
|
|
31
32
|
import { logger as defaultLogger } from './_logger.js'
|
|
@@ -74,12 +75,13 @@ export async function * run (envelope, {
|
|
|
74
75
|
if (effortNorm.error) { yield effortNorm.error; return }
|
|
75
76
|
envelope = effortNorm.envelope
|
|
76
77
|
|
|
78
|
+
const provider = providerOf(envelope.model)
|
|
77
79
|
const span = openSpan(envelope)
|
|
78
80
|
const log = scopedLogger(logger, envelope, span)
|
|
79
81
|
const startedAt = Date.now()
|
|
80
82
|
|
|
81
83
|
log.debug({
|
|
82
|
-
provider
|
|
84
|
+
provider,
|
|
83
85
|
model: envelope.model,
|
|
84
86
|
effort: envelope.outputEffort ?? 'default',
|
|
85
87
|
outputBudget: envelope.outputBudget ?? null,
|
|
@@ -89,43 +91,57 @@ export async function * run (envelope, {
|
|
|
89
91
|
|
|
90
92
|
let adapter
|
|
91
93
|
try {
|
|
92
|
-
adapter = resolveAdapter(
|
|
94
|
+
adapter = resolveAdapter(provider)
|
|
93
95
|
} catch (e) {
|
|
94
96
|
// Distinguish "image-only provider invoked via answer" from
|
|
95
97
|
// truly-unknown. Novita-and-friends have no text adapter but a
|
|
96
98
|
// caller using `mohdel.use('novita/...').answer(...)` otherwise
|
|
97
99
|
// gets a bare "unknown provider" with no hint.
|
|
98
|
-
if (isImageProvider(
|
|
99
|
-
const detail = `provider '${
|
|
100
|
+
if (isImageProvider(provider)) {
|
|
101
|
+
const detail = `provider '${provider}' supports image generation only; use mohdel.image(...) instead`
|
|
100
102
|
const err = errorEvent(detail, 'PROVIDER_TEXT_NOT_SUPPORTED')
|
|
101
|
-
log.warn({ provider
|
|
103
|
+
log.warn({ provider }, '[mohdel:answer] image-only provider via answer')
|
|
102
104
|
endSpanError(span, new Error(detail))
|
|
103
105
|
yield err
|
|
104
106
|
return
|
|
105
107
|
}
|
|
106
108
|
const err = errorEvent(messageOf(e), 'SESSION_UNKNOWN_PROVIDER')
|
|
107
|
-
log.warn({ err: e, provider
|
|
109
|
+
log.warn({ err: e, provider }, '[mohdel:answer] unknown provider')
|
|
108
110
|
endSpanError(span, e)
|
|
109
111
|
yield err
|
|
110
112
|
return
|
|
111
113
|
}
|
|
112
114
|
|
|
113
|
-
|
|
115
|
+
// Catalog is authoritative: every callable model must have a
|
|
116
|
+
// spec. Without one we'd silently run the provider call with
|
|
117
|
+
// defaults (no rate-limits, no budget clamps, cost=0), masking
|
|
118
|
+
// misconfiguration in the layer that pushed the catalog. Effort
|
|
119
|
+
// suffix is stripped for the lookup — catalog entries are keyed
|
|
120
|
+
// by the bare `<provider>/<bare>` id, not per-effort variants.
|
|
121
|
+
const key = catalogKey(envelope.model)
|
|
122
|
+
const spec = resolveSpec(key)
|
|
123
|
+
if (!spec) {
|
|
124
|
+
const detail = `Unknown model '${key}' — not in catalog`
|
|
125
|
+
const err = errorEvent(detail, 'SESSION_UNKNOWN_MODEL')
|
|
126
|
+
log.warn({ provider, model: envelope.model }, '[mohdel:answer] unknown model')
|
|
127
|
+
endSpanError(span, new Error(detail))
|
|
128
|
+
yield err
|
|
129
|
+
return
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
const coolErr = cooldown.coolingDownError(provider)
|
|
114
133
|
if (coolErr) {
|
|
115
|
-
log.debug({ provider
|
|
134
|
+
log.debug({ provider, detail: coolErr.detail }, '[mohdel:cooldown] fast-fail')
|
|
116
135
|
span.setAttribute('mohdel.cooldown', true)
|
|
117
136
|
endSpanOk(span, { 'mohdel.status': 'cooldown' })
|
|
118
137
|
yield { type: 'error', error: coolErr }
|
|
119
138
|
return
|
|
120
139
|
}
|
|
121
140
|
|
|
122
|
-
const
|
|
123
|
-
const providerCfg = resolveProviderLimits(envelope.provider) || {}
|
|
141
|
+
const providerCfg = resolveProviderLimits(provider) || {}
|
|
124
142
|
const rpmLimit = spec?.rpmLimit ?? providerCfg.rpmLimit
|
|
125
143
|
const tpmLimit = spec?.tpmLimit ?? providerCfg.tpmLimit
|
|
126
|
-
const bucketKey = (spec?.rateLimitScope === 'model')
|
|
127
|
-
? `${envelope.provider}/${envelope.model}`
|
|
128
|
-
: envelope.provider
|
|
144
|
+
const bucketKey = (spec?.rateLimitScope === 'model') ? key : provider
|
|
129
145
|
|
|
130
146
|
// `0` is a killswitch ("deny all"), not "unset"; `undefined`/`null`
|
|
131
147
|
// means no limit configured for that dimension. Gate on nullability
|
|
@@ -175,7 +191,7 @@ export async function * run (envelope, {
|
|
|
175
191
|
// incomplete-budget / tool_use) IS a genuine provider-side
|
|
176
192
|
// success and resets the streak.
|
|
177
193
|
if (ev.result?.warning !== WARNING_CANCELLED) {
|
|
178
|
-
cooldown.reset(
|
|
194
|
+
cooldown.reset(provider)
|
|
179
195
|
}
|
|
180
196
|
if (tpmLimit != null && ev.result) {
|
|
181
197
|
const total =
|
|
@@ -192,10 +208,10 @@ export async function * run (envelope, {
|
|
|
192
208
|
log.debug(summarizeDone(ev.result, startedAt), '[mohdel:answer] done')
|
|
193
209
|
} else if (ev.type === 'error') {
|
|
194
210
|
sawTerminal = true
|
|
195
|
-
recordFailureFromError(cooldown,
|
|
211
|
+
recordFailureFromError(cooldown, provider, ev.error)
|
|
196
212
|
log.warn({
|
|
197
213
|
err: ev.error,
|
|
198
|
-
provider
|
|
214
|
+
provider,
|
|
199
215
|
totalMs: Date.now() - startedAt,
|
|
200
216
|
maxInterFrameMs
|
|
201
217
|
}, '[mohdel:answer] failed')
|
|
@@ -211,7 +227,7 @@ export async function * run (envelope, {
|
|
|
211
227
|
yield fallback
|
|
212
228
|
return
|
|
213
229
|
}
|
|
214
|
-
log.warn({ err: e, provider
|
|
230
|
+
log.warn({ err: e, provider, maxInterFrameMs }, '[mohdel:answer] adapter threw')
|
|
215
231
|
endSpanError(span, e)
|
|
216
232
|
yield errorEvent(messageOf(e), 'SESSION_ADAPTER_THREW')
|
|
217
233
|
return
|
|
@@ -225,7 +241,7 @@ export async function * run (envelope, {
|
|
|
225
241
|
yield fallback
|
|
226
242
|
} else {
|
|
227
243
|
const err = 'adapter returned without a terminal event'
|
|
228
|
-
log.error({ provider
|
|
244
|
+
log.error({ provider, maxInterFrameMs }, '[mohdel:answer] no terminal event')
|
|
229
245
|
endSpanError(span, new Error(err))
|
|
230
246
|
yield errorEvent(err, 'SESSION_ADAPTER_NO_TERMINAL')
|
|
231
247
|
}
|
|
@@ -246,21 +262,19 @@ export async function * run (envelope, {
|
|
|
246
262
|
* }}
|
|
247
263
|
*/
|
|
248
264
|
function normalizeModelEffort (envelope, resolveSpec) {
|
|
249
|
-
const
|
|
250
|
-
|
|
251
|
-
if (colonIdx <= 0) return { envelope }
|
|
265
|
+
const candidate = effortOf(envelope.model)
|
|
266
|
+
if (!candidate) return { envelope }
|
|
252
267
|
if (envelope.outputEffort) return { envelope } // explicit wins
|
|
253
268
|
|
|
254
|
-
const
|
|
255
|
-
const
|
|
256
|
-
const baseSpec = resolveSpec(`${envelope.provider}/${base}`)
|
|
269
|
+
const base = catalogKey(envelope.model)
|
|
270
|
+
const baseSpec = resolveSpec(base)
|
|
257
271
|
if (!baseSpec) return { envelope } // base not known — let full string fall through to not-found
|
|
258
272
|
|
|
259
273
|
if (!baseSpec.thinkingEffortLevels) {
|
|
260
274
|
return {
|
|
261
275
|
envelope,
|
|
262
276
|
error: errorEvent(
|
|
263
|
-
`Model '${
|
|
277
|
+
`Model '${base}' does not support output effort (no thinkingEffortLevels). Cannot use ':${candidate}' suffix.`,
|
|
264
278
|
'SESSION_INVALID_OUTPUT_EFFORT'
|
|
265
279
|
)
|
|
266
280
|
}
|
|
@@ -269,7 +283,7 @@ function normalizeModelEffort (envelope, resolveSpec) {
|
|
|
269
283
|
return {
|
|
270
284
|
envelope,
|
|
271
285
|
error: errorEvent(
|
|
272
|
-
`Model '${
|
|
286
|
+
`Model '${base}' does not support output effort level '${candidate}'. Available: ${Object.keys(baseSpec.thinkingEffortLevels).join(', ')}`,
|
|
273
287
|
'SESSION_INVALID_OUTPUT_EFFORT'
|
|
274
288
|
)
|
|
275
289
|
}
|
|
@@ -290,7 +304,7 @@ function openSpan (envelope) {
|
|
|
290
304
|
/** @type {Record<string, any>} */
|
|
291
305
|
const attrs = {
|
|
292
306
|
'gen_ai.request.model': envelope.model,
|
|
293
|
-
'gen_ai.system': envelope.
|
|
307
|
+
'gen_ai.system': providerOf(envelope.model),
|
|
294
308
|
'mohdel.call_id': envelope.callId,
|
|
295
309
|
'mohdel.auth_id': envelope.authId
|
|
296
310
|
}
|
|
@@ -309,7 +323,7 @@ function scopedLogger (logger, envelope, span) {
|
|
|
309
323
|
return logger.withContext({
|
|
310
324
|
callId: envelope.callId,
|
|
311
325
|
authId: envelope.authId,
|
|
312
|
-
provider: envelope.
|
|
326
|
+
provider: providerOf(envelope.model),
|
|
313
327
|
model: envelope.model,
|
|
314
328
|
traceId: ctx.traceId,
|
|
315
329
|
spanId: ctx.spanId
|
package/js/session/run_image.js
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
|
|
16
16
|
import { getImageAdapter } from './adapters/image/index.js'
|
|
17
17
|
import { classifyProviderError } from './adapters/_errors.js'
|
|
18
|
+
import { providerOf } from '#core/model-id.js'
|
|
18
19
|
|
|
19
20
|
/**
|
|
20
21
|
* @param {import('#core/image.js').ImageEnvelope} envelope
|
|
@@ -33,7 +34,7 @@ import { classifyProviderError } from './adapters/_errors.js'
|
|
|
33
34
|
export async function runImage (envelope, { resolveAdapter = getImageAdapter, spec } = {}) {
|
|
34
35
|
let adapter
|
|
35
36
|
try {
|
|
36
|
-
adapter = resolveAdapter(envelope.
|
|
37
|
+
adapter = resolveAdapter(providerOf(envelope.model))
|
|
37
38
|
} catch (e) {
|
|
38
39
|
return {
|
|
39
40
|
ok: false,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mohdel",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.95.0",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Christophe Le Bars",
|
|
@@ -87,7 +87,7 @@
|
|
|
87
87
|
"@opentelemetry/exporter-trace-otlp-grpc": "^0.215.0",
|
|
88
88
|
"@opentelemetry/sdk-node": "^0.215.0",
|
|
89
89
|
"chalk": "^5.4.0",
|
|
90
|
-
"mohdel-thin-gate-linux-x64-gnu": "0.
|
|
90
|
+
"mohdel-thin-gate-linux-x64-gnu": "0.95.0"
|
|
91
91
|
},
|
|
92
92
|
"dependencies": {
|
|
93
93
|
"@anthropic-ai/sdk": "^0.91.0",
|
|
@@ -103,7 +103,7 @@
|
|
|
103
103
|
},
|
|
104
104
|
"devDependencies": {
|
|
105
105
|
"lint-staged": "^16.4.0",
|
|
106
|
-
"release-it": "^20.0.
|
|
106
|
+
"release-it": "^20.0.1",
|
|
107
107
|
"standard": "^17.1.2",
|
|
108
108
|
"vitest": "^4.1.5"
|
|
109
109
|
}
|