mohdel 0.90.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +377 -0
- package/config/benchmarks.json +39 -0
- package/js/client/call.js +75 -0
- package/js/client/call_image.js +82 -0
- package/js/client/gate-binary.js +72 -0
- package/js/client/index.js +16 -0
- package/js/client/ndjson.js +29 -0
- package/js/client/transport.js +48 -0
- package/js/core/envelope.js +141 -0
- package/js/core/errors.js +75 -0
- package/js/core/events.js +96 -0
- package/js/core/image.js +58 -0
- package/js/core/index.js +10 -0
- package/js/core/status.js +48 -0
- package/js/factory/bridge.js +372 -0
- package/js/session/_cooldown.js +114 -0
- package/js/session/_logger.js +138 -0
- package/js/session/_rate_limiter.js +77 -0
- package/js/session/_tracing.js +58 -0
- package/js/session/adapters/_cancelled.js +44 -0
- package/js/session/adapters/_catalog.js +58 -0
- package/js/session/adapters/_chat_completions.js +439 -0
- package/js/session/adapters/_errors.js +85 -0
- package/js/session/adapters/_images.js +60 -0
- package/js/session/adapters/_lazy_json_cache.js +76 -0
- package/js/session/adapters/_pricing.js +67 -0
- package/js/session/adapters/_providers.js +60 -0
- package/js/session/adapters/_tools.js +185 -0
- package/js/session/adapters/_videos.js +283 -0
- package/js/session/adapters/anthropic.js +397 -0
- package/js/session/adapters/cerebras.js +28 -0
- package/js/session/adapters/deepseek.js +32 -0
- package/js/session/adapters/echo.js +51 -0
- package/js/session/adapters/fake.js +262 -0
- package/js/session/adapters/fireworks.js +46 -0
- package/js/session/adapters/gemini.js +381 -0
- package/js/session/adapters/groq.js +23 -0
- package/js/session/adapters/image/fake.js +55 -0
- package/js/session/adapters/image/index.js +40 -0
- package/js/session/adapters/image/novita.js +135 -0
- package/js/session/adapters/image/openai.js +50 -0
- package/js/session/adapters/index.js +53 -0
- package/js/session/adapters/mistral.js +31 -0
- package/js/session/adapters/novita.js +29 -0
- package/js/session/adapters/openai.js +381 -0
- package/js/session/adapters/openrouter.js +66 -0
- package/js/session/adapters/xai.js +27 -0
- package/js/session/bin.js +54 -0
- package/js/session/driver.js +160 -0
- package/js/session/index.js +18 -0
- package/js/session/run.js +393 -0
- package/js/session/run_image.js +61 -0
- package/package.json +107 -0
- package/src/cli/ask.js +160 -0
- package/src/cli/backup.js +107 -0
- package/src/cli/bench.js +262 -0
- package/src/cli/check.js +123 -0
- package/src/cli/colored-logger.js +67 -0
- package/src/cli/colors.js +13 -0
- package/src/cli/default.js +39 -0
- package/src/cli/index.js +150 -0
- package/src/cli/json-output.js +60 -0
- package/src/cli/model.js +571 -0
- package/src/cli/onboard.js +232 -0
- package/src/cli/rank.js +176 -0
- package/src/cli/ratelimit.js +160 -0
- package/src/cli/tag.js +105 -0
- package/src/lib/assets/alibaba.svg +1 -0
- package/src/lib/assets/anthropic.svg +5 -0
- package/src/lib/assets/deepseek.svg +1 -0
- package/src/lib/assets/gemini.svg +1 -0
- package/src/lib/assets/google.svg +2 -0
- package/src/lib/assets/kwaipilot.svg +1 -0
- package/src/lib/assets/meta.svg +1 -0
- package/src/lib/assets/minimax.svg +9 -0
- package/src/lib/assets/moonshotai.svg +4 -0
- package/src/lib/assets/openai.svg +5 -0
- package/src/lib/assets/xai.svg +1 -0
- package/src/lib/assets/xiaomi.svg +2 -0
- package/src/lib/assets/zai.svg +219 -0
- package/src/lib/benchmark-score.js +215 -0
- package/src/lib/benchmark-truth.js +68 -0
- package/src/lib/cache.js +76 -0
- package/src/lib/common.js +208 -0
- package/src/lib/cooldown.js +63 -0
- package/src/lib/creators.js +71 -0
- package/src/lib/curated-cache.js +146 -0
- package/src/lib/errors.js +126 -0
- package/src/lib/index.js +726 -0
- package/src/lib/logger.js +29 -0
- package/src/lib/providers.js +87 -0
- package/src/lib/rank.js +390 -0
- package/src/lib/rate-limiter.js +50 -0
- package/src/lib/schema.js +150 -0
- package/src/lib/select.js +474 -0
- package/src/lib/tracing.js +62 -0
- package/src/lib/utils.js +85 -0
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OTel span helpers for the session process.
|
|
3
|
+
*
|
|
4
|
+
* Re-exports the traceparent parser, span lifecycle, and
|
|
5
|
+
* `gen_ai.*` attribute helpers from `src/lib/tracing.js` so adapter
|
|
6
|
+
* code has one canonical import path. `ensureOtelInitialized()`
|
|
7
|
+
* lazily wires `@opentelemetry/sdk-node` when
|
|
8
|
+
* `OTEL_EXPORTER_OTLP_ENDPOINT` is set — without it, the no-op
|
|
9
|
+
* tracer is fine: span IDs still come from the parsed traceparent
|
|
10
|
+
* (via `remoteParentFromTraceparent` + fresh spanId), so log lines
|
|
11
|
+
* still carry valid correlation even without an exporter.
|
|
12
|
+
*
|
|
13
|
+
* @module session/tracing
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
export {
|
|
17
|
+
startSpan,
|
|
18
|
+
endSpanOk,
|
|
19
|
+
endSpanError,
|
|
20
|
+
parseTraceparent,
|
|
21
|
+
remoteParentFromTraceparent
|
|
22
|
+
} from '../../src/lib/tracing.js'
|
|
23
|
+
|
|
24
|
+
// Tracks whether SDK wiring has already succeeded (or been skipped).
let otelInitialized = false

/**
 * Lazy OTel SDK setup. Safe to call multiple times: idempotent once
 * initialization succeeds (or is skipped), and a no-op when the env
 * doesn't request an exporter via `OTEL_EXPORTER_OTLP_ENDPOINT`.
 */
export async function ensureOtelInitialized () {
  if (otelInitialized) return

  const endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT
  if (!endpoint) {
    // No exporter requested — mark done so we never re-check the env.
    otelInitialized = true
    return
  }

  try {
    const [sdkMod, exporterMod] = await Promise.all([
      import('@opentelemetry/sdk-node'),
      import('@opentelemetry/exporter-trace-otlp-grpc')
    ])
    const exporter = new exporterMod.OTLPTraceExporter({ url: endpoint })
    const sdk = new sdkMod.NodeSDK({
      traceExporter: exporter,
      serviceName: process.env.OTEL_SERVICE_NAME || 'mohdel-session'
    })
    sdk.start()
    otelInitialized = true
  } catch (err) {
    // Flag stays false so a later call could retry. In practice the
    // sole caller (bin.js::main) runs this once, so "retry" means a
    // process restart — but the semantics match the flag name.
    const line = JSON.stringify({
      level: 'warn',
      time: Date.now(),
      msg: '[mohdel:tracing] OTel SDK init failed',
      err: { message: err.message }
    })
    process.stderr.write(`${line}\n`)
  }
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared `cancelledDone` helper for adapters that need to synthesize
|
|
3
|
+
* a terminal `done` event on `signal.aborted` mid-stream.
|
|
4
|
+
*
|
|
5
|
+
* Three adapters (openai, anthropic, gemini) had byte-identical
|
|
6
|
+
* copies of this before F58; consolidated here. `_chat_completions.js`
|
|
7
|
+
* and `run.js` have their own cancel paths — don't migrate them
|
|
8
|
+
* here unless you're certain the shape matches (thinkingTokens,
|
|
9
|
+
* cost, tool_calls semantics can all differ).
|
|
10
|
+
*
|
|
11
|
+
* @module session/adapters/_cancelled
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { STATUS_INCOMPLETE, WARNING_CANCELLED } from '#core/status.js'
|
|
15
|
+
import { costFor } from './_pricing.js'
|
|
16
|
+
|
|
17
|
+
/**
 * Build the synthetic terminal `done` event for a call aborted
 * mid-stream. Whatever partial output and token counts were observed
 * before the abort are preserved; thinking tokens are reported as 0.
 *
 * @param {string} start hrtime-bigint-as-string at call entry
 * @param {string | null} first first-token hrtime, or null if never streamed
 * @param {import('#core/envelope.js').CallEnvelope} envelope
 * @param {string} output
 * @param {number} inputTokens
 * @param {number} outputTokens
 * @returns {import('#core/events.js').DoneEvent}
 */
export function cancelledDone (start, first, envelope, output, inputTokens, outputTokens) {
  const end = String(process.hrtime.bigint())
  const usage = { inputTokens, outputTokens, thinkingTokens: 0 }
  const result = {
    status: STATUS_INCOMPLETE,
    output: output || null,
    inputTokens,
    outputTokens,
    thinkingTokens: 0,
    cost: costFor(`${envelope.provider}/${envelope.model}`, usage),
    // If no token ever streamed, `first` collapses onto `end`.
    timestamps: { start, first: first == null ? end : first, end },
    warning: WARNING_CANCELLED
  }
  return { type: 'done', result }
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Curated catalog reader. Single source of truth for per-model
|
|
3
|
+
* metadata used by adapters: pricing, thinking effort levels,
|
|
4
|
+
* output token limits, etc. Lives in `~/.config/mohdel/curated.json`;
|
|
5
|
+
* each entry keyed by `<provider>/<model>` carries the model spec.
|
|
6
|
+
*
|
|
7
|
+
* Adapters and `_pricing.js` both consume this — no parallel cache,
|
|
8
|
+
* no parallel file read.
|
|
9
|
+
*
|
|
10
|
+
* @module session/adapters/_catalog
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import envPaths from 'env-paths'
|
|
14
|
+
|
|
15
|
+
import { createLazyJsonFileCache } from './_lazy_json_cache.js'
|
|
16
|
+
|
|
17
|
+
// Module-level singleton: every export below reads/writes this one
// cache. The path is a thunk so env-paths only resolves on first load.
//
// `{ suffix: null }` mirrors `src/lib/common.js::CONFIG_DIR` so the
// session subprocess reads the same `~/.config/mohdel/curated.json`
// the `mo` CLI writes. Without the override, env-paths appends
// `-nodejs` and the cache silently loads empty.
const cache = createLazyJsonFileCache(() => `${envPaths('mohdel', { suffix: null }).config}/curated.json`)
|
|
22
|
+
|
|
23
|
+
/**
 * Synchronously load (and memoize) the catalog table from `path`.
 *
 * @param {string} path
 * @returns {Record<string, any>} entries keyed by `<provider>/<model>`
 */
export function loadCatalog (path) {
  const table = cache.loadSync(path)
  return table
}
|
|
30
|
+
|
|
31
|
+
/**
 * Eagerly warm the cache from the default curated path. Invoked by
 * `bin.js::main` ahead of `drive()` so the first `getSpec` never
 * stalls the event loop on a sync read mid-call. Idempotent; a prior
 * `setCatalog` (tests) is respected.
 */
export async function initCatalogFromDefault () {
  const ready = cache.initAsync()
  await ready
}
|
|
40
|
+
|
|
41
|
+
/**
 * Swap in a replacement catalog wholesale. Serves as the test
 * injection point and the extension seam for deployments whose model
 * spec comes from somewhere other than curated.json.
 *
 * @param {Record<string, any>} table
 */
export function setCatalog (table) {
  cache.set(table)
}
|
|
51
|
+
|
|
52
|
+
/**
 * Look up one model's spec entry.
 *
 * @param {string} model Fully-qualified `<provider>/<model>` key.
 * @returns {any | undefined} the spec, or undefined when unknown
 */
export function getSpec (model) {
  const spec = cache.get(model)
  return spec
}
|
|
@@ -0,0 +1,439 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared chat.completions core — handles both streaming and
|
|
3
|
+
* non-streaming variants.
|
|
4
|
+
*
|
|
5
|
+
* Used by all providers that speak the classic OpenAI Chat
|
|
6
|
+
* Completions shape (Groq, Cerebras, DeepSeek, Mistral, OpenRouter,
|
|
7
|
+
* Fireworks). Each provider supplies an SDK client factory plus a
|
|
8
|
+
* small config block (tool-choice flavor, identifier field name,
|
|
9
|
+
* DSML fallback, reasoning field mapping, arg mutation hook); this
|
|
10
|
+
* module owns the request/response shape.
|
|
11
|
+
*
|
|
12
|
+
* Non-streaming mode emits a single synthetic `delta` for the
|
|
13
|
+
* visible message content followed by a terminal `done`. Streaming
|
|
14
|
+
* mode emits real per-chunk deltas.
|
|
15
|
+
*
|
|
16
|
+
* @module session/adapters/_chat_completions
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { getSpec } from './_catalog.js'
|
|
20
|
+
import { classifyProviderError } from './_errors.js'
|
|
21
|
+
import { costFor } from './_pricing.js'
|
|
22
|
+
import {
|
|
23
|
+
STATUS_COMPLETED,
|
|
24
|
+
STATUS_INCOMPLETE,
|
|
25
|
+
STATUS_TOOL_USE,
|
|
26
|
+
WARNING_INSUFFICIENT_OUTPUT_BUDGET
|
|
27
|
+
} from '#core/status.js'
|
|
28
|
+
import {
|
|
29
|
+
toCerebrasTools,
|
|
30
|
+
fromCerebrasToolCalls,
|
|
31
|
+
toToolChoice
|
|
32
|
+
} from './_tools.js'
|
|
33
|
+
|
|
34
|
+
// DSML parse regexes. These run against untrusted model output, so
// worst-case match time MUST stay linear in input length (no ReDoS).
// Rules to preserve that:
// - Lazy quantifiers (`*?`) only with bounded character classes
//   (`[\s\S]`, `[^"]`, `[^<]`) and followed by a literal/anchor.
// - No nested alternations (e.g. `(a|b)*?`) inside a quantifier —
//   that's what flips `*?` to exponential worst-case.
// - No backreferences.
// If you add a new pattern here, benchmark it against a 1 MiB
// adversarial input before merging.
//
// NOTE: the /g regexes carry `lastIndex` state between calls; the
// exec loops in parseDsmlToolCalls always run to exhaustion, which
// resets it.

// Cheap containment probe (stateless — no /g flag).
const DSML_RE = /<\uFF5CDSML\uFF5Cfunction_calls>/
// Matches a whole <function_calls>…</function_calls> span; used by
// stripDsml() to remove the blocks from visible content.
const DSML_BLOCK_RE = /<\uFF5CDSML\uFF5Cfunction_calls>[\s\S]*?<\/\uFF5CDSML\uFF5Cfunction_calls>/g
// Captures: (1) invoke name, (2) inner parameter markup.
const DSML_INVOKE_RE = /<\uFF5CDSML\uFF5Cinvoke\s+name="([^"]+)">([\s\S]*?)<\/\uFF5CDSML\uFF5Cinvoke>/g
// Captures: (1) param name, (2) optional `string` attribute (the
// parser treats string="false" as numeric-coercible), (3) raw value.
const DSML_PARAM_RE = /<\uFF5CDSML\uFF5Cparameter\s+name="([^"]+)"(?:\s+string="([^"]*)")?>([^<]*)<\/\uFF5CDSML\uFF5Cparameter>/g
|
|
48
|
+
|
|
49
|
+
/**
|
|
50
|
+
* @typedef {object} ChatCompletionsConfig
|
|
51
|
+
* @property {string} provider
|
|
52
|
+
* Registry key. Also used as tool-choice flavor unless
|
|
53
|
+
* `toolChoiceFlavor` is set.
|
|
54
|
+
* @property {'openai'|'mistral'|'cerebras'} [toolChoiceFlavor]
|
|
55
|
+
* @property {'user'|'safety_identifier'} [identifierField]
|
|
56
|
+
* Defaults to 'user'.
|
|
57
|
+
* @property {'reasoning_effort'|'cerebras_zai'} [reasoningField]
|
|
58
|
+
* How to wire outputEffort into the request. `reasoning_effort`
|
|
59
|
+
* sets `args.reasoning_effort = effort`. `cerebras_zai` flips
|
|
60
|
+
* `args.disable_reasoning = false` instead (zai-family only).
|
|
61
|
+
* @property {boolean} [parseDsml]
|
|
62
|
+
* Extract DeepSeek DSML function-call blocks from message content
|
|
63
|
+
* when native `tool_calls` is absent.
|
|
64
|
+
* @property {boolean} [stream]
|
|
65
|
+
* Use chat.completions streaming. Emits real delta events per SSE
|
|
66
|
+
* chunk; usage reported on the final chunk via
|
|
67
|
+
* `stream_options.include_usage`.
|
|
68
|
+
* @property {(envelope: any, args: any) => void} [mutateArgs]
|
|
69
|
+
* Last-mile hook to splice provider-specific fields into the
|
|
70
|
+
* request (e.g. OpenRouter routing prefs).
|
|
71
|
+
*/
|
|
72
|
+
|
|
73
|
+
/**
 * Execute one chat.completions call and translate the provider
 * response into the canonical event stream: zero or more `delta`
 * events followed by a terminal `done` (or a terminal `error`).
 *
 * @param {import('#core/envelope.js').CallEnvelope} envelope
 * @param {any} client
 * @param {ChatCompletionsConfig} config
 * @param {{signal?: AbortSignal, log?: any, span?: any}} [deps]
 * @returns {AsyncGenerator<import('#core/events.js').Event>}
 */
export async function * runChatCompletions (envelope, client, config, deps = {}) {
  const modelKey = `${envelope.provider}/${envelope.model}`
  const spec = getSpec(modelKey) || {}
  const start = String(process.hrtime.bigint())

  const args = buildRequest(envelope, spec, config)
  config.mutateArgs?.(envelope, args)

  if (config.stream) {
    yield * runStreaming(envelope, client, args, config, start, deps)
    return
  }

  let response
  try {
    response = await client.chat.completions.create(args, { signal: deps.signal })
  } catch (err) {
    deps.log?.warn({ err }, `[mohdel:${config.provider}] request failed`)
    yield { type: 'error', error: classifyProviderError(err) }
    return
  }

  const first = String(process.hrtime.bigint())
  const choice = Array.isArray(response?.choices) ? response.choices[0] : null
  const message = choice?.message || {}
  const usage = response?.usage || {}
  const finishReason = choice?.finish_reason

  let content = message.content || ''
  let toolCalls = message.tool_calls

  // DSML fallback: some DeepSeek responses embed function calls in the
  // visible text instead of native tool_calls.
  if (config.parseDsml && content && (!toolCalls || !toolCalls.length)) {
    const extracted = parseDsmlToolCalls(content)
    if (extracted) {
      toolCalls = extracted
      content = stripDsml(content)
    }
  }

  // Non-streaming mode: one synthetic delta carrying the whole message.
  if (content) {
    yield { type: 'delta', delta: { type: 'message', delta: content } }
  }

  yield finalize({
    envelope, content, toolCalls, usage, finishReason, start, first
  })
}
|
|
126
|
+
|
|
127
|
+
/**
 * Streaming variant: relays per-chunk `delta` events as they arrive,
 * accumulating content / tool calls / usage, then emits the terminal
 * `done` built from the accumulated state.
 *
 * @param {import('#core/envelope.js').CallEnvelope} envelope
 * @param {any} client
 * @param {any} args
 * @param {ChatCompletionsConfig} config
 * @param {string} start hrtime-bigint-as-string at call entry
 * @param {{signal?: AbortSignal, log?: any}} deps
 */
async function * runStreaming (envelope, client, args, config, start, deps) {
  args.stream = true
  args.stream_options = { include_usage: true }

  // F53: accumulate via array + join to avoid per-delta V8 cons-string
  // churn on long streams.
  const contentParts = []
  let first = null
  let finishReason = null
  let usage = {}
  const toolCallAccum = {}
  // Cross-reference: tc.id observed on the opener → slot index. Lets
  // us correlate continuation chunks that carry only `id` (some
  // OpenAI-compat providers omit `index` after the opener) back to
  // their original slot. Without this, chunks defaulting to
  // `index ?? 0` silently merge multiple calls into slot 0.
  const idToIndex = {}

  let stream
  try {
    stream = await client.chat.completions.create(args, { signal: deps.signal })
  } catch (e) {
    deps.log?.warn({ err: e }, `[mohdel:${config.provider}] request failed`)
    yield { type: 'error', error: classifyProviderError(e) }
    return
  }

  try {
    for await (const chunk of stream) {
      const choice = chunk.choices?.[0]
      if (choice?.delta?.content) {
        if (first === null) first = String(process.hrtime.bigint())
        contentParts.push(choice.delta.content)
        yield { type: 'delta', delta: { type: 'message', delta: choice.delta.content } }
      }
      if (choice?.delta?.tool_calls) {
        for (const tc of choice.delta.tool_calls) {
          // Resolve which slot this chunk belongs to:
          // 1. explicit `tc.index` always wins (normal case);
          // 2. missing `tc.index` but a `tc.id` we've seen before
          //    → use the cross-ref we recorded on the opener;
          // 3. new `tc.id` with no index → allocate the first FREE
          //    slot. (Previously `Object.keys(...).length`, which
          //    collides with an explicitly-indexed slot when a
          //    provider mixes indexed and index-less openers, e.g.
          //    occupied {0, 2} would allocate 2 and merge two calls.)
          // 4. neither id nor index → can't correlate; drop + warn.
          let idx = tc.index
          if (idx == null && tc.id != null && idToIndex[tc.id] != null) {
            idx = idToIndex[tc.id]
          }
          if (idx == null && tc.id != null) {
            idx = 0
            while (toolCallAccum[idx] !== undefined) idx++
          }
          if (idx == null) {
            deps.log?.warn(
              { tc },
              `[mohdel:${config.provider}] tool_call chunk with no id or index; skipped`
            )
            continue
          }
          const slot = toolCallAccum[idx] || (toolCallAccum[idx] = { id: '', name: '', arguments: '' })
          if (tc.id) {
            slot.id = tc.id
            idToIndex[tc.id] = idx
          }
          if (tc.function?.name) slot.name += tc.function.name
          if (tc.function?.arguments) {
            slot.arguments += tc.function.arguments
            if (first === null) first = String(process.hrtime.bigint())
            yield {
              type: 'delta',
              delta: { type: 'function_call', delta: tc.function.arguments }
            }
          }
        }
      }
      if (choice?.finish_reason) finishReason = choice.finish_reason
      // Usage arrives on the final chunk (stream_options.include_usage).
      if (chunk.usage) usage = chunk.usage
    }
  } catch (e) {
    deps.log?.warn({ err: e }, `[mohdel:${config.provider}] stream failed`)
    yield { type: 'error', error: classifyProviderError(e) }
    return
  }

  const collectedToolCalls = Object.values(toolCallAccum).map(tc => ({
    id: tc.id,
    function: { name: tc.name, arguments: tc.arguments }
  }))

  yield finalize({
    envelope,
    content: contentParts.join(''),
    toolCalls: collectedToolCalls.length ? collectedToolCalls : null,
    usage,
    finishReason,
    start,
    first
  })
}
|
|
232
|
+
|
|
233
|
+
/**
 * Build the terminal `done` event from accumulated response state.
 * Tool calls take precedence over truncation for status; truncation
 * additionally sets the insufficient-output-budget warning.
 *
 * @param {{
 *   envelope: any,
 *   content: string,
 *   toolCalls: any[] | null,
 *   usage: any,
 *   finishReason: string | null,
 *   start: string,
 *   first: string | null
 * }} p
 * @returns {import('#core/events.js').DoneEvent}
 */
function finalize ({ envelope, content, toolCalls, usage, finishReason, start, first }) {
  const end = String(process.hrtime.bigint())
  const inputTokens = usage.prompt_tokens || 0
  const totalOutputTokens = usage.completion_tokens || 0
  const thinkingTokens = usage.completion_tokens_details?.reasoning_tokens || 0
  // Providers report completion_tokens inclusive of reasoning; our
  // contract separates visible output from thinking.
  const visibleOutputTokens = Math.max(0, totalOutputTokens - thinkingTokens)

  const hasToolCalls = Boolean(toolCalls && toolCalls.length > 0)
  const truncated = finishReason === 'length'
  let status
  if (hasToolCalls) {
    status = STATUS_TOOL_USE
  } else if (truncated) {
    status = STATUS_INCOMPLETE
  } else {
    status = STATUS_COMPLETED
  }

  const result = {
    status,
    output: content || null,
    inputTokens,
    outputTokens: visibleOutputTokens,
    thinkingTokens,
    cost: costFor(
      `${envelope.provider}/${envelope.model}`,
      { inputTokens, outputTokens: visibleOutputTokens, thinkingTokens }
    ),
    // If no token ever streamed, `first` collapses onto `end`.
    timestamps: { start, first: first == null ? end : first, end }
  }
  if (truncated) result.warning = WARNING_INSUFFICIENT_OUTPUT_BUDGET
  if (hasToolCalls) result.toolCalls = fromCerebrasToolCalls(toolCalls)

  /** @type {import('#core/events.js').DoneEvent} */
  const done = { type: 'done', result }
  return done
}
|
|
278
|
+
|
|
279
|
+
/**
 * Translate a CallEnvelope into chat.completions request args:
 * messages, token budget, images, tools, response format, reasoning
 * effort, and the end-user identifier field.
 *
 * @param {import('#core/envelope.js').CallEnvelope} envelope
 * @param {any} spec curated catalog entry (may be empty)
 * @param {ChatCompletionsConfig} config
 * @returns {Record<string, any>}
 */
function buildRequest (envelope, spec, config) {
  /** @type {Record<string, any>} */
  const args = {
    model: envelope.model,
    temperature: 0,
    messages: toChatMessages(envelope.prompt)
  }

  if (envelope.outputBudget !== undefined) {
    args.max_tokens = envelope.outputBudget
  }

  if (envelope.images?.length) {
    injectImages(args, envelope.images)
  }

  if (envelope.tools?.length) {
    args.tools = toCerebrasTools(envelope.tools)
    if (envelope.toolChoice) {
      const flavor = config.toolChoiceFlavor || 'openai'
      args.tool_choice = toToolChoice(flavor, envelope.toolChoice)
    }
    if (envelope.parallelToolCalls === false) {
      args.parallel_tool_calls = false
    }
  }

  if (envelope.outputType === 'json') {
    args.response_format = { type: 'json_object' }
  }

  if (spec.thinkingEffortLevels) {
    const effort = envelope.outputEffort ?? spec.defaultThinkingEffort ?? 'low'
    if (effort && effort !== 'none') {
      // Grow the budget by the configured per-level headroom so
      // reasoning doesn't eat the visible-output allowance.
      const headroom = spec.thinkingEffortLevels[effort]
      if (args.max_tokens && typeof headroom === 'number') {
        args.max_tokens += headroom
      }
      delete args.temperature
      if (config.reasoningField === 'cerebras_zai' && /zai/i.test(envelope.model)) {
        args.disable_reasoning = false
      } else {
        args.reasoning_effort = effort
      }
    }
  }

  if (envelope.identifier) {
    args[config.identifierField || 'user'] = envelope.identifier
  }

  return args
}
|
|
336
|
+
|
|
337
|
+
/**
 * Convert the envelope prompt into Chat Completions `messages`.
 * A bare string becomes a single user turn; structured messages map
 * tool results and assistant tool_calls onto the wire shape.
 *
 * @param {string | import('#core/envelope.js').Message[]} prompt
 * @returns {Array<any>}
 */
function toChatMessages (prompt) {
  if (typeof prompt === 'string') {
    return [{ role: 'user', content: prompt }]
  }
  const wire = []
  for (const m of prompt) {
    if (m.role === 'tool') {
      wire.push({
        role: 'tool',
        tool_call_id: m.toolCallId,
        content: flattenText(m.content)
      })
      continue
    }
    if (m.role === 'assistant' && m.toolCalls?.length) {
      // Chat Completions assistant turn: optional `content` + the
      // `tool_calls` array. `arguments` must be a JSON string on
      // the wire.
      const toolCalls = m.toolCalls.map(tc => ({
        id: tc.id,
        type: 'function',
        function: { name: tc.name, arguments: stringifyToolArgs(tc.arguments) }
      }))
      wire.push({
        role: 'assistant',
        content: flattenText(m.content) || '',
        tool_calls: toolCalls
      })
      continue
    }
    wire.push({ role: m.role, content: flattenText(m.content) })
  }
  return wire
}
|
|
371
|
+
|
|
372
|
+
/**
 * Serialize tool-call arguments for the wire — `function.arguments`
 * must be a JSON string. Non-empty strings pass through untouched;
 * nullish becomes `'{}'`; anything unserializable falls back to `'{}'`.
 * @param {unknown} args
 * @returns {string}
 */
function stringifyToolArgs (args) {
  if (typeof args === 'string' && args !== '') return args
  try {
    return JSON.stringify(args ?? {})
  } catch {
    // Circular structures, BigInt, etc.
    return '{}'
  }
}
|
|
377
|
+
|
|
378
|
+
/**
 * Collapse message content to a plain string: strings pass through;
 * part arrays keep only non-empty text parts, joined with newlines;
 * any other shape yields ''.
 * @param {string | import('#core/envelope.js').MessagePart[]} content
 * @returns {string}
 */
function flattenText (content) {
  if (typeof content === 'string') return content
  if (!Array.isArray(content)) return ''
  const texts = []
  for (const part of content) {
    if (part.type === 'text' && part.text) texts.push(part.text)
  }
  return texts.join('\n')
}
|
|
384
|
+
|
|
385
|
+
/**
 * Attach image blocks to the last user message in `args.messages`,
 * creating a trailing user message when none exists. Refs missing
 * `fileUri` or `mimeType` are dropped. Mutates `args` in place.
 * @param {any} args
 * @param {import('#core/envelope.js').MediaRef[]} images
 */
function injectImages (args, images) {
  const blocks = []
  for (const img of images) {
    if (!img?.fileUri || !img?.mimeType) continue
    blocks.push({
      type: 'image_url',
      image_url: { url: img.fileUri, detail: 'high' }
    })
  }
  if (blocks.length === 0) return

  // Walk backwards to the most recent user turn.
  for (let i = args.messages.length - 1; i >= 0; i--) {
    const msg = args.messages[i]
    if (msg.role !== 'user') continue
    if (typeof msg.content === 'string') {
      msg.content = [{ type: 'text', text: msg.content }, ...blocks]
    } else if (Array.isArray(msg.content)) {
      msg.content = [...msg.content, ...blocks]
    }
    // A user message with any other content shape is left untouched
    // (matches prior behavior) and the images are silently dropped.
    return
  }
  args.messages.push({ role: 'user', content: blocks })
}
|
|
411
|
+
|
|
412
|
+
/**
 * Extract DeepSeek DSML function-call blocks embedded in message
 * text. Returns Chat Completions-shaped tool calls, or null when no
 * invoke block is present. Both /g regex loops run to exhaustion,
 * which resets their `lastIndex` for the next call.
 * @param {string} text
 * @returns {Array<any> | null}
 */
function parseDsmlToolCalls (text) {
  if (!text || !DSML_RE.test(text)) return null
  const calls = []
  for (let m = DSML_INVOKE_RE.exec(text); m !== null; m = DSML_INVOKE_RE.exec(text)) {
    /** @type {Record<string, string | number>} */
    const params = {}
    for (let p = DSML_PARAM_RE.exec(m[2]); p !== null; p = DSML_PARAM_RE.exec(m[2])) {
      const raw = p[3].trim()
      // `string="false"` marks a non-string value: coerce numerics.
      const isNumeric = p[2] === 'false' && raw !== '' && !isNaN(Number(raw))
      params[p[1]] = isNumeric ? Number(raw) : raw
    }
    calls.push({
      id: `dsml_${Date.now()}_${calls.length}`,
      type: 'function',
      function: { name: m[1], arguments: JSON.stringify(params) }
    })
  }
  return calls.length > 0 ? calls : null
}
|
|
434
|
+
|
|
435
|
+
/**
 * Remove every DSML function_calls block from visible content and
 * trim the remainder.
 * @param {string} text
 * @returns {string}
 */
function stripDsml (text) {
  if (!text) return text
  const cleaned = text.replace(DSML_BLOCK_RE, '')
  return cleaned.trim() || ''
}
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared error classification for provider adapters.
|
|
3
|
+
*
|
|
4
|
+
* Maps SDK errors (by HTTP status) to a canonical `TypedError` with
|
|
5
|
+
* stable `type` tags (AUTH_INVALID, RATE_LIMIT, PROVIDER_COOLDOWN,
|
|
6
|
+
* PROVIDER_UNAVAILABLE, …). 401/403 messages stay generic to avoid
|
|
7
|
+
* echoing provider bodies that may contain the API key back on the
|
|
8
|
+
* wire; for other statuses the provider's own detail is preserved
|
|
9
|
+
* on `.detail` so callers can debug 400s (schema rejects, etc).
|
|
10
|
+
*
|
|
11
|
+
* @module session/adapters/_errors
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
// Cap on `.detail` length so a verbose provider body doesn't blow up
// log pipelines.
const DETAIL_CAP = 500

/**
 * Pull a short human-readable detail string out of an SDK error.
 * Prefers the structured nested message (OpenAI SDK `err.error.message`,
 * axios-style `err.response.data.error.message`) over `err.message`,
 * and truncates to DETAIL_CAP characters with a trailing ellipsis.
 * @param {any} err
 * @returns {string | undefined}
 */
function extractDetail (err) {
  if (!err) return undefined
  const nested = err.error?.message || err.response?.data?.error?.message
  const raw = nested || err.message
  if (!raw) return undefined
  const str = typeof raw === 'string' ? raw : JSON.stringify(raw)
  if (str.length <= DETAIL_CAP) return str
  return `${str.slice(0, DETAIL_CAP)}…`
}

/**
 * Classify an SDK/provider error into a canonical TypedError by HTTP
 * status. 401/403 deliberately carry no detail — those bodies can
 * echo the API key. Every other class preserves the provider's own
 * message on `.detail` for debugging (schema rejects, etc).
 * @param {unknown} e
 * @returns {import('#core/errors.js').TypedError}
 */
export function classifyProviderError (e) {
  const err = /** @type {any} */ (e)
  const status = err?.status

  if (status === 401 || status === 403) {
    return {
      message: 'authentication failed',
      severity: 'error',
      retryable: false,
      type: 'AUTH_INVALID'
    }
  }

  if (status === 429) {
    return {
      message: 'rate limit exceeded',
      severity: 'warn',
      retryable: true,
      type: 'RATE_LIMIT',
      detail: extractDetail(err)
    }
  }

  if (typeof status === 'number') {
    if (status >= 500) {
      return {
        message: `provider error ${status}`,
        severity: 'warn',
        retryable: true,
        type: 'PROVIDER_UNAVAILABLE',
        detail: extractDetail(err)
      }
    }
    if (status >= 400) {
      return {
        message: `provider error ${status}`,
        severity: 'error',
        retryable: false,
        type: 'PROVIDER_ERROR',
        detail: extractDetail(err)
      }
    }
  }

  // No usable status: treat as a (retryable) transport-level failure.
  return {
    message: err?.message ? String(err.message).slice(0, 200) : 'network error',
    severity: 'warn',
    retryable: true,
    type: 'NET_ERROR',
    detail: extractDetail(err)
  }
}
|