mohdel 0.90.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98)
  1. package/LICENSE +21 -0
  2. package/README.md +377 -0
  3. package/config/benchmarks.json +39 -0
  4. package/js/client/call.js +75 -0
  5. package/js/client/call_image.js +82 -0
  6. package/js/client/gate-binary.js +72 -0
  7. package/js/client/index.js +16 -0
  8. package/js/client/ndjson.js +29 -0
  9. package/js/client/transport.js +48 -0
  10. package/js/core/envelope.js +141 -0
  11. package/js/core/errors.js +75 -0
  12. package/js/core/events.js +96 -0
  13. package/js/core/image.js +58 -0
  14. package/js/core/index.js +10 -0
  15. package/js/core/status.js +48 -0
  16. package/js/factory/bridge.js +372 -0
  17. package/js/session/_cooldown.js +114 -0
  18. package/js/session/_logger.js +138 -0
  19. package/js/session/_rate_limiter.js +77 -0
  20. package/js/session/_tracing.js +58 -0
  21. package/js/session/adapters/_cancelled.js +44 -0
  22. package/js/session/adapters/_catalog.js +58 -0
  23. package/js/session/adapters/_chat_completions.js +439 -0
  24. package/js/session/adapters/_errors.js +85 -0
  25. package/js/session/adapters/_images.js +60 -0
  26. package/js/session/adapters/_lazy_json_cache.js +76 -0
  27. package/js/session/adapters/_pricing.js +67 -0
  28. package/js/session/adapters/_providers.js +60 -0
  29. package/js/session/adapters/_tools.js +185 -0
  30. package/js/session/adapters/_videos.js +283 -0
  31. package/js/session/adapters/anthropic.js +397 -0
  32. package/js/session/adapters/cerebras.js +28 -0
  33. package/js/session/adapters/deepseek.js +32 -0
  34. package/js/session/adapters/echo.js +51 -0
  35. package/js/session/adapters/fake.js +262 -0
  36. package/js/session/adapters/fireworks.js +46 -0
  37. package/js/session/adapters/gemini.js +381 -0
  38. package/js/session/adapters/groq.js +23 -0
  39. package/js/session/adapters/image/fake.js +55 -0
  40. package/js/session/adapters/image/index.js +40 -0
  41. package/js/session/adapters/image/novita.js +135 -0
  42. package/js/session/adapters/image/openai.js +50 -0
  43. package/js/session/adapters/index.js +53 -0
  44. package/js/session/adapters/mistral.js +31 -0
  45. package/js/session/adapters/novita.js +29 -0
  46. package/js/session/adapters/openai.js +381 -0
  47. package/js/session/adapters/openrouter.js +66 -0
  48. package/js/session/adapters/xai.js +27 -0
  49. package/js/session/bin.js +54 -0
  50. package/js/session/driver.js +160 -0
  51. package/js/session/index.js +18 -0
  52. package/js/session/run.js +393 -0
  53. package/js/session/run_image.js +61 -0
  54. package/package.json +107 -0
  55. package/src/cli/ask.js +160 -0
  56. package/src/cli/backup.js +107 -0
  57. package/src/cli/bench.js +262 -0
  58. package/src/cli/check.js +123 -0
  59. package/src/cli/colored-logger.js +67 -0
  60. package/src/cli/colors.js +13 -0
  61. package/src/cli/default.js +39 -0
  62. package/src/cli/index.js +150 -0
  63. package/src/cli/json-output.js +60 -0
  64. package/src/cli/model.js +571 -0
  65. package/src/cli/onboard.js +232 -0
  66. package/src/cli/rank.js +176 -0
  67. package/src/cli/ratelimit.js +160 -0
  68. package/src/cli/tag.js +105 -0
  69. package/src/lib/assets/alibaba.svg +1 -0
  70. package/src/lib/assets/anthropic.svg +5 -0
  71. package/src/lib/assets/deepseek.svg +1 -0
  72. package/src/lib/assets/gemini.svg +1 -0
  73. package/src/lib/assets/google.svg +2 -0
  74. package/src/lib/assets/kwaipilot.svg +1 -0
  75. package/src/lib/assets/meta.svg +1 -0
  76. package/src/lib/assets/minimax.svg +9 -0
  77. package/src/lib/assets/moonshotai.svg +4 -0
  78. package/src/lib/assets/openai.svg +5 -0
  79. package/src/lib/assets/xai.svg +1 -0
  80. package/src/lib/assets/xiaomi.svg +2 -0
  81. package/src/lib/assets/zai.svg +219 -0
  82. package/src/lib/benchmark-score.js +215 -0
  83. package/src/lib/benchmark-truth.js +68 -0
  84. package/src/lib/cache.js +76 -0
  85. package/src/lib/common.js +208 -0
  86. package/src/lib/cooldown.js +63 -0
  87. package/src/lib/creators.js +71 -0
  88. package/src/lib/curated-cache.js +146 -0
  89. package/src/lib/errors.js +126 -0
  90. package/src/lib/index.js +726 -0
  91. package/src/lib/logger.js +29 -0
  92. package/src/lib/providers.js +87 -0
  93. package/src/lib/rank.js +390 -0
  94. package/src/lib/rate-limiter.js +50 -0
  95. package/src/lib/schema.js +150 -0
  96. package/src/lib/select.js +474 -0
  97. package/src/lib/tracing.js +62 -0
  98. package/src/lib/utils.js +85 -0
@@ -0,0 +1,58 @@
1
+ /**
2
+ * OTel span helpers for the session process.
3
+ *
4
+ * Re-exports the traceparent parser, span lifecycle, and
5
+ * `gen_ai.*` attribute helpers from `src/lib/tracing.js` so adapter
6
+ * code has one canonical import path. `ensureOtelInitialized()`
7
+ * lazily wires `@opentelemetry/sdk-node` when
8
+ * `OTEL_EXPORTER_OTLP_ENDPOINT` is set — without it, the no-op
9
+ * tracer is fine: span IDs still come from the parsed traceparent
10
+ * (via `remoteParentFromTraceparent` + fresh spanId), so log lines
11
+ * still carry valid correlation even without an exporter.
12
+ *
13
+ * @module session/tracing
14
+ */
15
+
16
+ export {
17
+ startSpan,
18
+ endSpanOk,
19
+ endSpanError,
20
+ parseTraceparent,
21
+ remoteParentFromTraceparent
22
+ } from '../../src/lib/tracing.js'
23
+
24
+ let otelInitialized = false
25
+
26
+ /**
27
+ * Lazy OTel SDK setup. Safe to call multiple times — idempotent and
28
+ * no-op when the host app has already registered a tracer provider
29
+ * or when the env doesn't request one.
30
+ */
31
+ export async function ensureOtelInitialized () {
32
+ if (otelInitialized) return
33
+ const endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT
34
+ if (!endpoint) {
35
+ otelInitialized = true
36
+ return
37
+ }
38
+
39
+ try {
40
+ const [{ NodeSDK }, { OTLPTraceExporter }] = await Promise.all([
41
+ import('@opentelemetry/sdk-node'),
42
+ import('@opentelemetry/exporter-trace-otlp-grpc')
43
+ ])
44
+ const sdk = new NodeSDK({
45
+ traceExporter: new OTLPTraceExporter({ url: endpoint }),
46
+ serviceName: process.env.OTEL_SERVICE_NAME || 'mohdel-session'
47
+ })
48
+ sdk.start()
49
+ otelInitialized = true
50
+ } catch (e) {
51
+ // Leave the flag false so a retry is possible. The current caller
52
+ // (bin.js::main) runs this once, so "retry" in practice means a
53
+ // process restart — but the semantics should match the flag name.
54
+ process.stderr.write(
55
+ `${JSON.stringify({ level: 'warn', time: Date.now(), msg: '[mohdel:tracing] OTel SDK init failed', err: { message: e.message } })}\n`
56
+ )
57
+ }
58
+ }
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Shared `cancelledDone` helper for adapters that need to synthesize
3
+ * a terminal `done` event on `signal.aborted` mid-stream.
4
+ *
5
+ * Three adapters (openai, anthropic, gemini) had byte-identical
6
+ * copies of this before F58; consolidated here. `_chat_completions.js`
7
+ * and `run.js` have their own cancel paths — don't migrate them
8
+ * here unless you're certain the shape matches (thinkingTokens,
9
+ * cost, tool_calls semantics can all differ).
10
+ *
11
+ * @module session/adapters/_cancelled
12
+ */
13
+
14
+ import { STATUS_INCOMPLETE, WARNING_CANCELLED } from '#core/status.js'
15
+ import { costFor } from './_pricing.js'
16
+
17
+ /**
18
+ * @param {string} start hrtime-bigint-as-string at call entry
19
+ * @param {string | null} first first-token hrtime, or null if never streamed
20
+ * @param {import('#core/envelope.js').CallEnvelope} envelope
21
+ * @param {string} output
22
+ * @param {number} inputTokens
23
+ * @param {number} outputTokens
24
+ * @returns {import('#core/events.js').DoneEvent}
25
+ */
26
+ export function cancelledDone (start, first, envelope, output, inputTokens, outputTokens) {
27
+ const end = String(process.hrtime.bigint())
28
+ return {
29
+ type: 'done',
30
+ result: {
31
+ status: STATUS_INCOMPLETE,
32
+ output: output || null,
33
+ inputTokens,
34
+ outputTokens,
35
+ thinkingTokens: 0,
36
+ cost: costFor(
37
+ `${envelope.provider}/${envelope.model}`,
38
+ { inputTokens, outputTokens, thinkingTokens: 0 }
39
+ ),
40
+ timestamps: { start, first: first ?? end, end },
41
+ warning: WARNING_CANCELLED
42
+ }
43
+ }
44
+ }
@@ -0,0 +1,58 @@
1
+ /**
2
+ * Curated catalog reader. Single source of truth for per-model
3
+ * metadata used by adapters: pricing, thinking effort levels,
4
+ * output token limits, etc. Lives in `~/.config/mohdel/curated.json`;
5
+ * each entry keyed by `<provider>/<model>` carries the model spec.
6
+ *
7
+ * Adapters and `_pricing.js` both consume this — no parallel cache,
8
+ * no parallel file read.
9
+ *
10
+ * @module session/adapters/_catalog
11
+ */
12
+
13
+ import envPaths from 'env-paths'
14
+
15
+ import { createLazyJsonFileCache } from './_lazy_json_cache.js'
16
+
17
+ // `{ suffix: null }` mirrors `src/lib/common.js::CONFIG_DIR` so the
18
+ // session subprocess reads the same `~/.config/mohdel/curated.json`
19
+ // the `mo` CLI writes. Without the override, env-paths appends
20
+ // `-nodejs` and the cache silently loads empty.
21
+ const cache = createLazyJsonFileCache(() => `${envPaths('mohdel', { suffix: null }).config}/curated.json`)
22
+
23
+ /**
24
+ * @param {string} path
25
+ * @returns {Record<string, any>}
26
+ */
27
+ export function loadCatalog (path) {
28
+ return cache.loadSync(path)
29
+ }
30
+
31
+ /**
32
+ * Eager async initialization from the default curated path. Called from
33
+ * `bin.js::main` before `drive()` so the first `getSpec` doesn't stall
34
+ * the event loop on a sync read mid-call. Idempotent; respects a
35
+ * prior `setCatalog` (tests).
36
+ */
37
+ export async function initCatalogFromDefault () {
38
+ await cache.initAsync()
39
+ }
40
+
41
+ /**
42
+ * Replace the active catalog. Test injection point and the extension
43
+ * seam for deployments that source spec from somewhere other than
44
+ * curated.json.
45
+ *
46
+ * @param {Record<string, any>} table
47
+ */
48
+ export function setCatalog (table) {
49
+ cache.set(table)
50
+ }
51
+
52
+ /**
53
+ * @param {string} model Fully-qualified `<provider>/<model>` key.
54
+ * @returns {any | undefined}
55
+ */
56
+ export function getSpec (model) {
57
+ return cache.get(model)
58
+ }
@@ -0,0 +1,439 @@
1
+ /**
2
+ * Shared chat.completions core — handles both streaming and
3
+ * non-streaming variants.
4
+ *
5
+ * Used by all providers that speak the classic OpenAI Chat
6
+ * Completions shape (Groq, Cerebras, DeepSeek, Mistral, OpenRouter,
7
+ * Fireworks). Each provider supplies an SDK client factory plus a
8
+ * small config block (tool-choice flavor, identifier field name,
9
+ * DSML fallback, reasoning field mapping, arg mutation hook); this
10
+ * module owns the request/response shape.
11
+ *
12
+ * Non-streaming mode emits a single synthetic `delta` for the
13
+ * visible message content followed by a terminal `done`. Streaming
14
+ * mode emits real per-chunk deltas.
15
+ *
16
+ * @module session/adapters/_chat_completions
17
+ */
18
+
19
+ import { getSpec } from './_catalog.js'
20
+ import { classifyProviderError } from './_errors.js'
21
+ import { costFor } from './_pricing.js'
22
+ import {
23
+ STATUS_COMPLETED,
24
+ STATUS_INCOMPLETE,
25
+ STATUS_TOOL_USE,
26
+ WARNING_INSUFFICIENT_OUTPUT_BUDGET
27
+ } from '#core/status.js'
28
+ import {
29
+ toCerebrasTools,
30
+ fromCerebrasToolCalls,
31
+ toToolChoice
32
+ } from './_tools.js'
33
+
34
+ // DSML parse regexes. These run against untrusted model output, so
35
+ // worst-case match time MUST stay linear in input length (no ReDoS).
36
+ // Rules to preserve that:
37
+ // - Lazy quantifiers (`*?`) only with bounded character classes
38
+ // (`[\s\S]`, `[^"]`, `[^<]`) and followed by a literal/anchor.
39
+ // - No nested alternations (e.g. `(a|b)*?`) inside a quantifier —
40
+ // that's what flips `*?` to exponential worst-case.
41
+ // - No backreferences.
42
+ // If you add a new pattern here, benchmark it against a 1 MiB
43
+ // adversarial input before merging.
44
+ const DSML_RE = /<\uFF5CDSML\uFF5Cfunction_calls>/
45
+ const DSML_BLOCK_RE = /<\uFF5CDSML\uFF5Cfunction_calls>[\s\S]*?<\/\uFF5CDSML\uFF5Cfunction_calls>/g
46
+ const DSML_INVOKE_RE = /<\uFF5CDSML\uFF5Cinvoke\s+name="([^"]+)">([\s\S]*?)<\/\uFF5CDSML\uFF5Cinvoke>/g
47
+ const DSML_PARAM_RE = /<\uFF5CDSML\uFF5Cparameter\s+name="([^"]+)"(?:\s+string="([^"]*)")?>([^<]*)<\/\uFF5CDSML\uFF5Cparameter>/g
48
+
49
+ /**
50
+ * @typedef {object} ChatCompletionsConfig
51
+ * @property {string} provider
52
+ * Registry key. Also used as tool-choice flavor unless
53
+ * `toolChoiceFlavor` is set.
54
+ * @property {'openai'|'mistral'|'cerebras'} [toolChoiceFlavor]
55
+ * @property {'user'|'safety_identifier'} [identifierField]
56
+ * Defaults to 'user'.
57
+ * @property {'reasoning_effort'|'cerebras_zai'} [reasoningField]
58
+ * How to wire outputEffort into the request. `reasoning_effort`
59
+ * sets `args.reasoning_effort = effort`. `cerebras_zai` flips
60
+ * `args.disable_reasoning = false` instead (zai-family only).
61
+ * @property {boolean} [parseDsml]
62
+ * Extract DeepSeek DSML function-call blocks from message content
63
+ * when native `tool_calls` is absent.
64
+ * @property {boolean} [stream]
65
+ * Use chat.completions streaming. Emits real delta events per SSE
66
+ * chunk; usage reported on the final chunk via
67
+ * `stream_options.include_usage`.
68
+ * @property {(envelope: any, args: any) => void} [mutateArgs]
69
+ * Last-mile hook to splice provider-specific fields into the
70
+ * request (e.g. OpenRouter routing prefs).
71
+ */
72
+
73
+ /**
74
+ * @param {import('#core/envelope.js').CallEnvelope} envelope
75
+ * @param {any} client
76
+ * @param {ChatCompletionsConfig} config
77
+ * @param {{signal?: AbortSignal, log?: any, span?: any}} [deps]
78
+ * @returns {AsyncGenerator<import('#core/events.js').Event>}
79
+ */
80
+ export async function * runChatCompletions (envelope, client, config, deps = {}) {
81
+ const spec = getSpec(`${envelope.provider}/${envelope.model}`) || {}
82
+ const start = String(process.hrtime.bigint())
83
+
84
+ const args = buildRequest(envelope, spec, config)
85
+ if (config.mutateArgs) config.mutateArgs(envelope, args)
86
+
87
+ if (config.stream) {
88
+ yield * runStreaming(envelope, client, args, config, start, deps)
89
+ return
90
+ }
91
+
92
+ let response
93
+ try {
94
+ response = await client.chat.completions.create(args, { signal: deps.signal })
95
+ } catch (e) {
96
+ deps.log?.warn({ err: e }, `[mohdel:${config.provider}] request failed`)
97
+ yield { type: 'error', error: classifyProviderError(e) }
98
+ return
99
+ }
100
+
101
+ const first = String(process.hrtime.bigint())
102
+ const choice = Array.isArray(response?.choices) ? response.choices[0] : null
103
+ const message = choice?.message || {}
104
+ const usage = response?.usage || {}
105
+ const finishReason = choice?.finish_reason
106
+
107
+ let content = message.content || ''
108
+ let toolCalls = message.tool_calls
109
+
110
+ if (config.parseDsml && content && (!toolCalls || !toolCalls.length)) {
111
+ const dsml = parseDsmlToolCalls(content)
112
+ if (dsml) {
113
+ toolCalls = dsml
114
+ content = stripDsml(content)
115
+ }
116
+ }
117
+
118
+ if (content) {
119
+ yield { type: 'delta', delta: { type: 'message', delta: content } }
120
+ }
121
+
122
+ yield finalize({
123
+ envelope, content, toolCalls, usage, finishReason, start, first
124
+ })
125
+ }
126
+
127
+ /**
128
+ * @param {import('#core/envelope.js').CallEnvelope} envelope
129
+ * @param {any} client
130
+ * @param {any} args
131
+ * @param {ChatCompletionsConfig} config
132
+ * @param {string} start
133
+ * @param {{signal?: AbortSignal}} deps
134
+ */
135
+ async function * runStreaming (envelope, client, args, config, start, deps) {
136
+ args.stream = true
137
+ args.stream_options = { include_usage: true }
138
+
139
+ // F53: accumulate via array + join to avoid per-delta V8 cons-string
140
+ // churn on long streams.
141
+ const contentParts = []
142
+ let first = null
143
+ let finishReason = null
144
+ let usage = {}
145
+ const toolCallAccum = {}
146
+ // Cross-reference: tc.id observed on the opener → slot index. Lets
147
+ // us correlate continuation chunks that carry only `id` (some
148
+ // OpenAI-compat providers omit `index` after the opener) back to
149
+ // their original slot. Without this, chunks defaulting to
150
+ // `index ?? 0` silently merge multiple calls into slot 0.
151
+ const idToIndex = {}
152
+
153
+ let stream
154
+ try {
155
+ stream = await client.chat.completions.create(args, { signal: deps.signal })
156
+ } catch (e) {
157
+ deps.log?.warn({ err: e }, `[mohdel:${config.provider}] request failed`)
158
+ yield { type: 'error', error: classifyProviderError(e) }
159
+ return
160
+ }
161
+
162
+ try {
163
+ for await (const chunk of stream) {
164
+ const choice = chunk.choices?.[0]
165
+ if (choice?.delta?.content) {
166
+ if (first === null) first = String(process.hrtime.bigint())
167
+ contentParts.push(choice.delta.content)
168
+ yield { type: 'delta', delta: { type: 'message', delta: choice.delta.content } }
169
+ }
170
+ if (choice?.delta?.tool_calls) {
171
+ for (const tc of choice.delta.tool_calls) {
172
+ // Resolve which slot this chunk belongs to:
173
+ // 1. explicit `tc.index` always wins (normal case);
174
+ // 2. missing `tc.index` but a `tc.id` we've seen before
175
+ // → use the cross-ref we recorded on the opener;
176
+ // 3. new `tc.id` with no index → allocate next slot;
177
+ // 4. neither id nor index → can't correlate; drop + warn.
178
+ let idx = tc.index
179
+ if (idx == null && tc.id != null && idToIndex[tc.id] != null) {
180
+ idx = idToIndex[tc.id]
181
+ }
182
+ if (idx == null && tc.id != null) {
183
+ idx = Object.keys(toolCallAccum).length
184
+ }
185
+ if (idx == null) {
186
+ deps.log?.warn(
187
+ { tc },
188
+ `[mohdel:${config.provider}] tool_call chunk with no id or index; skipped`
189
+ )
190
+ continue
191
+ }
192
+ const slot = toolCallAccum[idx] || (toolCallAccum[idx] = { id: '', name: '', arguments: '' })
193
+ if (tc.id) {
194
+ slot.id = tc.id
195
+ idToIndex[tc.id] = idx
196
+ }
197
+ if (tc.function?.name) slot.name += tc.function.name
198
+ if (tc.function?.arguments) {
199
+ slot.arguments += tc.function.arguments
200
+ if (first === null) first = String(process.hrtime.bigint())
201
+ yield {
202
+ type: 'delta',
203
+ delta: { type: 'function_call', delta: tc.function.arguments }
204
+ }
205
+ }
206
+ }
207
+ }
208
+ if (choice?.finish_reason) finishReason = choice.finish_reason
209
+ if (chunk.usage) usage = chunk.usage
210
+ }
211
+ } catch (e) {
212
+ deps.log?.warn({ err: e }, `[mohdel:${config.provider}] stream failed`)
213
+ yield { type: 'error', error: classifyProviderError(e) }
214
+ return
215
+ }
216
+
217
+ const collectedToolCalls = Object.values(toolCallAccum).map(tc => ({
218
+ id: tc.id,
219
+ function: { name: tc.name, arguments: tc.arguments }
220
+ }))
221
+
222
+ yield finalize({
223
+ envelope,
224
+ content: contentParts.join(''),
225
+ toolCalls: collectedToolCalls.length ? collectedToolCalls : null,
226
+ usage,
227
+ finishReason,
228
+ start,
229
+ first
230
+ })
231
+ }
232
+
233
+ /**
234
+ * @param {{
235
+ * envelope: any,
236
+ * content: string,
237
+ * toolCalls: any[] | null,
238
+ * usage: any,
239
+ * finishReason: string | null,
240
+ * start: string,
241
+ * first: string | null
242
+ * }} p
243
+ * @returns {import('#core/events.js').DoneEvent}
244
+ */
245
+ function finalize ({ envelope, content, toolCalls, usage, finishReason, start, first }) {
246
+ const end = String(process.hrtime.bigint())
247
+ const inputTokens = usage.prompt_tokens || 0
248
+ const totalOutputTokens = usage.completion_tokens || 0
249
+ const thinkingTokens = usage.completion_tokens_details?.reasoning_tokens || 0
250
+ const visibleOutputTokens = Math.max(0, totalOutputTokens - thinkingTokens)
251
+
252
+ const truncated = finishReason === 'length'
253
+ let status = truncated ? STATUS_INCOMPLETE : STATUS_COMPLETED
254
+ if (toolCalls && toolCalls.length > 0) status = STATUS_TOOL_USE
255
+
256
+ /** @type {import('#core/events.js').DoneEvent} */
257
+ const done = {
258
+ type: 'done',
259
+ result: {
260
+ status,
261
+ output: content || null,
262
+ inputTokens,
263
+ outputTokens: visibleOutputTokens,
264
+ thinkingTokens,
265
+ cost: costFor(
266
+ `${envelope.provider}/${envelope.model}`,
267
+ { inputTokens, outputTokens: visibleOutputTokens, thinkingTokens }
268
+ ),
269
+ timestamps: { start, first: first ?? end, end }
270
+ }
271
+ }
272
+ if (truncated) done.result.warning = WARNING_INSUFFICIENT_OUTPUT_BUDGET
273
+ if (toolCalls && toolCalls.length > 0) {
274
+ done.result.toolCalls = fromCerebrasToolCalls(toolCalls)
275
+ }
276
+ return done
277
+ }
278
+
279
+ /**
280
+ * @param {import('#core/envelope.js').CallEnvelope} envelope
281
+ * @param {any} spec
282
+ * @param {ChatCompletionsConfig} config
283
+ */
284
+ function buildRequest (envelope, spec, config) {
285
+ /** @type {Record<string, any>} */
286
+ const args = {
287
+ model: envelope.model,
288
+ temperature: 0,
289
+ messages: toChatMessages(envelope.prompt)
290
+ }
291
+
292
+ if (envelope.outputBudget !== undefined) {
293
+ args.max_tokens = envelope.outputBudget
294
+ }
295
+
296
+ if (envelope.images?.length) {
297
+ injectImages(args, envelope.images)
298
+ }
299
+
300
+ if (envelope.tools?.length) {
301
+ args.tools = toCerebrasTools(envelope.tools)
302
+ if (envelope.toolChoice) {
303
+ args.tool_choice = toToolChoice(config.toolChoiceFlavor || 'openai', envelope.toolChoice)
304
+ }
305
+ if (envelope.parallelToolCalls === false) {
306
+ args.parallel_tool_calls = false
307
+ }
308
+ }
309
+
310
+ if (envelope.outputType === 'json') {
311
+ args.response_format = { type: 'json_object' }
312
+ }
313
+
314
+ if (spec.thinkingEffortLevels) {
315
+ const effort = envelope.outputEffort ?? spec.defaultThinkingEffort ?? 'low'
316
+ if (effort && effort !== 'none') {
317
+ const headroom = spec.thinkingEffortLevels[effort]
318
+ if (args.max_tokens && typeof headroom === 'number') {
319
+ args.max_tokens += headroom
320
+ }
321
+ delete args.temperature
322
+ if (config.reasoningField === 'cerebras_zai' && /zai/i.test(envelope.model)) {
323
+ args.disable_reasoning = false
324
+ } else {
325
+ args.reasoning_effort = effort
326
+ }
327
+ }
328
+ }
329
+
330
+ if (envelope.identifier) {
331
+ args[config.identifierField || 'user'] = envelope.identifier
332
+ }
333
+
334
+ return args
335
+ }
336
+
337
+ /**
338
+ * @param {string | import('#core/envelope.js').Message[]} prompt
339
+ * @returns {Array<any>}
340
+ */
341
+ function toChatMessages (prompt) {
342
+ if (typeof prompt === 'string') return [{ role: 'user', content: prompt }]
343
+ return prompt.map(m => {
344
+ if (m.role === 'tool') {
345
+ return {
346
+ role: 'tool',
347
+ tool_call_id: m.toolCallId,
348
+ content: flattenText(m.content)
349
+ }
350
+ }
351
+ if (m.role === 'assistant' && m.toolCalls?.length) {
352
+ // Chat Completions assistant turn: optional `content` + the
353
+ // `tool_calls` array. `arguments` must be a JSON string on
354
+ // the wire.
355
+ return {
356
+ role: 'assistant',
357
+ content: flattenText(m.content) || '',
358
+ tool_calls: m.toolCalls.map(tc => ({
359
+ id: tc.id,
360
+ type: 'function',
361
+ function: {
362
+ name: tc.name,
363
+ arguments: stringifyToolArgs(tc.arguments)
364
+ }
365
+ }))
366
+ }
367
+ }
368
+ return { role: m.role, content: flattenText(m.content) }
369
+ })
370
+ }
371
+
372
+ /** @param {unknown} args */
373
+ function stringifyToolArgs (args) {
374
+ if (typeof args === 'string' && args) return args
375
+ try { return JSON.stringify(args ?? {}) } catch { return '{}' }
376
+ }
377
+
378
+ /** @param {string | import('#core/envelope.js').MessagePart[]} content */
379
+ function flattenText (content) {
380
+ if (typeof content === 'string') return content
381
+ if (!Array.isArray(content)) return ''
382
+ return content.filter(p => p.type === 'text' && p.text).map(p => p.text).join('\n')
383
+ }
384
+
385
+ /**
386
+ * @param {any} args
387
+ * @param {import('#core/envelope.js').MediaRef[]} images
388
+ */
389
+ function injectImages (args, images) {
390
+ const blocks = images
391
+ .filter(i => i?.fileUri && i?.mimeType)
392
+ .map(i => ({
393
+ type: 'image_url',
394
+ image_url: { url: i.fileUri, detail: 'high' }
395
+ }))
396
+ if (!blocks.length) return
397
+
398
+ // Append to the last user message; fallback to creating one.
399
+ for (let i = args.messages.length - 1; i >= 0; i--) {
400
+ const m = args.messages[i]
401
+ if (m.role !== 'user') continue
402
+ if (typeof m.content === 'string') {
403
+ m.content = [{ type: 'text', text: m.content }, ...blocks]
404
+ } else if (Array.isArray(m.content)) {
405
+ m.content = [...m.content, ...blocks]
406
+ }
407
+ return
408
+ }
409
+ args.messages.push({ role: 'user', content: blocks })
410
+ }
411
+
412
+ /** @param {string} text */
413
+ function parseDsmlToolCalls (text) {
414
+ if (!text || !DSML_RE.test(text)) return null
415
+ const calls = []
416
+ let m
417
+ while ((m = DSML_INVOKE_RE.exec(text)) !== null) {
418
+ const args = {}
419
+ let p
420
+ while ((p = DSML_PARAM_RE.exec(m[2])) !== null) {
421
+ const raw = p[3].trim()
422
+ args[p[1]] = p[2] === 'false' && raw !== '' && !isNaN(Number(raw))
423
+ ? Number(raw)
424
+ : raw
425
+ }
426
+ calls.push({
427
+ id: `dsml_${Date.now()}_${calls.length}`,
428
+ type: 'function',
429
+ function: { name: m[1], arguments: JSON.stringify(args) }
430
+ })
431
+ }
432
+ return calls.length > 0 ? calls : null
433
+ }
434
+
435
+ /** @param {string} text */
436
+ function stripDsml (text) {
437
+ if (!text) return text
438
+ return text.replace(DSML_BLOCK_RE, '').trim() || ''
439
+ }
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Shared error classification for provider adapters.
3
+ *
4
+ * Maps SDK errors (by HTTP status) to a canonical `TypedError` with
5
+ * stable `type` tags (AUTH_INVALID, RATE_LIMIT, PROVIDER_COOLDOWN,
6
+ * PROVIDER_UNAVAILABLE, …). 401/403 messages stay generic to avoid
7
+ * echoing provider bodies that may contain the API key back on the
8
+ * wire; for other statuses the provider's own detail is preserved
9
+ * on `.detail` so callers can debug 400s (schema rejects, etc).
10
+ *
11
+ * @module session/adapters/_errors
12
+ */
13
+
14
+ const DETAIL_CAP = 500
15
+
16
+ /**
17
+ * Extract a short human-readable detail from an SDK error. Trimmed
18
+ * to `DETAIL_CAP` chars so a verbose provider body doesn't blow up
19
+ * log pipelines.
20
+ * @param {any} err
21
+ * @returns {string | undefined}
22
+ */
23
+ function extractDetail (err) {
24
+ if (!err) return undefined
25
+ // OpenAI SDK: err.error.message; Google SDK: err.message is the full
26
+ // body sometimes. Prefer the structured field when present.
27
+ const nested = err.error?.message || err.response?.data?.error?.message
28
+ const raw = nested || err.message
29
+ if (!raw) return undefined
30
+ const str = typeof raw === 'string' ? raw : JSON.stringify(raw)
31
+ return str.length > DETAIL_CAP ? str.slice(0, DETAIL_CAP) + '…' : str
32
+ }
33
+
34
+ /**
35
+ * @param {unknown} e
36
+ * @returns {import('#core/errors.js').TypedError}
37
+ */
38
+ export function classifyProviderError (e) {
39
+ const err = /** @type {any} */(e)
40
+ const status = err?.status
41
+
42
+ if (status === 401 || status === 403) {
43
+ // Deliberately no detail — 401/403 bodies can echo the key.
44
+ return {
45
+ message: 'authentication failed',
46
+ severity: 'error',
47
+ retryable: false,
48
+ type: 'AUTH_INVALID'
49
+ }
50
+ }
51
+ if (status === 429) {
52
+ return {
53
+ message: 'rate limit exceeded',
54
+ severity: 'warn',
55
+ retryable: true,
56
+ type: 'RATE_LIMIT',
57
+ detail: extractDetail(err)
58
+ }
59
+ }
60
+ if (typeof status === 'number' && status >= 500) {
61
+ return {
62
+ message: `provider error ${status}`,
63
+ severity: 'warn',
64
+ retryable: true,
65
+ type: 'PROVIDER_UNAVAILABLE',
66
+ detail: extractDetail(err)
67
+ }
68
+ }
69
+ if (typeof status === 'number' && status >= 400) {
70
+ return {
71
+ message: `provider error ${status}`,
72
+ severity: 'error',
73
+ retryable: false,
74
+ type: 'PROVIDER_ERROR',
75
+ detail: extractDetail(err)
76
+ }
77
+ }
78
+ return {
79
+ message: err?.message ? String(err.message).slice(0, 200) : 'network error',
80
+ severity: 'warn',
81
+ retryable: true,
82
+ type: 'NET_ERROR',
83
+ detail: extractDetail(err)
84
+ }
85
+ }