mohdel 0.90.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +377 -0
  3. package/config/benchmarks.json +39 -0
  4. package/js/client/call.js +75 -0
  5. package/js/client/call_image.js +82 -0
  6. package/js/client/gate-binary.js +72 -0
  7. package/js/client/index.js +16 -0
  8. package/js/client/ndjson.js +29 -0
  9. package/js/client/transport.js +48 -0
  10. package/js/core/envelope.js +141 -0
  11. package/js/core/errors.js +75 -0
  12. package/js/core/events.js +96 -0
  13. package/js/core/image.js +58 -0
  14. package/js/core/index.js +10 -0
  15. package/js/core/status.js +48 -0
  16. package/js/factory/bridge.js +372 -0
  17. package/js/session/_cooldown.js +114 -0
  18. package/js/session/_logger.js +138 -0
  19. package/js/session/_rate_limiter.js +77 -0
  20. package/js/session/_tracing.js +58 -0
  21. package/js/session/adapters/_cancelled.js +44 -0
  22. package/js/session/adapters/_catalog.js +58 -0
  23. package/js/session/adapters/_chat_completions.js +439 -0
  24. package/js/session/adapters/_errors.js +85 -0
  25. package/js/session/adapters/_images.js +60 -0
  26. package/js/session/adapters/_lazy_json_cache.js +76 -0
  27. package/js/session/adapters/_pricing.js +67 -0
  28. package/js/session/adapters/_providers.js +60 -0
  29. package/js/session/adapters/_tools.js +185 -0
  30. package/js/session/adapters/_videos.js +283 -0
  31. package/js/session/adapters/anthropic.js +397 -0
  32. package/js/session/adapters/cerebras.js +28 -0
  33. package/js/session/adapters/deepseek.js +32 -0
  34. package/js/session/adapters/echo.js +51 -0
  35. package/js/session/adapters/fake.js +262 -0
  36. package/js/session/adapters/fireworks.js +46 -0
  37. package/js/session/adapters/gemini.js +381 -0
  38. package/js/session/adapters/groq.js +23 -0
  39. package/js/session/adapters/image/fake.js +55 -0
  40. package/js/session/adapters/image/index.js +40 -0
  41. package/js/session/adapters/image/novita.js +135 -0
  42. package/js/session/adapters/image/openai.js +50 -0
  43. package/js/session/adapters/index.js +53 -0
  44. package/js/session/adapters/mistral.js +31 -0
  45. package/js/session/adapters/novita.js +29 -0
  46. package/js/session/adapters/openai.js +381 -0
  47. package/js/session/adapters/openrouter.js +66 -0
  48. package/js/session/adapters/xai.js +27 -0
  49. package/js/session/bin.js +54 -0
  50. package/js/session/driver.js +160 -0
  51. package/js/session/index.js +18 -0
  52. package/js/session/run.js +393 -0
  53. package/js/session/run_image.js +61 -0
  54. package/package.json +107 -0
  55. package/src/cli/ask.js +160 -0
  56. package/src/cli/backup.js +107 -0
  57. package/src/cli/bench.js +262 -0
  58. package/src/cli/check.js +123 -0
  59. package/src/cli/colored-logger.js +67 -0
  60. package/src/cli/colors.js +13 -0
  61. package/src/cli/default.js +39 -0
  62. package/src/cli/index.js +150 -0
  63. package/src/cli/json-output.js +60 -0
  64. package/src/cli/model.js +571 -0
  65. package/src/cli/onboard.js +232 -0
  66. package/src/cli/rank.js +176 -0
  67. package/src/cli/ratelimit.js +160 -0
  68. package/src/cli/tag.js +105 -0
  69. package/src/lib/assets/alibaba.svg +1 -0
  70. package/src/lib/assets/anthropic.svg +5 -0
  71. package/src/lib/assets/deepseek.svg +1 -0
  72. package/src/lib/assets/gemini.svg +1 -0
  73. package/src/lib/assets/google.svg +2 -0
  74. package/src/lib/assets/kwaipilot.svg +1 -0
  75. package/src/lib/assets/meta.svg +1 -0
  76. package/src/lib/assets/minimax.svg +9 -0
  77. package/src/lib/assets/moonshotai.svg +4 -0
  78. package/src/lib/assets/openai.svg +5 -0
  79. package/src/lib/assets/xai.svg +1 -0
  80. package/src/lib/assets/xiaomi.svg +2 -0
  81. package/src/lib/assets/zai.svg +219 -0
  82. package/src/lib/benchmark-score.js +215 -0
  83. package/src/lib/benchmark-truth.js +68 -0
  84. package/src/lib/cache.js +76 -0
  85. package/src/lib/common.js +208 -0
  86. package/src/lib/cooldown.js +63 -0
  87. package/src/lib/creators.js +71 -0
  88. package/src/lib/curated-cache.js +146 -0
  89. package/src/lib/errors.js +126 -0
  90. package/src/lib/index.js +726 -0
  91. package/src/lib/logger.js +29 -0
  92. package/src/lib/providers.js +87 -0
  93. package/src/lib/rank.js +390 -0
  94. package/src/lib/rate-limiter.js +50 -0
  95. package/src/lib/schema.js +150 -0
  96. package/src/lib/select.js +474 -0
  97. package/src/lib/tracing.js +62 -0
  98. package/src/lib/utils.js +85 -0
@@ -0,0 +1,283 @@
1
+ /**
2
+ * Video upload + inline handling shared across adapters that
3
+ * currently support video (Gemini today; more could follow the same
4
+ * shape).
5
+ *
6
+ * Three code paths per envelope video ref:
7
+ * 1. `file://` / local path, ≤20MB, no cache flag → read + base64
8
+ * inline as `inlineData`.
9
+ * 2. `file://` / local path, >20MB or `cache: true` → upload via
10
+ * the provider SDK (Gemini `ai.files.upload`), poll until the
11
+ * file is ACTIVE, return a `fileData` part. Content-hash +
12
+ * mtime-keyed cache at `~/.cache/mohdel/uploaded-files.json`
13
+ * short-circuits repeat uploads.
14
+ * 3. `https://` → passthrough as `fileData.fileUri` (Gemini fetches
15
+ * it directly).
16
+ *
17
+ * @module session/adapters/_videos
18
+ */
19
+
20
import fs from 'node:fs/promises'
import { createReadStream, existsSync } from 'node:fs'
import { createHash } from 'node:crypto'
import { join } from 'node:path'

import envPaths from 'env-paths'
26
+
27
+ const CACHE_DIR = envPaths('mohdel', { suffix: null }).cache
28
+ const CACHE_PATH = join(CACHE_DIR, 'uploaded-files.json')
29
+
30
+ const INLINE_MAX_BYTES = 20 * 1024 * 1024
31
+ const VIDEO_UPLOAD_POLL_INTERVAL_MS = 5_000
32
+ /** Hard deadline on the PROCESSING → ACTIVE wait. Videos occasionally
33
+ * take a while; 5 min is generous enough that a stuck file ≠ slow
34
+ * file, but short enough that a pool slot doesn't hang forever. */
35
+ const MAX_UPLOAD_POLL_MS = 300_000
36
+
37
+ /**
38
+ * @typedef {object} UploadedFileRecord
39
+ * @property {string} hash
40
+ * @property {{uri: string, name: string, mimeType?: string, state?: string}} data
41
+ * @property {string} filePath
42
+ * @property {string} provider
43
+ * @property {string} cachedAt
44
+ */
45
+
46
+ // ---------- cache ----------
47
+
48
/** Create the on-disk cache directory if it does not exist yet. */
async function ensureCacheDir () {
  if (existsSync(CACHE_DIR)) return
  await fs.mkdir(CACHE_DIR, { recursive: true })
}
53
+
54
/**
 * Content-hash keyed by `sha256(bytes + filePath + mtime)` so an
 * edited file forces re-upload even if the path stays the same.
 *
 * Streams the file through the hash instead of buffering it: video
 * inputs can be far larger than the 20MB inline cap, and `readFile`
 * would hold the entire payload in memory just to compute a digest.
 * The digest recipe (bytes, then path, then mtime ISO string) is
 * unchanged, so existing cache entries remain valid.
 *
 * @param {string} filePath
 * @returns {Promise<string>} hex-encoded sha256 digest
 */
async function hashFile (filePath) {
  const st = await fs.stat(filePath)
  const h = createHash('sha256')
  for await (const chunk of createReadStream(filePath)) {
    h.update(chunk)
  }
  h.update(filePath)
  h.update(st.mtime.toISOString())
  return h.digest('hex')
}
72
+
73
/**
 * Read the upload cache from disk. Any failure (missing file, bad
 * JSON, permissions) degrades to an empty cache rather than throwing.
 */
async function loadCache () {
  if (!existsSync(CACHE_PATH)) return {}
  try {
    const raw = await fs.readFile(CACHE_PATH, 'utf8')
    return JSON.parse(raw)
  } catch {
    return {}
  }
}
82
+
83
/**
 * Persist the upload cache to disk. Best effort: cache write failures
 * shouldn't bring down a call, so errors are swallowed.
 */
async function saveCache (cache) {
  try {
    await ensureCacheDir()
    const text = JSON.stringify(cache, null, 2)
    await fs.writeFile(CACHE_PATH, text)
  } catch {
    // swallowed deliberately — losing a cache entry only costs a re-upload
  }
}
91
+
92
/**
 * Look up a previously uploaded file record by content hash.
 *
 * @param {string} filePath
 * @param {string} provider
 * @returns {Promise<UploadedFileRecord | undefined>} undefined on miss or any error
 */
export async function getCachedFile (filePath, provider = 'gemini') {
  try {
    const [hash, cache] = await Promise.all([hashFile(filePath), loadCache()])
    return cache[`${provider}:${hash}`]
  } catch {
    return undefined
  }
}
106
+
107
/**
 * Record an uploaded file under its content hash (best effort: any
 * failure is swallowed, a later cache miss just means a re-upload).
 *
 * @param {string} filePath
 * @param {object} data
 * @param {string} provider
 */
export async function setCachedFile (filePath, data, provider = 'gemini') {
  try {
    const hash = await hashFile(filePath)
    const cache = await loadCache()
    const record = {
      hash,
      data,
      filePath,
      provider,
      cachedAt: new Date().toISOString()
    }
    cache[`${provider}:${hash}`] = record
    await saveCache(cache)
  } catch {
    // best effort
  }
}
128
+
129
+ // ---------- loader ----------
130
+
131
/**
 * @typedef {object} VideoPart
 * @property {{data: string, mimeType: string}} [inlineData]
 * @property {{fileUri: string, mimeType: string}} [fileData]
 */

/**
 * Resolve envelope video refs into provider content parts
 * (inlineData / fileData), applying the three code paths documented
 * in the module header. Refs missing `fileUri` or `mimeType` are
 * skipped silently; an aborted signal throws an AbortError.
 *
 * @param {import('#core/envelope.js').MediaRef[]} videos
 * @param {{
 *   client: {files: {upload: (args: any) => Promise<any>, get: (args: {name: string}) => Promise<any>}},
 *   useCache?: boolean,
 *   sleep?: (ms: number) => Promise<void>,
 *   now?: () => number,
 *   readFile?: (path: string) => Promise<Buffer>,
 *   stat?: (path: string) => Promise<{size: number}>,
 *   signal?: AbortSignal,
 *   provider?: string
 * }} deps
 * @returns {Promise<VideoPart[]>}
 */
export async function loadVideos (videos, deps) {
  const out = []
  // Array.isArray already rejects null/undefined — no separate
  // truthiness guard needed.
  if (!Array.isArray(videos)) return out
  const ctx = {
    client: deps.client,
    useCache: !!deps.useCache,
    sleep: deps.sleep ?? defaultSleep,
    now: deps.now ?? Date.now,
    readFileFn: deps.readFile ?? fs.readFile,
    statFn: deps.stat ?? fs.stat,
    signal: deps.signal,
    provider: deps.provider ?? 'gemini'
  }

  for (const v of videos) {
    if (!v?.fileUri || !v?.mimeType) continue
    throwIfAborted(ctx.signal)
    const part = await toPart(v, ctx)
    if (part) out.push(part)
  }
  return out
}
173
+
174
/**
 * Convert a single video ref into a provider part, choosing between
 * URL passthrough, inline base64, and the upload API.
 */
async function toPart (ref, ctx) {
  const { fileUri, mimeType } = ref

  // Remote http(s) URL → hand the URL straight through; the provider
  // fetches it server-side.
  if (/^https?:\/\//i.test(fileUri)) {
    return { fileData: { fileUri, mimeType } }
  }

  // data: URI → everything after the first comma is the base64 payload.
  if (fileUri.startsWith('data:')) {
    const payloadStart = fileUri.indexOf(',') + 1
    if (payloadStart === 0) return null
    return { inlineData: { data: fileUri.slice(payloadStart), mimeType } }
  }

  // file:// URI or bare local path. NOTE(review): the prefix strip does
  // not percent-decode, so encoded characters (%20 etc.) in a file://
  // URI would stay encoded — confirm callers only pass plain paths.
  const filePath = fileUri.replace(/^file:\/\//, '')
  let stats
  try {
    stats = await ctx.statFn(filePath)
  } catch {
    // unreadable/missing file is skipped, not fatal
    return null
  }

  // Large files (or an explicit cache request) go through the upload
  // API; small ones are inlined as base64.
  if (ctx.useCache || stats.size > INLINE_MAX_BYTES) {
    const uri = await uploadFile(filePath, mimeType, ctx)
    return { fileData: { fileUri: uri, mimeType } }
  }

  const bytes = await ctx.readFileFn(filePath)
  return { inlineData: { data: bytes.toString('base64'), mimeType } }
}
206
+
207
/**
 * Upload a local file and poll until it is no longer PROCESSING.
 * Honors the on-disk cache so repeat calls (same bytes + mtime) skip
 * the network round trip. The PROCESSING → ACTIVE wait is bounded by
 * `MAX_UPLOAD_POLL_MS`; an aborted signal breaks out immediately.
 *
 * @returns {Promise<string>} the provider file URI
 */
async function uploadFile (filePath, mimeType, ctx) {
  const hit = await getCachedFile(filePath, ctx.provider)
  if (hit?.data?.uri) return hit.data.uri
  throwIfAborted(ctx.signal)

  let remote = await ctx.client.files.upload({
    file: filePath,
    config: { mimeType }
  })

  const deadline = ctx.now() + MAX_UPLOAD_POLL_MS
  while (remote?.state === 'PROCESSING') {
    if (ctx.now() >= deadline) {
      throw typedError(
        `gemini file upload did not become ACTIVE within ${MAX_UPLOAD_POLL_MS / 1000}s`,
        'PROVIDER_UNAVAILABLE',
        true
      )
    }
    // Check abort on both sides of the sleep so cancellation is
    // observed promptly.
    throwIfAborted(ctx.signal)
    await ctx.sleep(VIDEO_UPLOAD_POLL_INTERVAL_MS)
    throwIfAborted(ctx.signal)
    remote = await ctx.client.files.get({ name: remote.name })
  }

  if (remote?.state === 'FAILED') {
    throw new Error('gemini file processing failed')
  }
  if (!remote?.uri) {
    throw new Error('gemini upload returned no uri')
  }

  await setCachedFile(filePath, remote, ctx.provider)
  return remote.uri
}
248
+
249
/**
 * Raise an `AbortError`-shaped error when `signal` is aborted; no-op
 * otherwise. The gemini adapter's video-load catch block already
 * converts this shape to the standard cancelled terminal via the
 * outer `signal?.aborted` check in `run.js`.
 *
 * @param {AbortSignal | undefined} signal
 */
function throwIfAborted (signal) {
  if (!signal?.aborted) return
  const abortErr = new Error('aborted')
  abortErr.name = 'AbortError'
  throw abortErr
}
264
+
265
/**
 * Build an Error carrying the session layer's typed-error envelope on
 * `err.typed`. Retryable errors are tagged `warn`, terminal ones `error`.
 *
 * @param {string} message
 * @param {string} type
 * @param {boolean} retryable
 */
function typedError (message, type, retryable) {
  const severity = retryable ? 'warn' : 'error'
  const err = new Error(message)
  err.typed = { message, severity, retryable, type }
  return err
}
280
+
281
/** Promise-wrapped setTimeout used when the caller injects no sleeper. */
function defaultSleep (ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms)
  })
}
@@ -0,0 +1,397 @@
1
+ /**
2
+ * Anthropic Messages API adapter.
3
+ *
4
+ * Scope:
5
+ * - Text in, text out, streaming
6
+ * - Status contract (incomplete + warning on max_tokens)
7
+ * - Tools: unified format → anthropic input_schema; streaming
8
+ * function_call deltas; tool_use terminal state; tool_result
9
+ * messages on the way back in
10
+ * - AbortSignal forwarded to SDK
11
+ *
12
+ * Deferred: vision, thinking/reasoning control (outputEffort).
13
+ *
14
+ * @module session/adapters/anthropic
15
+ */
16
+
17
+ import Anthropic from '@anthropic-ai/sdk'
18
+
19
+ import {
20
+ STATUS_COMPLETED,
21
+ STATUS_INCOMPLETE,
22
+ STATUS_TOOL_USE,
23
+ WARNING_INSUFFICIENT_OUTPUT_BUDGET
24
+ } from '#core/status.js'
25
+
26
+ import { cancelledDone } from './_cancelled.js'
27
+ import { getSpec } from './_catalog.js'
28
+ import { classifyProviderError } from './_errors.js'
29
+ import { loadImages } from './_images.js'
30
+ import { costFor } from './_pricing.js'
31
+ import {
32
+ toAnthropicTools,
33
+ fromAnthropicToolCalls,
34
+ toToolChoice
35
+ } from './_tools.js'
36
+
37
+ /**
38
+ * Approximate chars-per-token used to estimate Anthropic thinking
39
+ * tokens (the API doesn't report them separately in `usage`).
40
+ *
41
+ * ## Known limitations (cost accuracy)
42
+ *
43
+ * The estimate structurally under-counts in three ways:
44
+ * 1. **Signatures.** Thinking blocks have shape
45
+ * `{type: 'thinking', thinking: '<text>', signature: '<hash>'}`.
46
+ * Signatures consume output tokens but aren't streamed as
47
+ * `thinking_delta` — we never see them.
48
+ * 2. **Redacted thinking.** When Anthropic returns
49
+ * `{type: 'redacted_thinking', data: '<encrypted>'}` in place
50
+ * of a plain thinking block, zero `thinking_delta` events are
51
+ * emitted even though the block still consumes output tokens.
52
+ * 3. **BPE variance.** 4 chars/token is an English-text average;
53
+ * dense reasoning prose can compress differently.
54
+ *
55
+ * **Cost impact:** provably zero when `thinkingPrice == outputPrice`
56
+ * (true for every Anthropic entry in the curated catalog today) —
57
+ * the heuristic error cancels in `cost = i*ip + o*op + t*tp` because
58
+ * `o*op + t*op = (o+t)*op = totalOutput*op`. If a catalog maintainer
59
+ * ever sets asymmetric Anthropic pricing, cost drifts by
60
+ * `estimate_error × (thinkingPrice − outputPrice)` — that's a
61
+ * catalog-editor awareness item until Anthropic exposes a real
62
+ * `thinking_tokens` field in `usage`. (They'll almost certainly do
63
+ * that the day they introduce asymmetric pricing.)
64
+ */
65
+ const ANTHROPIC_THINKING_CHARS_PER_TOKEN = 4
66
+
67
+ /**
68
+ * Fallback `max_tokens` when the caller supplied no `outputBudget`
69
+ * and the model spec has no `outputTokenLimit`. Anthropic's
70
+ * `max_tokens` is required on every request; 4096 matches the
71
+ * smallest Claude output ceiling and keeps calls cheap on unknown
72
+ * models. Tune via `spec.outputTokenLimit` or `envelope.outputBudget`.
73
+ */
74
+ const ANTHROPIC_DEFAULT_MAX_TOKENS = 4096
75
+
76
+ /**
77
+ * @param {import('#core/envelope.js').CallEnvelope} envelope
78
+ * @param {{client?: Anthropic, signal?: AbortSignal, log?: any, span?: any}} [deps]
79
+ * @returns {AsyncGenerator<import('#core/events.js').Event>}
80
+ */
81
+ export async function * anthropic (envelope, deps = {}) {
82
+ const client = deps.client ?? new Anthropic({ apiKey: envelope.auth.key })
83
+ const signal = deps.signal
84
+ const log = deps.log
85
+ const start = String(process.hrtime.bigint())
86
+ let first = null
87
+
88
+ const { system, conversation } = splitPrompt(envelope.prompt)
89
+
90
+ // Attach images to the last user message before building the request.
91
+ if (envelope.images?.length) {
92
+ try {
93
+ const loaded = await loadImages(envelope.images)
94
+ const blocks = loaded.map(toAnthropicImageBlock).filter(Boolean)
95
+ if (blocks.length) injectImageBlocks(conversation, blocks)
96
+ } catch (e) {
97
+ log?.warn({ err: e }, '[mohdel:anthropic] image load failed')
98
+ yield { type: 'error', error: classifyProviderError(e) }
99
+ return
100
+ }
101
+ }
102
+
103
+ const request = buildRequest(envelope, conversation, system)
104
+
105
+ // F53: accumulate via array + join to avoid per-delta V8 cons-string
106
+ // churn. Materialized at each exit point.
107
+ const outputParts = []
108
+ const currentOutput = () => outputParts.join('')
109
+ let inputTokens = 0
110
+ let outputTokens = 0
111
+ let thinkingChars = 0
112
+ let status = STATUS_COMPLETED
113
+ /** @type {string | undefined} */
114
+ let warning
115
+
116
+ // Tool-use accumulation state
117
+ /** @type {Map<number, {id: string, name: string, inputJson: string}>} */
118
+ const toolBlocks = new Map()
119
+
120
+ try {
121
+ const stream = await client.messages.stream(request, { signal })
122
+
123
+ for await (const event of stream) {
124
+ if (signal?.aborted) {
125
+ yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens)
126
+ return
127
+ }
128
+ switch (event.type) {
129
+ case 'message_start':
130
+ if (event.message?.usage?.input_tokens) {
131
+ inputTokens = event.message.usage.input_tokens
132
+ }
133
+ break
134
+
135
+ case 'content_block_start':
136
+ if (event.content_block?.type === 'tool_use') {
137
+ toolBlocks.set(event.index, {
138
+ id: event.content_block.id,
139
+ name: event.content_block.name,
140
+ inputJson: ''
141
+ })
142
+ }
143
+ break
144
+
145
+ case 'content_block_delta':
146
+ if (event.delta?.type === 'text_delta' && event.delta.text) {
147
+ if (first === null) first = String(process.hrtime.bigint())
148
+ outputParts.push(event.delta.text)
149
+ yield { type: 'delta', delta: { type: 'message', delta: event.delta.text } }
150
+ } else if (event.delta?.type === 'thinking_delta') {
151
+ // Thinking content is not surfaced via `delta` events —
152
+ // only `message` and `function_call` deltas stream to
153
+ // consumers. Accumulate char count to estimate
154
+ // thinking_tokens at the end.
155
+ thinkingChars += (event.delta.thinking || '').length
156
+ } else if (event.delta?.type === 'input_json_delta') {
157
+ const block = toolBlocks.get(event.index)
158
+ if (block) {
159
+ block.inputJson += event.delta.partial_json ?? ''
160
+ if (first === null) first = String(process.hrtime.bigint())
161
+ yield {
162
+ type: 'delta',
163
+ delta: { type: 'function_call', delta: event.delta.partial_json ?? '' }
164
+ }
165
+ }
166
+ }
167
+ break
168
+
169
+ case 'message_delta':
170
+ if (event.delta?.stop_reason === 'max_tokens') {
171
+ status = STATUS_INCOMPLETE
172
+ warning = WARNING_INSUFFICIENT_OUTPUT_BUDGET
173
+ } else if (event.delta?.stop_reason === 'tool_use') {
174
+ status = STATUS_TOOL_USE
175
+ }
176
+ if (event.usage?.output_tokens) {
177
+ outputTokens = event.usage.output_tokens
178
+ }
179
+ break
180
+
181
+ default:
182
+ break
183
+ }
184
+ }
185
+ } catch (e) {
186
+ if (signal?.aborted) {
187
+ yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens)
188
+ return
189
+ }
190
+ log?.warn({ err: e }, '[mohdel:anthropic] stream failed')
191
+ yield { type: 'error', error: classifyProviderError(e) }
192
+ return
193
+ }
194
+
195
+ if (signal?.aborted) {
196
+ yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens)
197
+ return
198
+ }
199
+
200
+ const end = String(process.hrtime.bigint())
201
+ // Estimate thinking tokens from streamed thinking_delta char count
202
+ // (Anthropic API doesn't report them separately). Cap at total
203
+ // output tokens reported by usage.
204
+ const estimatedThinkingTokens = thinkingChars > 0
205
+ ? Math.min(Math.ceil(thinkingChars / ANTHROPIC_THINKING_CHARS_PER_TOKEN), outputTokens)
206
+ : 0
207
+ const messageOutputTokens = Math.max(0, outputTokens - estimatedThinkingTokens)
208
+
209
+ /** @type {import('#core/events.js').DoneEvent} */
210
+ const done = {
211
+ type: 'done',
212
+ result: {
213
+ status,
214
+ output: currentOutput() || null,
215
+ inputTokens,
216
+ outputTokens: messageOutputTokens,
217
+ thinkingTokens: estimatedThinkingTokens,
218
+ cost: costFor(
219
+ `${envelope.provider}/${envelope.model}`,
220
+ { inputTokens, outputTokens: messageOutputTokens, thinkingTokens: estimatedThinkingTokens }
221
+ ),
222
+ timestamps: { start, first: first ?? end, end }
223
+ }
224
+ }
225
+ if (warning) done.result.warning = warning
226
+ if (toolBlocks.size > 0) {
227
+ done.result.toolCalls = finalizeToolCalls(toolBlocks)
228
+ }
229
+ yield done
230
+ }
231
+
232
+ /**
233
+ * @param {Map<number, {id: string, name: string, inputJson: string}>} toolBlocks
234
+ */
235
+ function finalizeToolCalls (toolBlocks) {
236
+ const blocks = Array.from(toolBlocks.values()).map(b => ({
237
+ id: b.id,
238
+ name: b.name,
239
+ input: safeParseJson(b.inputJson)
240
+ }))
241
+ return fromAnthropicToolCalls(blocks)
242
+ }
243
+
244
+ /** @param {string} s */
245
+ function safeParseJson (s) {
246
+ if (!s) return {}
247
+ try { return JSON.parse(s) } catch { return s }
248
+ }
249
+
250
+ /**
251
+ * @param {import('#core/envelope.js').CallEnvelope} envelope
252
+ * @param {Array<{role: string, content: any}>} conversation
253
+ * @param {string} system
254
+ */
255
+ function buildRequest (envelope, conversation, system) {
256
+ const spec = getSpec(`${envelope.provider}/${envelope.model}`)
257
+ const outputTokenLimit = spec?.outputTokenLimit
258
+
259
+ /** @type {Record<string, any>} */
260
+ const request = {
261
+ model: envelope.model,
262
+ max_tokens: envelope.outputBudget ?? outputTokenLimit ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
263
+ messages: conversation
264
+ }
265
+ if (system) request.system = system
266
+ if (envelope.tools?.length) {
267
+ request.tools = toAnthropicTools(envelope.tools)
268
+ }
269
+ if (envelope.toolChoice) {
270
+ const choice = toToolChoice('anthropic', envelope.toolChoice)
271
+ if (envelope.parallelToolCalls === false && choice) {
272
+ choice.disable_parallel_tool_use = true
273
+ }
274
+ request.tool_choice = choice
275
+ }
276
+
277
+ // Thinking — adaptive mode, with an optional effort hint when the
278
+ // spec defines `thinkingEffortLevels`. `outputEffort: 'none'`
279
+ // opts out of thinking entirely (`thinking.type: 'disabled'` —
280
+ // adaptive thinking must not be enabled when the caller has
281
+ // explicitly disabled it, otherwise the call silently clobbers
282
+ // `outputBudget` with the full model limit).
283
+ if (spec?.thinkingEffortLevels) {
284
+ const effort = envelope.outputEffort ?? spec.defaultThinkingEffort
285
+ if (effort && effort !== 'none') {
286
+ request.thinking = { type: 'adaptive' }
287
+ if (spec.thinkingEffortLevels[effort] != null) {
288
+ request.output_config = { effort }
289
+ }
290
+ // Thinking tokens share the output budget — give them the full
291
+ // model limit instead of just the requested outputBudget.
292
+ if (outputTokenLimit) request.max_tokens = outputTokenLimit
293
+ }
294
+ }
295
+
296
+ return request
297
+ }
298
+
299
+ /** @param {string | import('#core/envelope.js').Message[]} prompt */
300
+ function splitPrompt (prompt) {
301
+ if (typeof prompt === 'string') {
302
+ return { system: '', conversation: [{ role: 'user', content: prompt }] }
303
+ }
304
+ /** @type {string[]} */
305
+ const systemParts = []
306
+ /** @type {Array<{role: string, content: any}>} */
307
+ const conversation = []
308
+ for (const m of prompt) {
309
+ if (m.role === 'system') {
310
+ systemParts.push(flattenText(m.content))
311
+ } else if (m.role === 'tool') {
312
+ // Tool results go in a user-role message with tool_result blocks.
313
+ conversation.push({
314
+ role: 'user',
315
+ content: [{
316
+ type: 'tool_result',
317
+ tool_use_id: m.toolCallId ?? '',
318
+ content: flattenText(m.content)
319
+ }]
320
+ })
321
+ } else if (m.role === 'assistant' && m.toolCalls?.length) {
322
+ // Assistant + tool_use: optional text block followed by one
323
+ // tool_use block per call.
324
+ const content = []
325
+ const text = flattenText(m.content)
326
+ if (text) content.push({ type: 'text', text })
327
+ for (const tc of m.toolCalls) {
328
+ content.push({
329
+ type: 'tool_use',
330
+ id: tc.id,
331
+ name: tc.name,
332
+ input: tc.arguments ?? {}
333
+ })
334
+ }
335
+ conversation.push({ role: 'assistant', content })
336
+ } else {
337
+ conversation.push({
338
+ role: m.role,
339
+ content: toAnthropicContent(m.content)
340
+ })
341
+ }
342
+ }
343
+ return { system: systemParts.filter(Boolean).join('\n\n'), conversation }
344
+ }
345
+
346
+ /** @param {string | import('#core/envelope.js').MessagePart[]} content */
347
+ function flattenText (content) {
348
+ if (typeof content === 'string') return content
349
+ return content.filter(p => p.type === 'text' && p.text).map(p => p.text).join('\n')
350
+ }
351
+
352
+ /** @param {string | import('#core/envelope.js').MessagePart[]} content */
353
+ function toAnthropicContent (content) {
354
+ if (typeof content === 'string') return content
355
+ return content.map(p => {
356
+ if (p.type === 'text') return { type: 'text', text: p.text ?? '' }
357
+ throw new Error(`unsupported content part type: ${p.type}`)
358
+ })
359
+ }
360
+
361
+ /** @param {import('./_images.js').LoadedImage} img */
362
+ function toAnthropicImageBlock (img) {
363
+ if (img.base64) {
364
+ return {
365
+ type: 'image',
366
+ source: { type: 'base64', media_type: img.mimeType, data: img.base64 }
367
+ }
368
+ }
369
+ if (img.url) {
370
+ return {
371
+ type: 'image',
372
+ source: { type: 'url', url: img.url }
373
+ }
374
+ }
375
+ return null
376
+ }
377
+
378
+ /**
379
+ * Inject image content blocks into the LAST user message in the
380
+ * conversation (or append a new user message if none exists).
381
+ *
382
+ * @param {Array<{role: string, content: any}>} conversation
383
+ * @param {Array<any>} blocks
384
+ */
385
+ function injectImageBlocks (conversation, blocks) {
386
+ for (let i = conversation.length - 1; i >= 0; i--) {
387
+ if (conversation[i].role !== 'user') continue
388
+ const msg = conversation[i]
389
+ if (typeof msg.content === 'string') {
390
+ msg.content = [{ type: 'text', text: msg.content }, ...blocks]
391
+ } else if (Array.isArray(msg.content)) {
392
+ msg.content = [...msg.content, ...blocks]
393
+ }
394
+ return
395
+ }
396
+ conversation.push({ role: 'user', content: blocks })
397
+ }
@@ -0,0 +1,28 @@
1
+ /**
2
+ * Cerebras adapter — non-streaming chat completions with
3
+ * Cerebras-specific reasoning toggle for zai-family models.
4
+ *
5
+ * @module session/adapters/cerebras
6
+ */
7
+
8
+ import Cerebras from '@cerebras/cerebras_cloud_sdk'
9
+
10
+ import { runChatCompletions } from './_chat_completions.js'
11
+
12
/**
 * Stream one call through the shared chat-completions runner using a
 * Cerebras client, with the cerebras tool-choice flavor and the
 * zai-family reasoning field.
 *
 * @param {import('#core/envelope.js').CallEnvelope} envelope
 * @param {{client?: any, signal?: AbortSignal, log?: any, span?: any}} [deps]
 * @returns {AsyncGenerator<import('#core/events.js').Event>}
 */
export async function * cerebras (envelope, deps = {}) {
  const client = deps.client ?? new Cerebras({ apiKey: envelope.auth.key })
  const flavor = {
    provider: 'cerebras',
    toolChoiceFlavor: 'cerebras',
    reasoningField: 'cerebras_zai'
  }
  const runDeps = { signal: deps.signal, log: deps.log, span: deps.span }
  yield * runChatCompletions(envelope, client, flavor, runDeps)
}