mohdel 0.90.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +377 -0
- package/config/benchmarks.json +39 -0
- package/js/client/call.js +75 -0
- package/js/client/call_image.js +82 -0
- package/js/client/gate-binary.js +72 -0
- package/js/client/index.js +16 -0
- package/js/client/ndjson.js +29 -0
- package/js/client/transport.js +48 -0
- package/js/core/envelope.js +141 -0
- package/js/core/errors.js +75 -0
- package/js/core/events.js +96 -0
- package/js/core/image.js +58 -0
- package/js/core/index.js +10 -0
- package/js/core/status.js +48 -0
- package/js/factory/bridge.js +372 -0
- package/js/session/_cooldown.js +114 -0
- package/js/session/_logger.js +138 -0
- package/js/session/_rate_limiter.js +77 -0
- package/js/session/_tracing.js +58 -0
- package/js/session/adapters/_cancelled.js +44 -0
- package/js/session/adapters/_catalog.js +58 -0
- package/js/session/adapters/_chat_completions.js +439 -0
- package/js/session/adapters/_errors.js +85 -0
- package/js/session/adapters/_images.js +60 -0
- package/js/session/adapters/_lazy_json_cache.js +76 -0
- package/js/session/adapters/_pricing.js +67 -0
- package/js/session/adapters/_providers.js +60 -0
- package/js/session/adapters/_tools.js +185 -0
- package/js/session/adapters/_videos.js +283 -0
- package/js/session/adapters/anthropic.js +397 -0
- package/js/session/adapters/cerebras.js +28 -0
- package/js/session/adapters/deepseek.js +32 -0
- package/js/session/adapters/echo.js +51 -0
- package/js/session/adapters/fake.js +262 -0
- package/js/session/adapters/fireworks.js +46 -0
- package/js/session/adapters/gemini.js +381 -0
- package/js/session/adapters/groq.js +23 -0
- package/js/session/adapters/image/fake.js +55 -0
- package/js/session/adapters/image/index.js +40 -0
- package/js/session/adapters/image/novita.js +135 -0
- package/js/session/adapters/image/openai.js +50 -0
- package/js/session/adapters/index.js +53 -0
- package/js/session/adapters/mistral.js +31 -0
- package/js/session/adapters/novita.js +29 -0
- package/js/session/adapters/openai.js +381 -0
- package/js/session/adapters/openrouter.js +66 -0
- package/js/session/adapters/xai.js +27 -0
- package/js/session/bin.js +54 -0
- package/js/session/driver.js +160 -0
- package/js/session/index.js +18 -0
- package/js/session/run.js +393 -0
- package/js/session/run_image.js +61 -0
- package/package.json +107 -0
- package/src/cli/ask.js +160 -0
- package/src/cli/backup.js +107 -0
- package/src/cli/bench.js +262 -0
- package/src/cli/check.js +123 -0
- package/src/cli/colored-logger.js +67 -0
- package/src/cli/colors.js +13 -0
- package/src/cli/default.js +39 -0
- package/src/cli/index.js +150 -0
- package/src/cli/json-output.js +60 -0
- package/src/cli/model.js +571 -0
- package/src/cli/onboard.js +232 -0
- package/src/cli/rank.js +176 -0
- package/src/cli/ratelimit.js +160 -0
- package/src/cli/tag.js +105 -0
- package/src/lib/assets/alibaba.svg +1 -0
- package/src/lib/assets/anthropic.svg +5 -0
- package/src/lib/assets/deepseek.svg +1 -0
- package/src/lib/assets/gemini.svg +1 -0
- package/src/lib/assets/google.svg +2 -0
- package/src/lib/assets/kwaipilot.svg +1 -0
- package/src/lib/assets/meta.svg +1 -0
- package/src/lib/assets/minimax.svg +9 -0
- package/src/lib/assets/moonshotai.svg +4 -0
- package/src/lib/assets/openai.svg +5 -0
- package/src/lib/assets/xai.svg +1 -0
- package/src/lib/assets/xiaomi.svg +2 -0
- package/src/lib/assets/zai.svg +219 -0
- package/src/lib/benchmark-score.js +215 -0
- package/src/lib/benchmark-truth.js +68 -0
- package/src/lib/cache.js +76 -0
- package/src/lib/common.js +208 -0
- package/src/lib/cooldown.js +63 -0
- package/src/lib/creators.js +71 -0
- package/src/lib/curated-cache.js +146 -0
- package/src/lib/errors.js +126 -0
- package/src/lib/index.js +726 -0
- package/src/lib/logger.js +29 -0
- package/src/lib/providers.js +87 -0
- package/src/lib/rank.js +390 -0
- package/src/lib/rate-limiter.js +50 -0
- package/src/lib/schema.js +150 -0
- package/src/lib/select.js +474 -0
- package/src/lib/tracing.js +62 -0
- package/src/lib/utils.js +85 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Video upload + inline handling shared across adapters that
|
|
3
|
+
* currently support video (Gemini today; more could follow the same
|
|
4
|
+
* shape).
|
|
5
|
+
*
|
|
6
|
+
* Three code paths per envelope video ref:
|
|
7
|
+
* 1. `file://` / local path, ≤20MB, no cache flag → read + base64
|
|
8
|
+
* inline as `inlineData`.
|
|
9
|
+
* 2. `file://` / local path, >20MB or `cache: true` → upload via
|
|
10
|
+
* the provider SDK (Gemini `ai.files.upload`), poll until the
|
|
11
|
+
* file is ACTIVE, return a `fileData` part. Content-hash +
|
|
12
|
+
* mtime-keyed cache at `~/.cache/mohdel/uploaded-files.json`
|
|
13
|
+
* short-circuits repeat uploads.
|
|
14
|
+
* 3. `https://` → passthrough as `fileData.fileUri` (Gemini fetches
|
|
15
|
+
* it directly).
|
|
16
|
+
*
|
|
17
|
+
* @module session/adapters/_videos
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import fs from 'node:fs/promises'
|
|
21
|
+
import { existsSync } from 'node:fs'
|
|
22
|
+
import { createHash } from 'node:crypto'
|
|
23
|
+
import { join } from 'node:path'
|
|
24
|
+
|
|
25
|
+
import envPaths from 'env-paths'
|
|
26
|
+
|
|
27
|
+
const CACHE_DIR = envPaths('mohdel', { suffix: null }).cache
|
|
28
|
+
const CACHE_PATH = join(CACHE_DIR, 'uploaded-files.json')
|
|
29
|
+
|
|
30
|
+
const INLINE_MAX_BYTES = 20 * 1024 * 1024
|
|
31
|
+
const VIDEO_UPLOAD_POLL_INTERVAL_MS = 5_000
|
|
32
|
+
/** Hard deadline on the PROCESSING → ACTIVE wait. Videos occasionally
|
|
33
|
+
* take a while; 5 min is generous enough that a stuck file ≠ slow
|
|
34
|
+
* file, but short enough that a pool slot doesn't hang forever. */
|
|
35
|
+
const MAX_UPLOAD_POLL_MS = 300_000
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* @typedef {object} UploadedFileRecord
|
|
39
|
+
* @property {string} hash
|
|
40
|
+
* @property {{uri: string, name: string, mimeType?: string, state?: string}} data
|
|
41
|
+
* @property {string} filePath
|
|
42
|
+
* @property {string} provider
|
|
43
|
+
* @property {string} cachedAt
|
|
44
|
+
*/
|
|
45
|
+
|
|
46
|
+
// ---------- cache ----------
|
|
47
|
+
|
|
48
|
+
/**
 * Ensure the on-disk cache directory exists.
 *
 * `fs.mkdir` with `recursive: true` is idempotent (it succeeds when
 * the directory already exists), so the previous `existsSync`
 * pre-check was redundant and introduced a check-then-act race when
 * two calls ran concurrently. A single unconditional mkdir is both
 * simpler and race-free.
 */
async function ensureCacheDir () {
  await fs.mkdir(CACHE_DIR, { recursive: true })
}
|
|
53
|
+
|
|
54
|
+
/**
 * Cache key for a local file: sha256 over the file bytes, the path,
 * and the mtime. Editing the file (new mtime/bytes) or moving it
 * (new path) therefore invalidates any cached upload even when the
 * other components are unchanged.
 *
 * @param {string} filePath
 * @returns {Promise<string>} hex sha256 digest
 */
async function hashFile (filePath) {
  const contents = await fs.readFile(filePath)
  const stats = await fs.stat(filePath)
  const digest = createHash('sha256')
  digest.update(contents)
  digest.update(filePath)
  digest.update(stats.mtime.toISOString())
  return digest.digest('hex')
}
|
|
72
|
+
|
|
73
|
+
/**
 * Read the uploaded-files cache from disk. Any failure — missing
 * file, unreadable file, malformed JSON — degrades to an empty cache
 * instead of surfacing an error, so cache problems can only cause a
 * re-upload, never a failed call.
 */
async function loadCache () {
  try {
    if (!existsSync(CACHE_PATH)) return {}
    return JSON.parse(await fs.readFile(CACHE_PATH, 'utf8'))
  } catch {
    return {}
  }
}
|
|
82
|
+
|
|
83
|
+
/**
 * Persist the uploaded-files cache to disk. Best effort: write
 * failures are swallowed so cache IO can never break an in-flight
 * call (the worst case is a redundant re-upload later).
 *
 * @param {object} cache
 */
async function saveCache (cache) {
  try {
    await ensureCacheDir()
    const body = JSON.stringify(cache, null, 2)
    await fs.writeFile(CACHE_PATH, body)
  } catch {
    // cache write failures shouldn't bring down a call
  }
}
|
|
91
|
+
|
|
92
|
+
/**
 * Look up a previously uploaded file by provider + content hash.
 * Any error (unreadable file, broken cache) reads as a cache miss.
 *
 * @param {string} filePath
 * @param {string} provider
 * @returns {Promise<UploadedFileRecord | undefined>} undefined on miss
 */
export async function getCachedFile (filePath, provider = 'gemini') {
  try {
    const key = `${provider}:${await hashFile(filePath)}`
    const cache = await loadCache()
    return cache[key]
  } catch {
    return undefined
  }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Record an uploaded file's provider metadata under its content
 * hash. Best effort — any failure is swallowed (the only cost of a
 * lost entry is a redundant re-upload).
 *
 * @param {string} filePath
 * @param {object} data     provider file record (uri, name, ...)
 * @param {string} provider
 */
export async function setCachedFile (filePath, data, provider = 'gemini') {
  try {
    const hash = await hashFile(filePath)
    const cache = await loadCache()
    const record = {
      hash,
      data,
      filePath,
      provider,
      cachedAt: new Date().toISOString()
    }
    cache[`${provider}:${hash}`] = record
    await saveCache(cache)
  } catch {
    // best effort
  }
}
|
|
128
|
+
|
|
129
|
+
// ---------- loader ----------
|
|
130
|
+
|
|
131
|
+
/**
|
|
132
|
+
* @typedef {object} VideoPart
|
|
133
|
+
* @property {{data: string, mimeType: string}} [inlineData]
|
|
134
|
+
* @property {{fileUri: string, mimeType: string}} [fileData]
|
|
135
|
+
*/
|
|
136
|
+
|
|
137
|
+
/**
 * Convert envelope video refs into provider content parts.
 * Malformed refs (missing `fileUri` or `mimeType`) are skipped;
 * a non-array `videos` yields an empty result.
 *
 * @param {import('#core/envelope.js').MediaRef[]} videos
 * @param {{
 *   client: {files: {upload: (args: any) => Promise<any>, get: (args: {name: string}) => Promise<any>}},
 *   useCache?: boolean,
 *   sleep?: (ms: number) => Promise<void>,
 *   now?: () => number,
 *   readFile?: (path: string) => Promise<Buffer>,
 *   stat?: (path: string) => Promise<{size: number}>,
 *   signal?: AbortSignal,
 *   provider?: string
 * }} deps  provider client plus injectable fs/clock hooks for tests
 * @returns {Promise<VideoPart[]>}
 */
export async function loadVideos (videos, deps) {
  if (!videos || !Array.isArray(videos)) return []

  const ctx = {
    client: deps.client,
    useCache: Boolean(deps.useCache),
    sleep: deps.sleep ?? defaultSleep,
    now: deps.now ?? Date.now,
    readFileFn: deps.readFile ?? fs.readFile,
    statFn: deps.stat ?? fs.stat,
    signal: deps.signal,
    provider: deps.provider ?? 'gemini'
  }

  const parts = []
  for (const ref of videos) {
    if (!ref?.fileUri || !ref?.mimeType) continue
    throwIfAborted(ctx.signal)
    const part = await toPart(ref, ctx)
    if (part) parts.push(part)
  }
  return parts
}
|
|
173
|
+
|
|
174
|
+
/**
 * Resolve one video ref into a content part. Returns null for refs
 * that can't be resolved (malformed data: URI, unreadable local
 * path).
 *
 * @param {{fileUri: string, mimeType: string, cache?: boolean}} ref
 * @param {object} ctx  resolved deps from `loadVideos`
 * @returns {Promise<VideoPart | null>}
 */
async function toPart (ref, ctx) {
  const { fileUri, mimeType } = ref

  // Remote URL — the provider fetches it itself.
  if (/^https?:\/\//i.test(fileUri)) {
    return { fileData: { fileUri, mimeType } }
  }

  // data: URI — everything after the comma is already base64.
  if (fileUri.startsWith('data:')) {
    const comma = fileUri.indexOf(',')
    if (comma < 0) return null
    const data = fileUri.slice(comma + 1)
    return { inlineData: { data, mimeType } }
  }

  // file:// or bare local path.
  const filePath = fileUri.replace(/^file:\/\//, '')
  let stats
  try {
    stats = await ctx.statFn(filePath)
  } catch {
    return null
  }

  // Too big to inline, or caller asked for caching → upload path.
  if (stats.size > INLINE_MAX_BYTES || ctx.useCache) {
    const uri = await uploadFile(filePath, mimeType, ctx)
    return { fileData: { fileUri: uri, mimeType } }
  }

  // Small enough → read and inline as base64.
  const bytes = await ctx.readFileFn(filePath)
  return { inlineData: { data: bytes.toString('base64'), mimeType } }
}
|
|
206
|
+
|
|
207
|
+
/**
 * Upload a local video via the provider file API and poll until it
 * is usable. Honors the on-disk cache so repeat calls (same bytes +
 * path + mtime) skip the network round trip entirely. A stuck
 * PROCESSING file is bounded by `MAX_UPLOAD_POLL_MS`; an aborted
 * signal breaks out immediately.
 *
 * @param {string} filePath  local path (file:// prefix already stripped)
 * @param {string} mimeType
 * @param {object} ctx       resolved deps from `loadVideos` (client, sleep, now, signal, provider)
 * @returns {Promise<string>} the provider file URI to reference in a fileData part
 * @throws typed `PROVIDER_UNAVAILABLE` (retryable) when the poll deadline passes;
 *         plain Error when processing fails or no uri comes back;
 *         AbortError when `ctx.signal` aborts mid-flight
 */
async function uploadFile (filePath, mimeType, ctx) {
  // Cache hit short-circuits the upload entirely.
  const cached = await getCachedFile(filePath, ctx.provider)
  if (cached?.data?.uri) return cached.data.uri
  throwIfAborted(ctx.signal)

  let file = await ctx.client.files.upload({
    file: filePath,
    config: { mimeType }
  })

  // Deadline computed once, up front — covers the whole poll loop.
  const deadline = ctx.now() + MAX_UPLOAD_POLL_MS

  while (file?.state === 'PROCESSING') {
    if (ctx.now() >= deadline) {
      throw typedError(
        `gemini file upload did not become ACTIVE within ${MAX_UPLOAD_POLL_MS / 1000}s`,
        'PROVIDER_UNAVAILABLE',
        true
      )
    }
    // Abort is checked both before and after the sleep so a cancel
    // arriving during the poll interval is honored without another
    // provider round trip.
    throwIfAborted(ctx.signal)
    await ctx.sleep(VIDEO_UPLOAD_POLL_INTERVAL_MS)
    throwIfAborted(ctx.signal)
    file = await ctx.client.files.get({ name: file.name })
  }
  if (file?.state === 'FAILED') {
    throw new Error('gemini file processing failed')
  }
  if (!file?.uri) {
    throw new Error('gemini upload returned no uri')
  }

  // Only cache once the file is known good (not FAILED, has a uri).
  await setCachedFile(filePath, file, ctx.provider)
  return file.uri
}
|
|
248
|
+
|
|
249
|
+
/**
 * Raise a DOM-style `AbortError` when `signal` is already aborted;
 * no-op otherwise. The gemini adapter's video-load catch block
 * converts this shape to the standard cancelled terminal via the
 * outer `signal?.aborted` check in `run.js`.
 *
 * @param {AbortSignal | undefined} signal
 */
function throwIfAborted (signal) {
  if (!signal?.aborted) return
  const error = new Error('aborted')
  error.name = 'AbortError'
  throw error
}
|
|
264
|
+
|
|
265
|
+
/**
 * Build an Error carrying the project's `typed` classification
 * payload (severity derives from retryability: retryable → 'warn').
 *
 * @param {string} message
 * @param {string} type       e.g. 'PROVIDER_UNAVAILABLE'
 * @param {boolean} retryable
 * @returns {Error & {typed: object}}
 */
function typedError (message, type, retryable) {
  const severity = retryable ? 'warn' : 'error'
  const err = new Error(message)
  err.typed = { message, severity, retryable, type }
  return err
}
|
|
280
|
+
|
|
281
|
+
/**
 * Promise-wrapped `setTimeout`, used when the caller injects no
 * `sleep` dependency.
 *
 * @param {number} ms
 * @returns {Promise<void>}
 */
function defaultSleep (ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms)
  })
}
|
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anthropic Messages API adapter.
|
|
3
|
+
*
|
|
4
|
+
* Scope:
|
|
5
|
+
* - Text in, text out, streaming
|
|
6
|
+
* - Status contract (incomplete + warning on max_tokens)
|
|
7
|
+
* - Tools: unified format → anthropic input_schema; streaming
|
|
8
|
+
* function_call deltas; tool_use terminal state; tool_result
|
|
9
|
+
* messages on the way back in
|
|
10
|
+
* - AbortSignal forwarded to SDK
|
|
11
|
+
*
|
|
12
|
+
* Deferred: vision, thinking/reasoning control (outputEffort).
|
|
13
|
+
*
|
|
14
|
+
* @module session/adapters/anthropic
|
|
15
|
+
*/
|
|
16
|
+
|
|
17
|
+
import Anthropic from '@anthropic-ai/sdk'
|
|
18
|
+
|
|
19
|
+
import {
|
|
20
|
+
STATUS_COMPLETED,
|
|
21
|
+
STATUS_INCOMPLETE,
|
|
22
|
+
STATUS_TOOL_USE,
|
|
23
|
+
WARNING_INSUFFICIENT_OUTPUT_BUDGET
|
|
24
|
+
} from '#core/status.js'
|
|
25
|
+
|
|
26
|
+
import { cancelledDone } from './_cancelled.js'
|
|
27
|
+
import { getSpec } from './_catalog.js'
|
|
28
|
+
import { classifyProviderError } from './_errors.js'
|
|
29
|
+
import { loadImages } from './_images.js'
|
|
30
|
+
import { costFor } from './_pricing.js'
|
|
31
|
+
import {
|
|
32
|
+
toAnthropicTools,
|
|
33
|
+
fromAnthropicToolCalls,
|
|
34
|
+
toToolChoice
|
|
35
|
+
} from './_tools.js'
|
|
36
|
+
|
|
37
|
+
/**
|
|
38
|
+
* Approximate chars-per-token used to estimate Anthropic thinking
|
|
39
|
+
* tokens (the API doesn't report them separately in `usage`).
|
|
40
|
+
*
|
|
41
|
+
* ## Known limitations (cost accuracy)
|
|
42
|
+
*
|
|
43
|
+
* The estimate structurally under-counts in three ways:
|
|
44
|
+
* 1. **Signatures.** Thinking blocks have shape
|
|
45
|
+
* `{type: 'thinking', thinking: '<text>', signature: '<hash>'}`.
|
|
46
|
+
* Signatures consume output tokens but aren't streamed as
|
|
47
|
+
* `thinking_delta` — we never see them.
|
|
48
|
+
* 2. **Redacted thinking.** When Anthropic returns
|
|
49
|
+
* `{type: 'redacted_thinking', data: '<encrypted>'}` in place
|
|
50
|
+
* of a plain thinking block, zero `thinking_delta` events are
|
|
51
|
+
* emitted even though the block still consumes output tokens.
|
|
52
|
+
* 3. **BPE variance.** 4 chars/token is an English-text average;
|
|
53
|
+
* dense reasoning prose can compress differently.
|
|
54
|
+
*
|
|
55
|
+
* **Cost impact:** provably zero when `thinkingPrice == outputPrice`
|
|
56
|
+
* (true for every Anthropic entry in the curated catalog today) —
|
|
57
|
+
* the heuristic error cancels in `cost = i*ip + o*op + t*tp` because
|
|
58
|
+
* `o*op + t*op = (o+t)*op = totalOutput*op`. If a catalog maintainer
|
|
59
|
+
* ever sets asymmetric Anthropic pricing, cost drifts by
|
|
60
|
+
* `estimate_error × (thinkingPrice − outputPrice)` — that's a
|
|
61
|
+
* catalog-editor awareness item until Anthropic exposes a real
|
|
62
|
+
* `thinking_tokens` field in `usage`. (They'll almost certainly do
|
|
63
|
+
* that the day they introduce asymmetric pricing.)
|
|
64
|
+
*/
|
|
65
|
+
const ANTHROPIC_THINKING_CHARS_PER_TOKEN = 4
|
|
66
|
+
|
|
67
|
+
/**
|
|
68
|
+
* Fallback `max_tokens` when the caller supplied no `outputBudget`
|
|
69
|
+
* and the model spec has no `outputTokenLimit`. Anthropic's
|
|
70
|
+
* `max_tokens` is required on every request; 4096 matches the
|
|
71
|
+
* smallest Claude output ceiling and keeps calls cheap on unknown
|
|
72
|
+
* models. Tune via `spec.outputTokenLimit` or `envelope.outputBudget`.
|
|
73
|
+
*/
|
|
74
|
+
const ANTHROPIC_DEFAULT_MAX_TOKENS = 4096
|
|
75
|
+
|
|
76
|
+
/**
 * Stream one call through the Anthropic Messages API, emitting the
 * unified event protocol: zero or more `delta` events, then exactly
 * one terminal `done` or `error` event.
 *
 * @param {import('#core/envelope.js').CallEnvelope} envelope
 * @param {{client?: Anthropic, signal?: AbortSignal, log?: any, span?: any}} [deps]
 * @returns {AsyncGenerator<import('#core/events.js').Event>}
 */
export async function * anthropic (envelope, deps = {}) {
  const client = deps.client ?? new Anthropic({ apiKey: envelope.auth.key })
  const signal = deps.signal
  const log = deps.log
  // Timestamps are nanosecond hrtime values serialized as strings
  // (BigInt doesn't survive JSON).
  const start = String(process.hrtime.bigint())
  // hrtime of the first streamed delta, for time-to-first-token.
  let first = null

  const { system, conversation } = splitPrompt(envelope.prompt)

  // Attach images to the last user message before building the request.
  // An image load failure is terminal: yield a classified error event
  // rather than silently calling without the images.
  if (envelope.images?.length) {
    try {
      const loaded = await loadImages(envelope.images)
      const blocks = loaded.map(toAnthropicImageBlock).filter(Boolean)
      if (blocks.length) injectImageBlocks(conversation, blocks)
    } catch (e) {
      log?.warn({ err: e }, '[mohdel:anthropic] image load failed')
      yield { type: 'error', error: classifyProviderError(e) }
      return
    }
  }

  const request = buildRequest(envelope, conversation, system)

  // F53: accumulate via array + join to avoid per-delta V8 cons-string
  // churn. Materialized at each exit point.
  const outputParts = []
  const currentOutput = () => outputParts.join('')
  let inputTokens = 0
  let outputTokens = 0
  // Char count of streamed thinking deltas — used to estimate
  // thinking tokens at the end (see ANTHROPIC_THINKING_CHARS_PER_TOKEN).
  let thinkingChars = 0
  let status = STATUS_COMPLETED
  /** @type {string | undefined} */
  let warning

  // Tool-use accumulation state, keyed by content-block index so
  // interleaved input_json_delta events land on the right call.
  /** @type {Map<number, {id: string, name: string, inputJson: string}>} */
  const toolBlocks = new Map()

  try {
    const stream = await client.messages.stream(request, { signal })

    for await (const event of stream) {
      // Cooperative cancellation between events: emit the standard
      // cancelled terminal with whatever partial output we have.
      if (signal?.aborted) {
        yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens)
        return
      }
      switch (event.type) {
        case 'message_start':
          if (event.message?.usage?.input_tokens) {
            inputTokens = event.message.usage.input_tokens
          }
          break

        case 'content_block_start':
          if (event.content_block?.type === 'tool_use') {
            toolBlocks.set(event.index, {
              id: event.content_block.id,
              name: event.content_block.name,
              inputJson: ''
            })
          }
          break

        case 'content_block_delta':
          if (event.delta?.type === 'text_delta' && event.delta.text) {
            if (first === null) first = String(process.hrtime.bigint())
            outputParts.push(event.delta.text)
            yield { type: 'delta', delta: { type: 'message', delta: event.delta.text } }
          } else if (event.delta?.type === 'thinking_delta') {
            // Thinking content is not surfaced via `delta` events —
            // only `message` and `function_call` deltas stream to
            // consumers. Accumulate char count to estimate
            // thinking_tokens at the end.
            thinkingChars += (event.delta.thinking || '').length
          } else if (event.delta?.type === 'input_json_delta') {
            const block = toolBlocks.get(event.index)
            if (block) {
              block.inputJson += event.delta.partial_json ?? ''
              if (first === null) first = String(process.hrtime.bigint())
              yield {
                type: 'delta',
                delta: { type: 'function_call', delta: event.delta.partial_json ?? '' }
              }
            }
          }
          break

        case 'message_delta':
          // stop_reason drives the status contract: max_tokens →
          // incomplete + warning; tool_use → tool-use terminal.
          if (event.delta?.stop_reason === 'max_tokens') {
            status = STATUS_INCOMPLETE
            warning = WARNING_INSUFFICIENT_OUTPUT_BUDGET
          } else if (event.delta?.stop_reason === 'tool_use') {
            status = STATUS_TOOL_USE
          }
          if (event.usage?.output_tokens) {
            outputTokens = event.usage.output_tokens
          }
          break

        default:
          break
      }
    }
  } catch (e) {
    // An abort often surfaces as a thrown SDK error — prefer the
    // cancelled terminal over an error event in that case.
    if (signal?.aborted) {
      yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens)
      return
    }
    log?.warn({ err: e }, '[mohdel:anthropic] stream failed')
    yield { type: 'error', error: classifyProviderError(e) }
    return
  }

  // Abort raced the end of the stream: still report cancelled.
  if (signal?.aborted) {
    yield cancelledDone(start, first, envelope, currentOutput(), inputTokens, outputTokens)
    return
  }

  const end = String(process.hrtime.bigint())
  // Estimate thinking tokens from streamed thinking_delta char count
  // (Anthropic API doesn't report them separately). Cap at total
  // output tokens reported by usage.
  const estimatedThinkingTokens = thinkingChars > 0
    ? Math.min(Math.ceil(thinkingChars / ANTHROPIC_THINKING_CHARS_PER_TOKEN), outputTokens)
    : 0
  const messageOutputTokens = Math.max(0, outputTokens - estimatedThinkingTokens)

  /** @type {import('#core/events.js').DoneEvent} */
  const done = {
    type: 'done',
    result: {
      status,
      output: currentOutput() || null,
      inputTokens,
      outputTokens: messageOutputTokens,
      thinkingTokens: estimatedThinkingTokens,
      cost: costFor(
        `${envelope.provider}/${envelope.model}`,
        { inputTokens, outputTokens: messageOutputTokens, thinkingTokens: estimatedThinkingTokens }
      ),
      // No delta ever streamed → first defaults to end.
      timestamps: { start, first: first ?? end, end }
    }
  }
  if (warning) done.result.warning = warning
  if (toolBlocks.size > 0) {
    done.result.toolCalls = finalizeToolCalls(toolBlocks)
  }
  yield done
}
|
|
231
|
+
|
|
232
|
+
/**
 * Materialize the accumulated streaming tool_use blocks into the
 * unified tool-call shape, in stream (insertion) order.
 *
 * @param {Map<number, {id: string, name: string, inputJson: string}>} toolBlocks
 */
function finalizeToolCalls (toolBlocks) {
  const blocks = []
  for (const { id, name, inputJson } of toolBlocks.values()) {
    blocks.push({ id, name, input: safeParseJson(inputJson) })
  }
  return fromAnthropicToolCalls(blocks)
}
|
|
243
|
+
|
|
244
|
+
/**
 * Parse accumulated tool-input JSON. Empty input → `{}`; malformed
 * JSON returns the raw string so callers can surface the partial
 * text instead of dropping the call.
 *
 * @param {string} s
 */
function safeParseJson (s) {
  if (!s) return {}
  try {
    return JSON.parse(s)
  } catch {
    return s
  }
}
|
|
249
|
+
|
|
250
|
+
/**
 * Assemble the Anthropic Messages API request body from the
 * envelope, the pre-split conversation, and the system string.
 *
 * `max_tokens` resolution order: envelope.outputBudget →
 * spec.outputTokenLimit → ANTHROPIC_DEFAULT_MAX_TOKENS — then
 * possibly overridden to the full model limit when thinking is on
 * (see below).
 *
 * @param {import('#core/envelope.js').CallEnvelope} envelope
 * @param {Array<{role: string, content: any}>} conversation
 * @param {string} system  empty string when there is no system prompt
 */
function buildRequest (envelope, conversation, system) {
  const spec = getSpec(`${envelope.provider}/${envelope.model}`)
  const outputTokenLimit = spec?.outputTokenLimit

  /** @type {Record<string, any>} */
  const request = {
    model: envelope.model,
    max_tokens: envelope.outputBudget ?? outputTokenLimit ?? ANTHROPIC_DEFAULT_MAX_TOKENS,
    messages: conversation
  }
  // Anthropic rejects an empty `system` field — only set when present.
  if (system) request.system = system
  if (envelope.tools?.length) {
    request.tools = toAnthropicTools(envelope.tools)
  }
  if (envelope.toolChoice) {
    const choice = toToolChoice('anthropic', envelope.toolChoice)
    if (envelope.parallelToolCalls === false && choice) {
      choice.disable_parallel_tool_use = true
    }
    request.tool_choice = choice
  }

  // Thinking — adaptive mode, with an optional effort hint when the
  // spec defines `thinkingEffortLevels`. `outputEffort: 'none'`
  // opts out of thinking entirely (`thinking.type: 'disabled'` —
  // adaptive thinking must not be enabled when the caller has
  // explicitly disabled it, otherwise the call silently clobbers
  // `outputBudget` with the full model limit).
  if (spec?.thinkingEffortLevels) {
    const effort = envelope.outputEffort ?? spec.defaultThinkingEffort
    if (effort && effort !== 'none') {
      request.thinking = { type: 'adaptive' }
      // Only pass an effort hint the spec actually declares.
      if (spec.thinkingEffortLevels[effort] != null) {
        request.output_config = { effort }
      }
      // Thinking tokens share the output budget — give them the full
      // model limit instead of just the requested outputBudget.
      if (outputTokenLimit) request.max_tokens = outputTokenLimit
    }
  }

  return request
}
|
|
298
|
+
|
|
299
|
+
/**
 * Split a unified prompt into Anthropic's `system` string plus the
 * messages array. System messages are collected (joined with blank
 * lines); tool results become user-role tool_result blocks;
 * assistant turns with tool calls become text + tool_use blocks;
 * everything else maps content directly.
 *
 * @param {string | import('#core/envelope.js').Message[]} prompt
 */
function splitPrompt (prompt) {
  if (typeof prompt === 'string') {
    return { system: '', conversation: [{ role: 'user', content: prompt }] }
  }
  /** @type {string[]} */
  const systemParts = []
  /** @type {Array<{role: string, content: any}>} */
  const conversation = []
  for (const message of prompt) {
    if (message.role === 'system') {
      systemParts.push(flattenText(message.content))
      continue
    }
    if (message.role === 'tool') {
      // Anthropic expects tool results inside a user-role message.
      const resultBlock = {
        type: 'tool_result',
        tool_use_id: message.toolCallId ?? '',
        content: flattenText(message.content)
      }
      conversation.push({ role: 'user', content: [resultBlock] })
      continue
    }
    if (message.role === 'assistant' && message.toolCalls?.length) {
      // Optional leading text block, then one tool_use block per call.
      const content = []
      const text = flattenText(message.content)
      if (text) content.push({ type: 'text', text })
      for (const call of message.toolCalls) {
        content.push({
          type: 'tool_use',
          id: call.id,
          name: call.name,
          input: call.arguments ?? {}
        })
      }
      conversation.push({ role: 'assistant', content })
      continue
    }
    conversation.push({ role: message.role, content: toAnthropicContent(message.content) })
  }
  return { system: systemParts.filter(Boolean).join('\n\n'), conversation }
}
|
|
345
|
+
|
|
346
|
+
/**
 * Collapse message content to plain text: strings pass through;
 * part arrays keep only non-empty text parts, joined by newlines.
 *
 * @param {string | import('#core/envelope.js').MessagePart[]} content
 */
function flattenText (content) {
  if (typeof content === 'string') return content
  const texts = []
  for (const part of content) {
    if (part.type === 'text' && part.text) texts.push(part.text)
  }
  return texts.join('\n')
}
|
|
351
|
+
|
|
352
|
+
/**
 * Map unified message content to Anthropic content. Only text parts
 * are supported on this path (images are injected separately);
 * anything else is a programming error and throws.
 *
 * @param {string | import('#core/envelope.js').MessagePart[]} content
 * @throws {Error} on a non-text content part
 */
function toAnthropicContent (content) {
  if (typeof content === 'string') return content
  return content.map((part) => {
    if (part.type !== 'text') {
      throw new Error(`unsupported content part type: ${part.type}`)
    }
    return { type: 'text', text: part.text ?? '' }
  })
}
|
|
360
|
+
|
|
361
|
+
/**
 * Loaded image → Anthropic image content block. base64 takes
 * precedence over url; returns null when the image carries neither.
 *
 * @param {import('./_images.js').LoadedImage} img
 */
function toAnthropicImageBlock (img) {
  if (img.base64) {
    const source = { type: 'base64', media_type: img.mimeType, data: img.base64 }
    return { type: 'image', source }
  }
  if (img.url) {
    return { type: 'image', source: { type: 'url', url: img.url } }
  }
  return null
}
|
|
377
|
+
|
|
378
|
+
/**
 * Attach image blocks to the LAST user message in the conversation,
 * promoting string content to a text block first; when no user turn
 * exists, a fresh user message carrying only the blocks is appended.
 * Mutates `conversation` in place.
 *
 * @param {Array<{role: string, content: any}>} conversation
 * @param {Array<any>} blocks
 */
function injectImageBlocks (conversation, blocks) {
  for (let i = conversation.length - 1; i >= 0; i--) {
    const message = conversation[i]
    if (message.role !== 'user') continue
    if (typeof message.content === 'string') {
      message.content = [{ type: 'text', text: message.content }, ...blocks]
    } else if (Array.isArray(message.content)) {
      message.content = [...message.content, ...blocks]
    }
    return
  }
  conversation.push({ role: 'user', content: blocks })
}
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cerebras adapter — non-streaming chat completions with
|
|
3
|
+
* Cerebras-specific reasoning toggle for zai-family models.
|
|
4
|
+
*
|
|
5
|
+
* @module session/adapters/cerebras
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import Cerebras from '@cerebras/cerebras_cloud_sdk'
|
|
9
|
+
|
|
10
|
+
import { runChatCompletions } from './_chat_completions.js'
|
|
11
|
+
|
|
12
|
+
/**
 * Stream one call through the Cerebras chat-completions API by
 * delegating to the shared chat-completions runner.
 *
 * @param {import('#core/envelope.js').CallEnvelope} envelope
 * @param {{client?: any, signal?: AbortSignal, log?: any, span?: any}} [deps]
 * @returns {AsyncGenerator<import('#core/events.js').Event>}
 */
export async function * cerebras (envelope, deps = {}) {
  // Fresh SDK client per call unless one is injected (tests, pooling).
  const client = deps.client ?? new Cerebras({ apiKey: envelope.auth.key })
  // 'cerebras_zai' selects the Cerebras-specific reasoning toggle
  // used by zai-family models (see module header).
  yield * runChatCompletions(envelope, client, {
    provider: 'cerebras',
    toolChoiceFlavor: 'cerebras',
    reasoningField: 'cerebras_zai'
  }, {
    signal: deps.signal,
    log: deps.log,
    span: deps.span
  })
}
|