mohdel 0.90.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +377 -0
- package/config/benchmarks.json +39 -0
- package/js/client/call.js +75 -0
- package/js/client/call_image.js +82 -0
- package/js/client/gate-binary.js +72 -0
- package/js/client/index.js +16 -0
- package/js/client/ndjson.js +29 -0
- package/js/client/transport.js +48 -0
- package/js/core/envelope.js +141 -0
- package/js/core/errors.js +75 -0
- package/js/core/events.js +96 -0
- package/js/core/image.js +58 -0
- package/js/core/index.js +10 -0
- package/js/core/status.js +48 -0
- package/js/factory/bridge.js +372 -0
- package/js/session/_cooldown.js +114 -0
- package/js/session/_logger.js +138 -0
- package/js/session/_rate_limiter.js +77 -0
- package/js/session/_tracing.js +58 -0
- package/js/session/adapters/_cancelled.js +44 -0
- package/js/session/adapters/_catalog.js +58 -0
- package/js/session/adapters/_chat_completions.js +439 -0
- package/js/session/adapters/_errors.js +85 -0
- package/js/session/adapters/_images.js +60 -0
- package/js/session/adapters/_lazy_json_cache.js +76 -0
- package/js/session/adapters/_pricing.js +67 -0
- package/js/session/adapters/_providers.js +60 -0
- package/js/session/adapters/_tools.js +185 -0
- package/js/session/adapters/_videos.js +283 -0
- package/js/session/adapters/anthropic.js +397 -0
- package/js/session/adapters/cerebras.js +28 -0
- package/js/session/adapters/deepseek.js +32 -0
- package/js/session/adapters/echo.js +51 -0
- package/js/session/adapters/fake.js +262 -0
- package/js/session/adapters/fireworks.js +46 -0
- package/js/session/adapters/gemini.js +381 -0
- package/js/session/adapters/groq.js +23 -0
- package/js/session/adapters/image/fake.js +55 -0
- package/js/session/adapters/image/index.js +40 -0
- package/js/session/adapters/image/novita.js +135 -0
- package/js/session/adapters/image/openai.js +50 -0
- package/js/session/adapters/index.js +53 -0
- package/js/session/adapters/mistral.js +31 -0
- package/js/session/adapters/novita.js +29 -0
- package/js/session/adapters/openai.js +381 -0
- package/js/session/adapters/openrouter.js +66 -0
- package/js/session/adapters/xai.js +27 -0
- package/js/session/bin.js +54 -0
- package/js/session/driver.js +160 -0
- package/js/session/index.js +18 -0
- package/js/session/run.js +393 -0
- package/js/session/run_image.js +61 -0
- package/package.json +107 -0
- package/src/cli/ask.js +160 -0
- package/src/cli/backup.js +107 -0
- package/src/cli/bench.js +262 -0
- package/src/cli/check.js +123 -0
- package/src/cli/colored-logger.js +67 -0
- package/src/cli/colors.js +13 -0
- package/src/cli/default.js +39 -0
- package/src/cli/index.js +150 -0
- package/src/cli/json-output.js +60 -0
- package/src/cli/model.js +571 -0
- package/src/cli/onboard.js +232 -0
- package/src/cli/rank.js +176 -0
- package/src/cli/ratelimit.js +160 -0
- package/src/cli/tag.js +105 -0
- package/src/lib/assets/alibaba.svg +1 -0
- package/src/lib/assets/anthropic.svg +5 -0
- package/src/lib/assets/deepseek.svg +1 -0
- package/src/lib/assets/gemini.svg +1 -0
- package/src/lib/assets/google.svg +2 -0
- package/src/lib/assets/kwaipilot.svg +1 -0
- package/src/lib/assets/meta.svg +1 -0
- package/src/lib/assets/minimax.svg +9 -0
- package/src/lib/assets/moonshotai.svg +4 -0
- package/src/lib/assets/openai.svg +5 -0
- package/src/lib/assets/xai.svg +1 -0
- package/src/lib/assets/xiaomi.svg +2 -0
- package/src/lib/assets/zai.svg +219 -0
- package/src/lib/benchmark-score.js +215 -0
- package/src/lib/benchmark-truth.js +68 -0
- package/src/lib/cache.js +76 -0
- package/src/lib/common.js +208 -0
- package/src/lib/cooldown.js +63 -0
- package/src/lib/creators.js +71 -0
- package/src/lib/curated-cache.js +146 -0
- package/src/lib/errors.js +126 -0
- package/src/lib/index.js +726 -0
- package/src/lib/logger.js +29 -0
- package/src/lib/providers.js +87 -0
- package/src/lib/rank.js +390 -0
- package/src/lib/rate-limiter.js +50 -0
- package/src/lib/schema.js +150 -0
- package/src/lib/select.js +474 -0
- package/src/lib/tracing.js +62 -0
- package/src/lib/utils.js +85 -0
package/package.json
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "mohdel",
|
|
3
|
+
"version": "0.90.0",
|
|
4
|
+
"license": "MIT",
|
|
5
|
+
"author": {
|
|
6
|
+
"name": "Christophe Le Bars",
|
|
7
|
+
"email": "clb@toort.net"
|
|
8
|
+
},
|
|
9
|
+
"description": "Self-hosted LLM gateway with an embeddable SDK. Process-isolated, OpenTelemetry-native inference across 11 providers — streaming, tools, thinking control — without orchestration. Use the Node factory in-process, or run thin-gate for fault isolation and any-language HTTP callers.",
|
|
10
|
+
"type": "module",
|
|
11
|
+
"repository": {
|
|
12
|
+
"type": "git",
|
|
13
|
+
"url": "git+https://github.com/clbrge/mohdel.git"
|
|
14
|
+
},
|
|
15
|
+
"homepage": "https://github.com/clbrge/mohdel#readme",
|
|
16
|
+
"bugs": {
|
|
17
|
+
"url": "https://github.com/clbrge/mohdel/issues"
|
|
18
|
+
},
|
|
19
|
+
"engines": {
|
|
20
|
+
"node": ">=22"
|
|
21
|
+
},
|
|
22
|
+
"main": "src/lib/index.js",
|
|
23
|
+
"exports": {
|
|
24
|
+
".": "./src/lib/index.js",
|
|
25
|
+
"./providers": "./src/lib/providers.js",
|
|
26
|
+
"./creators": "./src/lib/creators.js",
|
|
27
|
+
"./utils": "./src/lib/utils.js",
|
|
28
|
+
"./errors": "./src/lib/errors.js",
|
|
29
|
+
"./client": "./js/client/index.js",
|
|
30
|
+
"./session": "./js/session/index.js",
|
|
31
|
+
"./session/bin": "./js/session/bin.js"
|
|
32
|
+
},
|
|
33
|
+
"imports": {
|
|
34
|
+
"#core": "./js/core/index.js",
|
|
35
|
+
"#core/*": "./js/core/*"
|
|
36
|
+
},
|
|
37
|
+
"bin": {
|
|
38
|
+
"mo": "./src/cli/index.js"
|
|
39
|
+
},
|
|
40
|
+
"files": [
|
|
41
|
+
"js",
|
|
42
|
+
"src/lib",
|
|
43
|
+
"src/cli",
|
|
44
|
+
"config",
|
|
45
|
+
"README.md",
|
|
46
|
+
"LICENSE"
|
|
47
|
+
],
|
|
48
|
+
"publishConfig": {
|
|
49
|
+
"registry": "https://registry.npmjs.org",
|
|
50
|
+
"access": "public",
|
|
51
|
+
"provenance": true
|
|
52
|
+
},
|
|
53
|
+
"scripts": {
|
|
54
|
+
"lint": "standard",
|
|
55
|
+
"test": "vitest run test/unit",
|
|
56
|
+
"prerelease": "npm run lint && npm run test",
|
|
57
|
+
"release": "release-it",
|
|
58
|
+
"test:provider": "vitest run test/integration/provider.test.js",
|
|
59
|
+
"test:multiturn": "vitest run test/integration/multiturn.test.js",
|
|
60
|
+
"test:vision": "vitest run test/integration/vision.test.js",
|
|
61
|
+
"test:live": "vitest run test/live"
|
|
62
|
+
},
|
|
63
|
+
"release-it": {
|
|
64
|
+
"hooks": {
|
|
65
|
+
"after:bump": "node scripts/sync-version.js"
|
|
66
|
+
},
|
|
67
|
+
"git": {
|
|
68
|
+
"commitMessage": "release: v${version}",
|
|
69
|
+
"requireUpstream": false,
|
|
70
|
+
"tagName": "v${version}",
|
|
71
|
+
"push": true
|
|
72
|
+
},
|
|
73
|
+
"npm": {
|
|
74
|
+
"publish": false
|
|
75
|
+
},
|
|
76
|
+
"github": {
|
|
77
|
+
"release": true,
|
|
78
|
+
"releaseName": "mohdel v${version}",
|
|
79
|
+
"releaseNotes": "awk '/^## \\[${version}\\]/{flag=1;next}/^## \\[/{flag=0}flag' CHANGELOG.md"
|
|
80
|
+
}
|
|
81
|
+
},
|
|
82
|
+
"optionalDependencies": {
|
|
83
|
+
"@clack/prompts": "^1.2.0",
|
|
84
|
+
"@opentelemetry/exporter-trace-otlp-grpc": "^0.215.0",
|
|
85
|
+
"@opentelemetry/sdk-node": "^0.215.0",
|
|
86
|
+
"chalk": "^5.4.0",
|
|
87
|
+
"mohdel-thin-gate-linux-x64-gnu": "0.90.0"
|
|
88
|
+
},
|
|
89
|
+
"dependencies": {
|
|
90
|
+
"@anthropic-ai/sdk": "^0.90.0",
|
|
91
|
+
"@cerebras/cerebras_cloud_sdk": "^1.61.1",
|
|
92
|
+
"@google/genai": "^1.50.1",
|
|
93
|
+
"@opentelemetry/api": "^1.9.1",
|
|
94
|
+
"env-paths": "^4.0.0",
|
|
95
|
+
"groq-sdk": "^1.1.2",
|
|
96
|
+
"openai": "^6.34.0"
|
|
97
|
+
},
|
|
98
|
+
"lint-staged": {
|
|
99
|
+
"*.{js,cjs}": "standard"
|
|
100
|
+
},
|
|
101
|
+
"devDependencies": {
|
|
102
|
+
"lint-staged": "^16.4.0",
|
|
103
|
+
"release-it": "^20.0.0",
|
|
104
|
+
"standard": "^17.1.2",
|
|
105
|
+
"vitest": "^4.1.5"
|
|
106
|
+
}
|
|
107
|
+
}
|
package/src/cli/ask.js
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
import mohdel, { silent } from '../lib/index.js'
|
|
2
|
+
import { loadDefaultEnv } from '../lib/common.js'
|
|
3
|
+
|
|
4
|
+
const noop = () => {}
|
|
5
|
+
|
|
6
|
+
/**
 * `mo ask` — run one inference and print the answer.
 *
 * The prompt is built from the positional arguments that follow the model id
 * and, when stdin is not a TTY, from everything read on stdin; the two parts
 * are joined with a blank line. The answer goes to stdout (raw text, or a JSON
 * document with --json); the model id and a token/cost/timing summary go to
 * stderr.
 *
 * Exits the process with status 0 after printing help and with status 1 on a
 * usage error, an unusable --budget, a model that cannot be resolved, or a
 * failed inference.
 *
 * @param {string[]} args CLI arguments after `ask`. Recognised flags are
 *   spliced out of this array in place.
 * @returns {Promise<void>}
 */
export async function runAsk (args) {
  if (args.includes('-h') || args.includes('--help')) {
    console.log(`mohdel ask — one-shot inference, pipeable

Usage:
mo ask <model> [prompt] Prompt from args
echo "prompt" | mo ask <model> Prompt from stdin
mo ask <model> "question" < file Combined: args + stdin

Options:
--effort <level> Thinking effort: high, medium, low, none
--budget <tokens> Output token budget
--json Output full result as JSON
--stream Stream output to stdout in real time
-v, --verbose Show debug info on stderr (cooldown, rate limit, SDK calls)

Output:
stdout: model output text (raw, no formatting — or JSON with --json)
stderr: model name + token usage summary

Examples:
mo ask gemini/gemini-3-flash-preview "why is the sky blue"
cat article.txt | mo ask anthropic/claude-sonnet-4-6 "summarize this"
mo ask openai/gpt-5.4 --effort high "explain monads" --json | jq .cost`)
    process.exit(0)
  }

  loadDefaultEnv()

  // Flag parsers. Each one removes what it consumed from `args`, so only the
  // positional arguments (model id, prompt words) remain afterwards.
  const flagVal = (name) => {
    const idx = args.indexOf(name)
    if (idx === -1) return undefined
    const val = args[idx + 1]
    args.splice(idx, 2)
    return val
  }
  const flag = (name) => {
    const idx = args.indexOf(name)
    if (idx === -1) return false
    args.splice(idx, 1)
    return true
  }

  const json = flag('--json')
  const stream = flag('--stream')
  // Consume both spellings unconditionally: `flag('--verbose') || flag('-v')`
  // short-circuits and would leave a `-v` behind in `args`, where it would be
  // mistaken for the model id or a prompt word.
  const verboseLong = flag('--verbose')
  const verboseShort = flag('-v')
  const verbose = verboseLong || verboseShort
  const effort = flagVal('--effort')
  const budget = flagVal('--budget')

  // Reject an unusable budget up front instead of forwarding NaN.
  let outputBudget
  if (budget) {
    outputBudget = Number.parseInt(budget, 10)
    if (Number.isNaN(outputBudget) || outputBudget < 0) {
      console.error(`Invalid --budget value: ${budget}. Expected a non-negative integer.`)
      process.exit(1)
    }
  }

  // First remaining arg is model
  const modelId = args[0]
  if (!modelId) {
    console.error('Usage: mo ask <model> [prompt]')
    process.exit(1)
  }

  // Remaining args form the prompt
  const promptArgs = args.slice(1).join(' ').trim()

  // Read stdin if piped
  let stdinContent = ''
  if (!process.stdin.isTTY) {
    const chunks = []
    for await (const chunk of process.stdin) chunks.push(chunk)
    stdinContent = Buffer.concat(chunks).toString('utf8').trim()
  }

  // Build prompt: args + stdin
  const parts = [promptArgs, stdinContent].filter(Boolean)
  const prompt = parts.join('\n\n')

  if (!prompt) {
    console.error('No prompt provided. Pass as argument or pipe via stdin.')
    process.exit(1)
  }

  // Logger wiring:
  //   verbose     -> debug, info, warn, error and fatal all go to stderr
  //   non-verbose -> only error and fatal go to stderr
  // Every other level keeps the handler inherited from `silent`.
  const toStderr = (...items) => process.stderr.write(`${items.map(a => typeof a === 'string' ? a : JSON.stringify(a)).join(' ')}\n`)
  const verboseOnly = verbose ? toStderr : noop
  const askLogger = {
    ...silent,
    debug: verboseOnly,
    info: verboseOnly,
    warn: verboseOnly,
    error: toStderr,
    fatal: toStderr
  }
  const mo = await mohdel({ logger: askLogger })
  let model
  try {
    model = mo.use(modelId)
  } catch (err) {
    console.error(err.message)
    process.exit(1)
  }

  const options = {}
  if (effort) options.outputEffort = effort
  if (outputBudget !== undefined) options.outputBudget = outputBudget
  if (stream && !json) {
    // --json prints a single document at the end, so streaming is only wired
    // up for plain-text output.
    options.realtimeHandler = (delta) => process.stdout.write(delta)
    options.bufferOpts = { maxChars: 1, maxMs: 0 }
  }

  process.stderr.write(`${model.id}\n`)

  try {
    const result = await model.answer(prompt, options)
    const output = typeof result === 'string' ? result : result?.output || ''
    // `typeof null` is 'object', so null must be excluded explicitly before
    // the usage fields are read below.
    const tokens = result !== null && typeof result === 'object' ? result : {}

    if (json) {
      console.log(JSON.stringify({
        model: model.id,
        output,
        inputTokens: tokens.inputTokens || 0,
        outputTokens: tokens.outputTokens || 0,
        thinkingTokens: tokens.thinkingTokens || 0,
        cost: tokens.cost ?? null,
        status: tokens.status || 'completed'
      }, null, 2))
    } else if (!stream) {
      process.stdout.write(output)
      if (output && !output.endsWith('\n')) process.stdout.write('\n')
    } else {
      // Stream already wrote to stdout; ensure trailing newline
      if (output && !output.endsWith('\n')) process.stdout.write('\n')
    }

    // Token + timing summary to stderr
    const summary = []
    if (tokens.inputTokens) summary.push(`${tokens.inputTokens} in`)
    if (tokens.outputTokens) summary.push(`${tokens.outputTokens} out`)
    if (tokens.thinkingTokens) summary.push(`${tokens.thinkingTokens} think`)
    if (tokens.cost != null) summary.push(`$${tokens.cost.toFixed(4)}`)
    const ts = tokens.timestamps
    if (ts) {
      // The difference is divided by 1e6 to obtain milliseconds, i.e. the
      // timestamps are treated as nanosecond counts.
      const toMs = (a, b) => {
        if (!a || !b) return null
        const na = typeof a === 'bigint' ? a : BigInt(a)
        const nb = typeof b === 'bigint' ? b : BigInt(b)
        return Number(nb - na) / 1e6
      }
      const ttft = toMs(ts.start, ts.first)
      const total = toMs(ts.start, ts.end)
      if (ttft != null) summary.push(`${Math.round(ttft)}ms ttft`)
      if (total != null) summary.push(`${Math.round(total)}ms total`)
    }
    if (summary.length) process.stderr.write(`${summary.join(', ')}\n`)
  } catch (err) {
    console.error(`Error: ${err.detail || err.message}`)
    process.exit(1)
  }
}
|
package/src/cli/backup.js
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
import { existsSync } from 'fs'
import { readFile, writeFile, copyFile, stat } from 'fs/promises'
import { CURATED_PATH, BACKUP_SLOTS } from '../lib/common.js'
import { id, meta, ok, err, warn } from './colors.js'
|
|
5
|
+
|
|
6
|
+
/**
 * `mo model backup` — list, restore, or diff the backup copies of the catalog
 * file at CURATED_PATH. Backups live next to it as `<CURATED_PATH>.<slot>`,
 * one per entry of BACKUP_SLOTS.
 *
 * Actions:
 *   list            print the current catalog and every slot with its
 *                   modification time and entry count
 *   restore <slot>  copy the current catalog to `.prev`, then replace it with
 *                   the content of the chosen slot
 *   diff <slot>     print the keys added / removed / changed in the current
 *                   catalog relative to the slot
 *
 * Exits the process with status 0 after printing help and with status 1 on an
 * unknown action, an invalid or empty slot, an unreadable backup, or (for
 * `diff`) a missing current catalog.
 *
 * @param {string[]} args `[action, slot]` as given on the command line.
 * @returns {Promise<void>}
 */
export async function runBackup (args) {
  const [action, slot] = args

  if (!action || action === '-h' || action === '--help') {
    console.log(`mohdel model backup — manage catalog backups

Usage:
model backup list Show backup slots with timestamps
model backup restore <slot> Restore from a backup slot
model backup diff <slot> Show changes between current and slot

Slots: prev (last save), daily (first save of the day), weekly (first save of the week)`)
    process.exit(0)
  }

  if (action === 'list') {
    const current = existsSync(CURATED_PATH) ? await stat(CURATED_PATH) : null
    if (current) {
      const entries = JSON.parse(await readFile(CURATED_PATH, 'utf8'))
      const count = Object.keys(entries).length
      console.log(` ${ok('●')} current ${meta(fmtDate(current.mtimeMs))} ${meta(`${count} models`)}`)
    } else {
      console.log(` ${meta('○')} current ${meta('(no catalog)')}`)
    }
    for (const s of BACKUP_SLOTS) {
      const path = CURATED_PATH + '.' + s
      if (existsSync(path)) {
        const st = await stat(path)
        const entries = JSON.parse(await readFile(path, 'utf8'))
        const count = Object.keys(entries).length
        console.log(` ${ok('●')} ${id(s.padEnd(7))} ${meta(fmtDate(st.mtimeMs))} ${meta(`${count} models`)}`)
      } else {
        console.log(` ${meta('○')} ${meta(s.padEnd(7))} ${meta('(empty)')}`)
      }
    }
    return
  }

  if (action === 'restore') {
    if (!slot || !BACKUP_SLOTS.includes(slot)) {
      console.error(`Usage: model backup restore <${BACKUP_SLOTS.join('|')}>`)
      process.exit(1)
    }
    const backupPath = CURATED_PATH + '.' + slot
    if (!existsSync(backupPath)) {
      console.error(err(`No backup in slot "${slot}"`))
      process.exit(1)
    }
    // Load and validate the backup BEFORE anything on disk is modified:
    //  - when restoring from "prev", the rotation below overwrites that very
    //    file, so its content has to be in memory first (otherwise the restore
    //    would copy the current catalog onto itself and lose the backup);
    //  - a corrupt backup must not replace a working catalog.
    const backupText = await readFile(backupPath, 'utf8')
    let entries
    try {
      entries = JSON.parse(backupText)
    } catch (e) {
      console.error(err(`Backup in slot "${slot}" is not valid JSON: ${e.message}`))
      process.exit(1)
    }
    if (entries === null || typeof entries !== 'object') {
      console.error(err(`Backup in slot "${slot}" does not contain a catalog object`))
      process.exit(1)
    }
    // Rotate current to .prev before restoring
    if (existsSync(CURATED_PATH)) {
      await copyFile(CURATED_PATH, CURATED_PATH + '.prev')
    }
    await writeFile(CURATED_PATH, backupText)
    console.log(`${ok('✓')} Restored from ${id(slot)} (${Object.keys(entries).length} models). Previous state saved to ${meta('prev')}.`)
    return
  }

  if (action === 'diff') {
    if (!slot || !BACKUP_SLOTS.includes(slot)) {
      console.error(`Usage: model backup diff <${BACKUP_SLOTS.join('|')}>`)
      process.exit(1)
    }
    const backupPath = CURATED_PATH + '.' + slot
    if (!existsSync(backupPath)) {
      console.error(err(`No backup in slot "${slot}"`))
      process.exit(1)
    }
    if (!existsSync(CURATED_PATH)) {
      console.error(err('No current catalog'))
      process.exit(1)
    }

    const current = JSON.parse(await readFile(CURATED_PATH, 'utf8'))
    const backup = JSON.parse(await readFile(backupPath, 'utf8'))
    const currentKeys = new Set(Object.keys(current))
    const backupKeys = new Set(Object.keys(backup))

    // "changed" compares serialized entries, so a mere reordering of the
    // properties inside an entry is reported as a change as well.
    const added = [...currentKeys].filter(k => !backupKeys.has(k))
    const removed = [...backupKeys].filter(k => !currentKeys.has(k))
    const changed = [...currentKeys].filter(k => backupKeys.has(k) && JSON.stringify(current[k]) !== JSON.stringify(backup[k]))

    if (!added.length && !removed.length && !changed.length) {
      console.log(meta('No differences'))
      return
    }

    for (const k of added) console.log(`${ok('+')} ${id(k)}`)
    for (const k of removed) console.log(`${err('-')} ${id(k)}`)
    for (const k of changed) console.log(`${warn('~')} ${id(k)}`)
    console.log(meta(`\n${added.length} added, ${removed.length} removed, ${changed.length} changed`))
    return
  }

  console.error(`Unknown action: ${action}. Run "model backup --help".`)
  process.exit(1)
}
|
|
103
|
+
|
|
104
|
+
/**
 * Render an epoch-millisecond timestamp as "<locale date> <locale time>",
 * using the runtime's default locale.
 *
 * @param {number} ms Milliseconds since the Unix epoch.
 * @returns {string}
 */
function fmtDate (ms) {
  const when = new Date(ms)
  return [when.toLocaleDateString(), when.toLocaleTimeString()].join(' ')
}
|
package/src/cli/bench.js
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
import { id, label, meta, price, err } from './colors.js'
|
|
2
|
+
import fs from 'node:fs/promises'
|
|
3
|
+
import path from 'node:path'
|
|
4
|
+
import mohdel from '../lib/index.js'
|
|
5
|
+
import { loadDefaultEnv } from '../lib/common.js'
|
|
6
|
+
import {
|
|
7
|
+
loadPrompt, parseJson, scoreCorrectness, computeCost,
|
|
8
|
+
computeTiming, formatNumber
|
|
9
|
+
} from '../lib/benchmark-score.js'
|
|
10
|
+
|
|
11
|
+
/**
 * `mo model bench` — benchmark one model, or every model carrying one of the
 * given tags, with live inference.
 *
 * Parses the flags, creates the mohdel instance, and dispatches to `runSuite`
 * (when at least one --tag was given) or to `runSingle` (first positional
 * argument is the model id).
 *
 * Exits the process with status 0 after printing help and with status 1 when
 * --budget is not a usable number or when neither a model id nor a tag is
 * given.
 *
 * @param {string[]} args CLI arguments after `bench`. Recognised flags are
 *   spliced out of this array in place.
 * @returns {Promise<void>}
 */
export async function runBench (args) {
  if (args.includes('-h') || args.includes('--help')) {
    console.log(`mohdel model bench — benchmark models with live inference

Usage:
model bench <model> [options] Benchmark a single model
model bench --tag <tag> [options] Benchmark all models with a tag

Options:
--effort <level> Thinking effort: high, medium, low, none
--budget <tokens> Output token budget (default: 12000)
--prompt <path> Prompt file (default: test/benchmark.md)
--save <path> Save results to JSON file
--json Output as JSON (single model only)

Examples:
mo bench anthropic/claude-sonnet-4-6
mo bench --tag fast --effort low
mo bench openai/gpt-5 --budget 8000 --save results.json`)
    process.exit(0)
  }

  loadDefaultEnv()

  // Flag parsers. Each one removes what it consumed from `args`, so only the
  // positional arguments remain afterwards.
  const flag = (name) => {
    const idx = args.indexOf(name)
    if (idx === -1) return false
    args.splice(idx, 1)
    return true
  }
  const flagVal = (name) => {
    const idx = args.indexOf(name)
    if (idx === -1) return undefined
    const val = args[idx + 1]
    args.splice(idx, 2)
    return val
  }

  const json = flag('--json')
  const effort = flagVal('--effort')
  const budgetArg = flagVal('--budget')
  const budget = Number.parseInt(budgetArg || '12000', 10)
  // Without this check an unparsable --budget becomes NaN, which would be sent
  // as the output budget of the real request.
  if (Number.isNaN(budget) || budget < 0) {
    console.error(`Invalid --budget value: ${budgetArg}. Expected a non-negative integer.`)
    process.exit(1)
  }
  const promptPath = flagVal('--prompt') || 'test/benchmark.md'
  const savePath = flagVal('--save')
  // --tag may be repeated; keep consuming until none is left.
  const tags = []
  let t
  while ((t = flagVal('--tag'))) tags.push(t)

  const mo = await mohdel()

  if (tags.length) {
    await runSuite(mo, { tags, effort, budget, promptPath, savePath })
  } else {
    const modelId = args[0]
    if (!modelId) {
      console.error('Provide a model ID or --tag. Run "mo model bench --help".')
      process.exit(1)
    }
    await runSingle(mo, modelId, { effort, budget, promptPath, savePath, json })
  }
}
|
|
72
|
+
|
|
73
|
+
// --- Single model ---
|
|
74
|
+
|
|
75
|
+
/**
 * Benchmark a single model and report the outcome.
 *
 * The result is written to `savePath` when one is given, then printed either
 * as JSON (`json` set) or through `printSingleResult`.
 *
 * @param {object} mo mohdel instance; `mo.use(modelId)` yields the model.
 * @param {string} modelId Model to benchmark.
 * @param {object} opts
 * @param {string} [opts.effort] Forwarded to `benchmarkModel`.
 * @param {number} opts.budget Forwarded to `benchmarkModel`.
 * @param {string} opts.promptPath Prompt file handed to `loadPrompt`.
 * @param {string} [opts.savePath] Destination file for the JSON result.
 * @param {boolean} [opts.json] Print JSON instead of the readable summary.
 * @returns {Promise<void>}
 */
async function runSingle (mo, modelId, { effort, budget, promptPath, savePath, json }) {
  const prompt = await loadPrompt(promptPath)
  const model = mo.use(modelId)

  const result = await benchmarkModel(model, prompt, {
    effort,
    budget,
    pricing: resolvePricing(model.info())
  })

  const serialize = () => JSON.stringify(result, null, 2)

  if (savePath) {
    await fs.writeFile(path.resolve(savePath), serialize())
  }

  if (!json) {
    printSingleResult(result)
    return
  }
  console.log(serialize())
}
|
|
93
|
+
|
|
94
|
+
// --- Suite (multi-model by tag) ---
|
|
95
|
+
|
|
96
|
+
/**
 * Benchmark every model listed under at least one of `tags`, one after the
 * other, print a table sorted by correctness per dollar (best first, entries
 * without a value last), and optionally save all results as JSON.
 *
 * A model that throws is kept in the results as a failed row and does not stop
 * the suite. Exits the process with status 1 when no model matches the tags.
 *
 * @param {object} mo mohdel instance (`mo.list(tag)`, `mo.use(id)`).
 * @param {object} opts
 * @param {string[]} opts.tags Tags whose models are benchmarked.
 * @param {string} [opts.effort] Forwarded to `benchmarkModel`.
 * @param {number} opts.budget Forwarded to `benchmarkModel`.
 * @param {string} opts.promptPath Prompt file handed to `loadPrompt`.
 * @param {string} [opts.savePath] Destination file for the JSON results.
 * @returns {Promise<void>}
 */
async function runSuite (mo, { tags, effort, budget, promptPath, savePath }) {
  const prompt = await loadPrompt(promptPath)

  // Union of all tagged models; the first occurrence of each id wins and
  // insertion order is preserved.
  const byId = new Map()
  for (const tag of tags) {
    for (const candidate of mo.list(tag)) {
      if (byId.has(candidate.value)) continue
      byId.set(candidate.value, candidate)
    }
  }
  const models = [...byId.values()]

  if (models.length === 0) {
    console.error(err(`No models found with tags: ${tags.join(', ')}`))
    process.exit(1)
  }

  const results = []
  for (const [index, entry] of models.entries()) {
    const { value, label } = entry
    // Progress line: the score (or FAILED) is appended once the run settles.
    process.stderr.write(`[${index + 1}/${models.length}] ${value}...`)

    try {
      const model = mo.use(value)
      const pricing = resolvePricing(model.info())
      const outcome = await benchmarkModel(model, prompt, { effort, budget, pricing })
      results.push(outcome)
      process.stderr.write(` ${outcome.correctness.toFixed(3)}\n`)
    } catch (e) {
      process.stderr.write(` ${err('FAILED')}: ${e.message}\n`)
      results.push({ model: value, label, correctness: null, cost: null, correctnessPerDollar: null, error: e.message })
    }
  }

  // Descending by correctness per dollar; rows without a value sink to the end.
  const byValueForMoney = (a, b) => {
    const left = a.correctnessPerDollar
    const right = b.correctnessPerDollar
    if (left === null) return right === null ? 0 : 1
    if (right === null) return -1
    return right - left
  }
  results.sort(byValueForMoney)

  printSuiteTable(results)

  if (savePath) {
    const target = path.resolve(savePath)
    await fs.writeFile(target, JSON.stringify(results, null, 2))
    process.stderr.write(`\nResults saved to ${target}\n`)
  }
}
|
|
145
|
+
|
|
146
|
+
// --- Shared benchmark runner ---
|
|
147
|
+
|
|
148
|
+
/**
 * Run the benchmark prompt against one model and assemble the result record.
 *
 * Two requests are sent in sequence, both prefixed with the same unique run
 * tag (presumably to keep provider-side caches from answering — confirm):
 *   1. a minimal "say ack" request whose budget is capped at 32 tokens, used
 *      only for its timing;
 *   2. the real prompt with the full budget, whose output is parsed and scored.
 *
 * @param {object} model Model handle exposing `answer`, `id` and `label`.
 * @param {string} prompt Benchmark prompt text.
 * @param {object} opts
 * @param {string} [opts.effort] Passed as `outputEffort` on both requests.
 * @param {number} opts.budget Passed as `outputBudget` on the real request.
 * @param {?{input:number, output:number, thinking:number}} opts.pricing
 *   Prices handed to `computeCost` and echoed in the result.
 * @returns {Promise<object>} Scores, cost, token counts, timing, throughput and
 *   the raw model output.
 */
async function benchmarkModel (model, prompt, { effort, budget, pricing }) {
  const runTag = `[run:${Date.now()}-${Math.random().toString(36).slice(2, 8)}]`
  // Cap the warm-up at 32 tokens; a missing or zero budget also yields 32.
  const minimalBudget = Math.min(budget || 0, 32) || 32

  const minimalResponse = await model.answer(`${runTag} say ack`, { outputBudget: minimalBudget, outputEffort: effort })
  const response = await model.answer(`${runTag}\n${prompt}`, { outputBudget: budget, outputEffort: effort })

  const rawOutput = typeof response === 'string' ? response : response?.output || ''
  const parsed = parseJson(rawOutput)
  const scoring = scoreCorrectness(parsed)

  // `?.` / `??` cover every response shape with one rule: null, a plain string,
  // or an object without timestamps all fall back to `{}`. (A `typeof x ===
  // 'object'` test lets null through and would throw on `.timestamps`.)
  const minimalTiming = computeTiming(minimalResponse?.timestamps ?? {})
  const standardTiming = computeTiming(response?.timestamps ?? {})

  const generationSeconds = standardTiming.generationMs !== null ? standardTiming.generationMs / 1000 : null
  const outputTokens = Number.isFinite(response?.outputTokens) ? response.outputTokens : null

  const tokens = {
    input: response?.inputTokens ?? null,
    output: response?.outputTokens ?? null,
    thinking: response?.thinkingTokens ?? null
  }

  const costDollars = computeCost(tokens, pricing)

  return {
    model: model.id,
    label: model.label,
    correctness: formatNumber(scoring.correctness),
    cost: costDollars !== null ? formatNumber(costDollars) : null,
    // `null > 0` is false, so an unknown or zero cost yields null here.
    correctnessPerDollar: costDollars > 0 ? formatNumber(scoring.correctness / costDollars) : null,
    breakdown: Object.fromEntries(
      Object.entries(scoring.breakdown).map(([k, v]) => [k, formatNumber(v)])
    ),
    tokens,
    pricing: pricing ? { inputPerMillion: pricing.input, outputPerMillion: pricing.output, thinkingPerMillion: pricing.thinking } : null,
    parse: { ok: parsed.ok, error: parsed.ok ? null : parsed.error, extraneous: parsed.extraneous },
    timing: { minimal: minimalTiming, standard: standardTiming },
    throughput: {
      outputTokensPerSecond: outputTokens !== null && generationSeconds > 0 ? formatNumber(outputTokens / generationSeconds) : null,
      charactersPerSecond: generationSeconds > 0 ? formatNumber(rawOutput.length / generationSeconds) : null
    },
    latencyMs: { minimal: minimalTiming.latencyMs, standard: standardTiming.latencyMs },
    requested: { outputBudget: budget, outputEffort: effort || null },
    details: scoring.details,
    raw: rawOutput
  }
}
|
|
196
|
+
|
|
197
|
+
// --- Helpers ---
|
|
198
|
+
|
|
199
|
+
/**
 * Reduce a model info record to the three flat prices used for costing.
 *
 * Each of `inputPrice`, `outputPrice` and `thinkingPrice` may be a plain
 * number or an object carrying a `default` entry; anything else counts as 0.
 *
 * @param {?object} info Model info record.
 * @returns {?{input:number, output:number, thinking:number}} null when `info`
 *   is falsy.
 */
function resolvePricing (info) {
  if (!info) return null

  const flatten = (entry) => {
    if (typeof entry === 'number') return entry
    return entry?.default ?? 0
  }

  const { inputPrice, outputPrice, thinkingPrice } = info
  return {
    input: flatten(inputPrice),
    output: flatten(outputPrice),
    thinking: flatten(thinkingPrice)
  }
}
|
|
204
|
+
|
|
205
|
+
// --- Output ---
|
|
206
|
+
|
|
207
|
+
/**
 * Print the human-readable summary of one benchmark result to stdout:
 * headline, then one `name: value` line per metric, the optional score
 * breakdown, and a trailing blank line. Missing values are shown as "—".
 *
 * @param {object} r Result record as returned by `benchmarkModel`.
 * @returns {void}
 */
function printSingleResult (r) {
  const DASH = '—'
  const line = (name, value) => console.log(`${meta(name)} ${value}`)

  console.log(`\n${label(r.label)} ${meta(`(${r.model})`)}`)

  line('correctness:', id(r.correctness?.toFixed(3) || DASH))
  line('cost:', r.cost != null ? price('$' + r.cost.toFixed(4)) : DASH)
  line('corr/$:', r.correctnessPerDollar?.toFixed(1) || DASH)

  const latency = r.latencyMs.standard
  line('latency:', latency != null ? latency.toFixed(0) + 'ms' : DASH)

  line('throughput:', `${r.throughput.outputTokensPerSecond || DASH} tok/s`)

  const { input, output, thinking } = r.tokens
  line('tokens:', `${input || 0} in, ${output || 0} out, ${thinking || 0} thinking`)

  if (r.breakdown) {
    const parts = []
    for (const [key, score] of Object.entries(r.breakdown)) {
      parts.push(`${key}=${score?.toFixed(3) || DASH}`)
    }
    line('breakdown:', parts.join(' '))
  }
  console.log()
}
|
|
222
|
+
|
|
223
|
+
/**
 * Right-pad a value with spaces to at least `len` characters. The value is
 * stringified first; text that is already long enough is returned unchanged
 * (never truncated).
 *
 * @param {*} str Value to render.
 * @param {number} len Minimum width.
 * @returns {string}
 */
const pad = (str, len) => String(str).padEnd(len)
|
|
227
|
+
|
|
228
|
+
/**
 * Format a table cell: three decimals for a number, "-" for null/undefined,
 * right-padded to `width` via `pad`.
 *
 * @param {?number} v Value to show.
 * @param {number} width Column width.
 * @returns {string}
 */
const fmtNum = (v, width) => pad(v == null ? '-' : v.toFixed(3), width)
|
|
232
|
+
|
|
233
|
+
/**
 * Print the suite results as a fixed-width table on stdout: a header, a rule
 * of the same length, one row per result, and a trailing blank line. A result
 * carrying `error` is shown as a FAILED row instead of numbers.
 *
 * @param {object[]} results Result records, already in display order.
 * @returns {void}
 */
function printSuiteTable (results) {
  const colModel = 35
  const colNum = 9

  const numericTitles = ['Correct', 'Cost($)', 'Corr/$', 'Entities', 'Metrics', 'Contrad.']
  const header = pad('Model', colModel) + numericTitles.map(title => pad(title, colNum)).join('')
  console.log('\n' + meta(header))
  console.log(meta('─'.repeat(header.length)))

  for (const r of results) {
    if (r.error) {
      console.log(pad(r.model, colModel) + err('FAILED: ' + r.error))
      continue
    }
    const cells = [
      r.correctness,
      r.cost,
      r.correctnessPerDollar,
      r.breakdown?.entities,
      r.breakdown?.metrics,
      r.breakdown?.contradictions
    ]
    console.log(pad(r.model, colModel) + cells.map(cell => fmtNum(cell, colNum)).join(''))
  }
  console.log()
}
|