mohdel 0.101.0 → 0.103.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -330,14 +330,16 @@ function buildRequest (envelope, spec, config) {
|
|
|
330
330
|
|
|
331
331
|
if (spec.thinkingEffortLevels) {
|
|
332
332
|
const effort = envelope.outputEffort ?? spec.defaultThinkingEffort ?? 'low'
|
|
333
|
-
if (effort && effort
|
|
333
|
+
if (effort && spec.thinkingEffortLevels[effort] != null) {
|
|
334
334
|
const headroom = spec.thinkingEffortLevels[effort]
|
|
335
335
|
if (args.max_tokens && typeof headroom === 'number') {
|
|
336
336
|
args.max_tokens += headroom
|
|
337
337
|
}
|
|
338
|
-
|
|
338
|
+
// When reasoning is disabled ('none') the model accepts
|
|
339
|
+
// temperature again — only strip it when reasoning is on.
|
|
340
|
+
if (effort !== 'none') delete args.temperature
|
|
339
341
|
if (config.reasoningField === 'cerebras_zai' && /zai/i.test(bareOf(envelope.model))) {
|
|
340
|
-
args.disable_reasoning =
|
|
342
|
+
args.disable_reasoning = (effort === 'none')
|
|
341
343
|
} else {
|
|
342
344
|
args.reasoning_effort = effort
|
|
343
345
|
}
|
|
@@ -203,12 +203,18 @@ export async function * anthropic (envelope, deps = {}) {
|
|
|
203
203
|
}
|
|
204
204
|
|
|
205
205
|
const end = String(process.hrtime.bigint())
|
|
206
|
-
// Estimate thinking tokens
|
|
207
|
-
// (
|
|
208
|
-
//
|
|
206
|
+
// Estimate thinking tokens. Primary path: count streamed thinking_delta
|
|
207
|
+
// chars (sonnet emits these). Fallback: gap between Anthropic's reported
|
|
208
|
+
// output_tokens and what actually streamed as visible output (text +
|
|
209
|
+
// tool input JSON) — catches redacted_thinking blocks (opus 4.7 default)
|
|
210
|
+
// that consume output tokens but emit no streaming deltas.
|
|
211
|
+
const streamedOutput = currentOutput()
|
|
212
|
+
const streamedOutputChars = streamedOutput.length +
|
|
213
|
+
[...toolBlocks.values()].reduce((s, b) => s + b.inputJson.length, 0)
|
|
214
|
+
const streamedOutputTokens = Math.ceil(streamedOutputChars / ANTHROPIC_THINKING_CHARS_PER_TOKEN)
|
|
209
215
|
const estimatedThinkingTokens = thinkingChars > 0
|
|
210
216
|
? Math.min(Math.ceil(thinkingChars / ANTHROPIC_THINKING_CHARS_PER_TOKEN), outputTokens)
|
|
211
|
-
: 0
|
|
217
|
+
: Math.max(0, outputTokens - streamedOutputTokens)
|
|
212
218
|
const messageOutputTokens = Math.max(0, outputTokens - estimatedThinkingTokens)
|
|
213
219
|
|
|
214
220
|
/** @type {import('#core/events.js').DoneEvent} */
|
|
@@ -216,7 +222,7 @@ export async function * anthropic (envelope, deps = {}) {
|
|
|
216
222
|
type: 'done',
|
|
217
223
|
result: {
|
|
218
224
|
status,
|
|
219
|
-
output:
|
|
225
|
+
output: streamedOutput || null,
|
|
220
226
|
inputTokens,
|
|
221
227
|
outputTokens: messageOutputTokens,
|
|
222
228
|
thinkingTokens: estimatedThinkingTokens,
|
|
@@ -225,19 +225,18 @@ function buildRequest (envelope, input, instructions) {
|
|
|
225
225
|
|
|
226
226
|
// Thinking: when the spec has `thinkingEffortLevels`, set
|
|
227
227
|
// `reasoning.effort` and add the thinking-budget headroom on top
|
|
228
|
-
// of the user's `outputBudget`.
|
|
229
|
-
//
|
|
230
|
-
//
|
|
228
|
+
// of the user's `outputBudget`. Both OpenAI (gpt-5.x) and xAI
|
|
229
|
+
// (grok-4.3+) accept the same `reasoning: { effort }` shape on
|
|
230
|
+
// the Responses API, including the literal value 'none' to
|
|
231
|
+
// disable reasoning entirely.
|
|
231
232
|
if (spec?.thinkingEffortLevels) {
|
|
232
233
|
const effort = envelope.outputEffort ?? spec.defaultThinkingEffort ?? 'low'
|
|
233
|
-
if (effort && effort
|
|
234
|
+
if (effort && spec.thinkingEffortLevels[effort] != null) {
|
|
234
235
|
const headroom = spec.thinkingEffortLevels[effort]
|
|
235
236
|
if (request.max_output_tokens && typeof headroom === 'number') {
|
|
236
237
|
request.max_output_tokens += headroom
|
|
237
238
|
}
|
|
238
|
-
|
|
239
|
-
request.reasoning = { effort }
|
|
240
|
-
}
|
|
239
|
+
request.reasoning = { effort }
|
|
241
240
|
}
|
|
242
241
|
}
|
|
243
242
|
|
package/js/session/run.js
CHANGED
|
@@ -264,12 +264,16 @@ export async function * run (envelope, {
|
|
|
264
264
|
function normalizeModelEffort (envelope, resolveSpec) {
|
|
265
265
|
const candidate = effortOf(envelope.model)
|
|
266
266
|
if (!candidate) return { envelope }
|
|
267
|
-
if (envelope.outputEffort) return { envelope } // explicit wins
|
|
268
267
|
|
|
269
268
|
const base = catalogKey(envelope.model)
|
|
270
269
|
const baseSpec = resolveSpec(base)
|
|
271
270
|
if (!baseSpec) return { envelope } // base not known — let full string fall through to not-found
|
|
272
271
|
|
|
272
|
+
// Explicit outputEffort wins; still strip the suffix so spans/logs see the canonical id.
|
|
273
|
+
if (envelope.outputEffort) {
|
|
274
|
+
return { envelope: { ...envelope, model: base } }
|
|
275
|
+
}
|
|
276
|
+
|
|
273
277
|
if (!baseSpec.thinkingEffortLevels) {
|
|
274
278
|
return {
|
|
275
279
|
envelope,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "mohdel",
|
|
3
|
-
"version": "0.101.0",
|
|
3
|
+
"version": "0.103.0",
|
|
4
4
|
"license": "MIT",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "Christophe Le Bars",
|
|
@@ -84,19 +84,19 @@
|
|
|
84
84
|
},
|
|
85
85
|
"optionalDependencies": {
|
|
86
86
|
"@clack/prompts": "^1.3.0",
|
|
87
|
-
"@opentelemetry/exporter-trace-otlp-grpc": "^0.
|
|
88
|
-
"@opentelemetry/sdk-node": "^0.
|
|
87
|
+
"@opentelemetry/exporter-trace-otlp-grpc": "^0.217.0",
|
|
88
|
+
"@opentelemetry/sdk-node": "^0.217.0",
|
|
89
89
|
"chalk": "^5.4.0",
|
|
90
|
-
"mohdel-thin-gate-linux-x64-gnu": "0.101.0"
|
|
90
|
+
"mohdel-thin-gate-linux-x64-gnu": "0.103.0"
|
|
91
91
|
},
|
|
92
92
|
"dependencies": {
|
|
93
|
-
"@anthropic-ai/sdk": "^0.
|
|
93
|
+
"@anthropic-ai/sdk": "^0.95.1",
|
|
94
94
|
"@cerebras/cerebras_cloud_sdk": "^1.61.1",
|
|
95
|
-
"@google/genai": "^
|
|
95
|
+
"@google/genai": "^2.0.0",
|
|
96
96
|
"@opentelemetry/api": "^1.9.1",
|
|
97
97
|
"env-paths": "^4.0.0",
|
|
98
98
|
"groq-sdk": "^1.1.2",
|
|
99
|
-
"openai": "^6.
|
|
99
|
+
"openai": "^6.37.0",
|
|
100
100
|
"undici": "^7.24.5"
|
|
101
101
|
},
|
|
102
102
|
"lint-staged": {
|
|
@@ -104,7 +104,7 @@
|
|
|
104
104
|
},
|
|
105
105
|
"devDependencies": {
|
|
106
106
|
"gpt-tokenizer": "^3.4.0",
|
|
107
|
-
"lint-staged": "^
|
|
107
|
+
"lint-staged": "^17.0.3",
|
|
108
108
|
"release-it": "^20.0.1",
|
|
109
109
|
"standard": "^17.1.2",
|
|
110
110
|
"vitest": "^4.1.5"
|