mohdel 0.101.0 → 0.103.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -330,14 +330,16 @@ function buildRequest (envelope, spec, config) {
330
330
 
331
331
  if (spec.thinkingEffortLevels) {
332
332
  const effort = envelope.outputEffort ?? spec.defaultThinkingEffort ?? 'low'
333
- if (effort && effort !== 'none') {
333
+ if (effort && spec.thinkingEffortLevels[effort] != null) {
334
334
  const headroom = spec.thinkingEffortLevels[effort]
335
335
  if (args.max_tokens && typeof headroom === 'number') {
336
336
  args.max_tokens += headroom
337
337
  }
338
- delete args.temperature
338
+ // When reasoning is disabled ('none') the model accepts
339
+ // temperature again — only strip it when reasoning is on.
340
+ if (effort !== 'none') delete args.temperature
339
341
  if (config.reasoningField === 'cerebras_zai' && /zai/i.test(bareOf(envelope.model))) {
340
- args.disable_reasoning = false
342
+ args.disable_reasoning = (effort === 'none')
341
343
  } else {
342
344
  args.reasoning_effort = effort
343
345
  }
@@ -203,12 +203,18 @@ export async function * anthropic (envelope, deps = {}) {
203
203
  }
204
204
 
205
205
  const end = String(process.hrtime.bigint())
206
- // Estimate thinking tokens from streamed thinking_delta char count
207
- // (Anthropic API doesn't report them separately). Cap at total
208
- // output tokens reported by usage.
206
+ // Estimate thinking tokens. Primary path: count streamed thinking_delta
207
+ // chars (sonnet emits these). Fallback: gap between Anthropic's reported
208
+ // output_tokens and what actually streamed as visible output (text +
209
+ // tool input JSON) — catches redacted_thinking blocks (opus 4.7 default)
210
+ // that consume output tokens but emit no streaming deltas.
211
+ const streamedOutput = currentOutput()
212
+ const streamedOutputChars = streamedOutput.length +
213
+ [...toolBlocks.values()].reduce((s, b) => s + b.inputJson.length, 0)
214
+ const streamedOutputTokens = Math.ceil(streamedOutputChars / ANTHROPIC_THINKING_CHARS_PER_TOKEN)
209
215
  const estimatedThinkingTokens = thinkingChars > 0
210
216
  ? Math.min(Math.ceil(thinkingChars / ANTHROPIC_THINKING_CHARS_PER_TOKEN), outputTokens)
211
- : 0
217
+ : Math.max(0, outputTokens - streamedOutputTokens)
212
218
  const messageOutputTokens = Math.max(0, outputTokens - estimatedThinkingTokens)
213
219
 
214
220
  /** @type {import('#core/events.js').DoneEvent} */
@@ -216,7 +222,7 @@ export async function * anthropic (envelope, deps = {}) {
216
222
  type: 'done',
217
223
  result: {
218
224
  status,
219
- output: currentOutput() || null,
225
+ output: streamedOutput || null,
220
226
  inputTokens,
221
227
  outputTokens: messageOutputTokens,
222
228
  thinkingTokens: estimatedThinkingTokens,
@@ -225,19 +225,18 @@ function buildRequest (envelope, input, instructions) {
225
225
 
226
226
  // Thinking: when the spec has `thinkingEffortLevels`, set
227
227
  // `reasoning.effort` and add the thinking-budget headroom on top
228
- // of the user's `outputBudget`. `reasoning` is an OpenAI-only
229
- parameter; xAI reasoning is automatic, so add the headroom
230
- // but skip the request field on xAI.
228
+ // of the user's `outputBudget`. Both OpenAI (gpt-5.x) and xAI
229
+ // (grok-4.3+) accept the same `reasoning: { effort }` shape on
230
+ // the Responses API, including the literal value 'none' to
231
+ // disable reasoning entirely.
231
232
  if (spec?.thinkingEffortLevels) {
232
233
  const effort = envelope.outputEffort ?? spec.defaultThinkingEffort ?? 'low'
233
- if (effort && effort !== 'none') {
234
+ if (effort && spec.thinkingEffortLevels[effort] != null) {
234
235
  const headroom = spec.thinkingEffortLevels[effort]
235
236
  if (request.max_output_tokens && typeof headroom === 'number') {
236
237
  request.max_output_tokens += headroom
237
238
  }
238
- if (provider === 'openai') {
239
- request.reasoning = { effort }
240
- }
239
+ request.reasoning = { effort }
241
240
  }
242
241
  }
243
242
 
package/js/session/run.js CHANGED
@@ -264,12 +264,16 @@ export async function * run (envelope, {
264
264
  function normalizeModelEffort (envelope, resolveSpec) {
265
265
  const candidate = effortOf(envelope.model)
266
266
  if (!candidate) return { envelope }
267
- if (envelope.outputEffort) return { envelope } // explicit wins
268
267
 
269
268
  const base = catalogKey(envelope.model)
270
269
  const baseSpec = resolveSpec(base)
271
270
  if (!baseSpec) return { envelope } // base not known — let full string fall through to not-found
272
271
 
272
+ // Explicit outputEffort wins; still strip the suffix so spans/logs see the canonical id.
273
+ if (envelope.outputEffort) {
274
+ return { envelope: { ...envelope, model: base } }
275
+ }
276
+
273
277
  if (!baseSpec.thinkingEffortLevels) {
274
278
  return {
275
279
  envelope,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mohdel",
3
- "version": "0.101.0",
3
+ "version": "0.103.0",
4
4
  "license": "MIT",
5
5
  "author": {
6
6
  "name": "Christophe Le Bars",
@@ -84,19 +84,19 @@
84
84
  },
85
85
  "optionalDependencies": {
86
86
  "@clack/prompts": "^1.3.0",
87
- "@opentelemetry/exporter-trace-otlp-grpc": "^0.216.0",
88
- "@opentelemetry/sdk-node": "^0.216.0",
87
+ "@opentelemetry/exporter-trace-otlp-grpc": "^0.217.0",
88
+ "@opentelemetry/sdk-node": "^0.217.0",
89
89
  "chalk": "^5.4.0",
90
- "mohdel-thin-gate-linux-x64-gnu": "0.101.0"
90
+ "mohdel-thin-gate-linux-x64-gnu": "0.103.0"
91
91
  },
92
92
  "dependencies": {
93
- "@anthropic-ai/sdk": "^0.91.1",
93
+ "@anthropic-ai/sdk": "^0.95.1",
94
94
  "@cerebras/cerebras_cloud_sdk": "^1.61.1",
95
- "@google/genai": "^1.51.0",
95
+ "@google/genai": "^2.0.0",
96
96
  "@opentelemetry/api": "^1.9.1",
97
97
  "env-paths": "^4.0.0",
98
98
  "groq-sdk": "^1.1.2",
99
- "openai": "^6.35.0",
99
+ "openai": "^6.37.0",
100
100
  "undici": "^7.24.5"
101
101
  },
102
102
  "lint-staged": {
@@ -104,7 +104,7 @@
104
104
  },
105
105
  "devDependencies": {
106
106
  "gpt-tokenizer": "^3.4.0",
107
- "lint-staged": "^16.4.0",
107
+ "lint-staged": "^17.0.3",
108
108
  "release-it": "^20.0.1",
109
109
  "standard": "^17.1.2",
110
110
  "vitest": "^4.1.5"