npm - mohdel - Versions diffs - 0.91.0 → 0.93.0 - Mend

mohdel 0.91.0 → 0.93.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (11)

package/js/core/events.js +13 -0
package/js/session/run.js +53 -10
package/package.json +3 -3
package/src/lib/catalog/anthropic.js +44 -0
package/src/lib/catalog/cerebras.js +26 -0
package/src/lib/catalog/fireworks.js +38 -0
package/src/lib/catalog/gemini.js +46 -0
package/src/lib/catalog/groq.js +30 -0
package/src/lib/catalog/openai.js +34 -0
package/src/lib/catalog/openrouter.js +48 -0
package/src/lib/select.js +15 -5

package/js/core/events.js CHANGED Viewed

@@ -64,6 +64,13 @@
  *   `'insufficientOutputBudget' | 'cancelled' | ...` additive union.
  * @property {ToolCall[]} [toolCalls]
  *   Present when `status === 'tool_use'`.
+ * @property {number} [maxInterFrameMs]
+ *   Longest gap (ms) between adapter events during the call —
+ *   from `startedAt` to the first frame, between consecutive
+ *   frames, and from the last frame to the terminal. Direct signal
+ *   for calibrating downstream read timeouts: a 15-min call that
+ *   streams deltas every 30s is safe; a 5-min call with zero
+ *   intermediate frames is dangerous.
  */
 /**
@@ -83,6 +90,12 @@
  * @property {string} id
  * @property {string} name
  * @property {object} arguments
+ * @property {string} [thoughtSignature]
+ *   Provider-specific opaque blob carried through tool-call
+ *   round-trips to preserve thinking state continuity. Set by the
+ *   Gemini adapter when the model emits one; absent for other
+ *   providers. Callers replaying tool results should pass the
+ *   ToolCall back unchanged so the adapter can re-attach it.
  */
 export const EVENT_TYPES = Object.freeze(['delta', 'done', 'error'])

package/js/session/run.js CHANGED Viewed

@@ -141,9 +141,33 @@ export async function * run (envelope, {
   }
   let sawTerminal = false
+  // Track whether the adapter emitted any `delta` events during the
+  // call. Recorded as `mohdel.stream` on the span at finalize time —
+  // a non-streaming terminal (adapter fell back to wait-for-done, or
+  // the provider SDK collapsed stream events) is a hazard for any
+  // host-side read-timeout watchdog and shows up in traces before
+  // it turns into an abort downstream.
+  let sawDelta = false
+  // Track the longest gap between adapter events within this call —
+  // from `startedAt` to the first frame, between consecutive frames,
+  // and from the last frame to the terminal. A direct signal for any
+  // host-side read-timeout watchdog: a 15-min call that streams
+  // deltas every 30s is safe; a 5-min call with zero intermediate
+  // frames is dangerous. Surfaced on `AnswerResult.maxInterFrameMs`
+  // and the span's `mohdel.max_inter_frame_ms` attribute so hosts
+  // can aggregate it for adaptive-timeout calibration.
+  let lastFrameAt = startedAt
+  let maxInterFrameMs = 0
   try {
     for await (const ev of adapter(envelope, { signal, log, span })) {
-      if (ev.type === 'done') {
+      const now = Date.now()
+      const gap = now - lastFrameAt
+      if (gap > maxInterFrameMs) maxInterFrameMs = gap
+      lastFrameAt = now
+      if (ev.type === 'delta') {
+        sawDelta = true
+      } else if (ev.type === 'done') {
         sawTerminal = true
         // A cancelled terminal is the caller's action, not evidence
         // of provider recovery — don't wipe an accumulated failure
@@ -160,7 +184,11 @@ export async function * run (envelope, {
             (ev.result.thinkingTokens || 0)
           if (total > 0) limiter.recordTokens(bucketKey, total)
         }
-        finalizeSpanOk(span, ev.result)
+        // Surface on AnswerResult so hosts that pass the whole
+        // result upstream pick it up without needing a separate
+        // wire field.
+        if (ev.result) ev.result.maxInterFrameMs = maxInterFrameMs
+        finalizeSpanOk(span, ev.result, sawDelta, maxInterFrameMs)
         log.debug(summarizeDone(ev.result, startedAt), '[mohdel:answer] done')
       } else if (ev.type === 'error') {
         sawTerminal = true
@@ -168,7 +196,8 @@ export async function * run (envelope, {
         log.warn({
           err: ev.error,
           provider: envelope.provider,
-          totalMs: Date.now() - startedAt
+          totalMs: Date.now() - startedAt,
+          maxInterFrameMs
         }, '[mohdel:answer] failed')
         endSpanError(span, new Error(ev.error?.message || 'adapter error'))
       }
@@ -177,11 +206,12 @@ export async function * run (envelope, {
   } catch (e) {
     if (signal?.aborted && !sawTerminal) {
       const fallback = cancelledFallback()
-      finalizeSpanOk(span, fallback.result)
+      if (fallback.result) fallback.result.maxInterFrameMs = maxInterFrameMs
+      finalizeSpanOk(span, fallback.result, sawDelta, maxInterFrameMs)
       yield fallback
       return
     }
-    log.warn({ err: e, provider: envelope.provider }, '[mohdel:answer] adapter threw')
+    log.warn({ err: e, provider: envelope.provider, maxInterFrameMs }, '[mohdel:answer] adapter threw')
     endSpanError(span, e)
     yield errorEvent(messageOf(e), 'SESSION_ADAPTER_THREW')
     return
@@ -190,11 +220,12 @@ export async function * run (envelope, {
   if (!sawTerminal) {
     if (signal?.aborted) {
       const fallback = cancelledFallback()
-      finalizeSpanOk(span, fallback.result)
+      if (fallback.result) fallback.result.maxInterFrameMs = maxInterFrameMs
+      finalizeSpanOk(span, fallback.result, sawDelta, maxInterFrameMs)
       yield fallback
     } else {
       const err = 'adapter returned without a terminal event'
-      log.error({ provider: envelope.provider }, '[mohdel:answer] no terminal event')
+      log.error({ provider: envelope.provider, maxInterFrameMs }, '[mohdel:answer] no terminal event')
       endSpanError(span, new Error(err))
       yield errorEvent(err, 'SESSION_ADAPTER_NO_TERMINAL')
     }
@@ -288,14 +319,25 @@ function scopedLogger (logger, envelope, span) {
 /**
  * @param {any} span
  * @param {any} result
+ * @param {boolean} sawDelta  Whether the adapter emitted at least one
+ *   `delta` event during the call. Surfaces as `mohdel.stream` on the
+ *   span so traces can flag calls that completed without streaming —
+ *   a non-streaming long-running call is invisible to upstream idle
+ *   watchdogs until the terminal lands.
+ * @param {number} maxInterFrameMs  Longest gap between adapter events
+ *   during the call (including pre-first-frame and post-last-frame).
+ *   Surfaces as `mohdel.max_inter_frame_ms` so downstream timeout
+ *   calibration has a direct signal independent of total elapsed.
  */
-function finalizeSpanOk (span, result) {
+function finalizeSpanOk (span, result, sawDelta = false, maxInterFrameMs = 0) {
   /** @type {Record<string, any>} */
   const attrs = {
     'gen_ai.usage.input_tokens': result?.inputTokens || 0,
     'gen_ai.usage.output_tokens': result?.outputTokens || 0,
     'mohdel.thinking_tokens': result?.thinkingTokens || 0,
-    'mohdel.status': result?.status || 'unknown'
+    'mohdel.status': result?.status || 'unknown',
+    'mohdel.stream': !!sawDelta,
+    'mohdel.max_inter_frame_ms': maxInterFrameMs
   }
   if (result?.cost != null) attrs['mohdel.cost'] = result.cost
   if (result?.warning) attrs['mohdel.warning'] = result.warning
@@ -323,7 +365,8 @@ function summarizeDone (result, startedAt) {
     think: result?.thinkingTokens || 0,
     cost: result?.cost,
     warning: result?.warning,
-    totalMs: Date.now() - startedAt
+    totalMs: Date.now() - startedAt,
+    maxInterFrameMs: result?.maxInterFrameMs
   }
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "mohdel",
-  "version": "0.91.0",
+  "version": "0.93.0",
   "license": "MIT",
   "author": {
     "name": "Christophe Le Bars",
@@ -87,10 +87,10 @@
     "@opentelemetry/exporter-trace-otlp-grpc": "^0.215.0",
     "@opentelemetry/sdk-node": "^0.215.0",
     "chalk": "^5.4.0",
-    "mohdel-thin-gate-linux-x64-gnu": "0.91.0"
+    "mohdel-thin-gate-linux-x64-gnu": "0.93.0"
   },
   "dependencies": {
-    "@anthropic-ai/sdk": "^0.90.0",
+    "@anthropic-ai/sdk": "^0.91.0",
     "@cerebras/cerebras_cloud_sdk": "^1.61.1",
     "@google/genai": "^1.50.1",
     "@opentelemetry/api": "^1.9.1",

package/src/lib/catalog/anthropic.js ADDED Viewed

@@ -0,0 +1,44 @@
+const BASE_URL = 'https://api.anthropic.com/v1'
+const API_VERSION = '2023-06-01'
+const fetchModels = async ({ apiKey }) => {
+  const all = []
+  let afterId = null
+  while (true) {
+    const url = new URL(`${BASE_URL}/models`)
+    url.searchParams.set('limit', '100')
+    if (afterId) url.searchParams.set('after_id', afterId)
+    const res = await fetch(url, {
+      headers: {
+        'x-api-key': apiKey,
+        'anthropic-version': API_VERSION
+      }
+    })
+    if (!res.ok) throw new Error(`${res.status} ${res.statusText} fetching ${url}`)
+    const body = await res.json()
+    const page = Array.isArray(body?.data) ? body.data : []
+    all.push(...page)
+    if (!body?.has_more || !page.length) break
+    afterId = page[page.length - 1].id
+  }
+  return all
+}
+export default (sdkConfig) => {
+  let cache = null
+  const load = async () => {
+    if (!cache) cache = await fetchModels(sdkConfig)
+    return cache
+  }
+  return {
+    listModels: async () => (await load()).map(m => ({ id: m.id, label: m.display_name || m.id })),
+    getModelInfo: async (id) => {
+      const m = (await load()).find(x => x.id === id)
+      if (!m) return null
+      const info = { model: m.id }
+      if (m.display_name) info.label = m.display_name
+      if (m.created_at) info.createdAt = m.created_at
+      return info
+    }
+  }
+}

package/src/lib/catalog/cerebras.js ADDED Viewed

@@ -0,0 +1,26 @@
+const BASE_URL = 'https://api.cerebras.ai/v1'
+const fetchModels = async ({ apiKey }) => {
+  const res = await fetch(`${BASE_URL}/models`, { headers: { Authorization: `Bearer ${apiKey}` } })
+  if (!res.ok) throw new Error(`${res.status} ${res.statusText} fetching cerebras models`)
+  const body = await res.json()
+  return Array.isArray(body?.data) ? body.data : []
+}
+export default (sdkConfig) => {
+  let cache = null
+  const load = async () => {
+    if (!cache) cache = await fetchModels(sdkConfig)
+    return cache
+  }
+  return {
+    listModels: async () => (await load()).map(m => ({ id: m.id, label: m.id })),
+    getModelInfo: async (id) => {
+      const m = (await load()).find(x => x.id === id)
+      if (!m) return null
+      const info = { model: m.id }
+      if (typeof m.created === 'number') info.created = m.created
+      return info
+    }
+  }
+}

package/src/lib/catalog/fireworks.js ADDED Viewed

@@ -0,0 +1,38 @@
+// Fireworks' /inference/v1/models is openai-compatible for listing.
+// Upstream IDs carry an `accounts/fireworks/models/` prefix that the
+// runtime adapter adds back automatically — strip it here so curated
+// entries stay short.
+const DEFAULT_BASE_URL = 'https://api.fireworks.ai/inference/v1'
+const FW_PREFIX = 'accounts/fireworks/models/'
+const shortId = (id) => id?.startsWith(FW_PREFIX) ? id.slice(FW_PREFIX.length) : id
+const fetchModels = async ({ apiKey, baseURL }) => {
+  const url = `${(baseURL || DEFAULT_BASE_URL).replace(/\/$/, '')}/models`
+  const res = await fetch(url, { headers: { Authorization: `Bearer ${apiKey}` } })
+  if (!res.ok) throw new Error(`${res.status} ${res.statusText} fetching ${url}`)
+  const body = await res.json()
+  return Array.isArray(body?.data) ? body.data : []
+}
+export default (sdkConfig) => {
+  let cache = null
+  const load = async () => {
+    if (!cache) cache = await fetchModels(sdkConfig)
+    return cache
+  }
+  return {
+    listModels: async () => (await load()).map(m => {
+      const id = shortId(m.id)
+      return { id, label: id }
+    }),
+    getModelInfo: async (id) => {
+      const m = (await load()).find(x => shortId(x.id) === id)
+      if (!m) return null
+      const info = { model: shortId(m.id) }
+      if (typeof m.context_length === 'number') info.contextTokenLimit = m.context_length
+      if (typeof m.created === 'number') info.created = m.created
+      return info
+    }
+  }
+}

package/src/lib/catalog/gemini.js ADDED Viewed

@@ -0,0 +1,46 @@
+const BASE_URL = 'https://generativelanguage.googleapis.com/v1beta'
+const fetchModels = async ({ apiKey }) => {
+  const all = []
+  let pageToken = null
+  while (true) {
+    const url = new URL(`${BASE_URL}/models`)
+    url.searchParams.set('key', apiKey)
+    url.searchParams.set('pageSize', '1000')
+    if (pageToken) url.searchParams.set('pageToken', pageToken)
+    const res = await fetch(url)
+    if (!res.ok) throw new Error(`${res.status} ${res.statusText} fetching ${url.pathname}`)
+    const body = await res.json()
+    const page = Array.isArray(body?.models) ? body.models : []
+    all.push(...page)
+    if (!body?.nextPageToken) break
+    pageToken = body.nextPageToken
+  }
+  return all
+}
+const shortId = (name) => name.startsWith('models/') ? name.slice('models/'.length) : name
+export default (sdkConfig) => {
+  let cache = null
+  const load = async () => {
+    if (!cache) cache = await fetchModels(sdkConfig)
+    return cache
+  }
+  return {
+    listModels: async () => (await load())
+      .filter(m => (m.supportedGenerationMethods || []).includes('generateContent'))
+      .map(m => ({ id: shortId(m.name), label: m.displayName || shortId(m.name) })),
+    getModelInfo: async (id) => {
+      const m = (await load()).find(x => shortId(x.name) === id)
+      if (!m) return null
+      const info = { model: shortId(m.name) }
+      if (m.displayName) info.label = m.displayName
+      if (m.description) info.description = m.description
+      if (typeof m.inputTokenLimit === 'number') info.contextTokenLimit = m.inputTokenLimit
+      if (typeof m.outputTokenLimit === 'number') info.outputTokenLimit = m.outputTokenLimit
+      if (m.version) info.version = m.version
+      return info
+    }
+  }
+}

package/src/lib/catalog/groq.js ADDED Viewed

@@ -0,0 +1,30 @@
+const BASE_URL = 'https://api.groq.com/openai/v1'
+const fetchModels = async ({ apiKey }) => {
+  const res = await fetch(`${BASE_URL}/models`, { headers: { Authorization: `Bearer ${apiKey}` } })
+  if (!res.ok) throw new Error(`${res.status} ${res.statusText} fetching groq models`)
+  const body = await res.json()
+  return Array.isArray(body?.data) ? body.data : []
+}
+export default (sdkConfig) => {
+  let cache = null
+  const load = async () => {
+    if (!cache) cache = await fetchModels(sdkConfig)
+    return cache
+  }
+  return {
+    listModels: async () => (await load())
+      .filter(m => m.active !== false)
+      .map(m => ({ id: m.id, label: m.id })),
+    getModelInfo: async (id) => {
+      const m = (await load()).find(x => x.id === id)
+      if (!m) return null
+      const info = { model: m.id }
+      if (typeof m.context_window === 'number') info.contextTokenLimit = m.context_window
+      if (typeof m.max_completion_tokens === 'number') info.outputTokenLimit = m.max_completion_tokens
+      if (typeof m.created === 'number') info.created = m.created
+      return info
+    }
+  }
+}

package/src/lib/catalog/openai.js ADDED Viewed

@@ -0,0 +1,34 @@
+// Catalog fetcher for any OpenAI-compatible /v1/models endpoint
+// (openai, deepseek, mistral, novita, xai). Inference path lives in
+// js/session/adapters/openai.js — this module is CLI-only.
+const DEFAULT_BASE_URL = 'https://api.openai.com/v1'
+const fetchModels = async ({ apiKey, baseURL }) => {
+  const url = `${(baseURL || DEFAULT_BASE_URL).replace(/\/$/, '')}/models`
+  const res = await fetch(url, { headers: { Authorization: `Bearer ${apiKey}` } })
+  if (!res.ok) throw new Error(`${res.status} ${res.statusText} fetching ${url}`)
+  const body = await res.json()
+  return Array.isArray(body?.data) ? body.data : []
+}
+export default (sdkConfig) => {
+  let cache = null
+  const load = async () => {
+    if (!cache) cache = await fetchModels(sdkConfig)
+    return cache
+  }
+  return {
+    listModels: async () => (await load()).map(m => ({ id: m.id, label: m.id })),
+    getModelInfo: async (id) => {
+      const m = (await load()).find(x => x.id === id)
+      if (!m) return null
+      const info = { model: m.id }
+      if (typeof m.context_window === 'number') info.contextTokenLimit = m.context_window
+      if (typeof m.max_context_length === 'number') info.contextTokenLimit = m.max_context_length
+      if (typeof m.max_completion_tokens === 'number') info.outputTokenLimit = m.max_completion_tokens
+      if (typeof m.created === 'number') info.created = m.created
+      return info
+    }
+  }
+}

package/src/lib/catalog/openrouter.js ADDED Viewed

@@ -0,0 +1,48 @@
+const BASE_URL = 'https://openrouter.ai/api/v1'
+const fetchModels = async ({ apiKey }) => {
+  const res = await fetch(`${BASE_URL}/models`, { headers: { Authorization: `Bearer ${apiKey}` } })
+  if (!res.ok) throw new Error(`${res.status} ${res.statusText} fetching openrouter models`)
+  const body = await res.json()
+  return Array.isArray(body?.data) ? body.data : []
+}
+// OpenRouter prices are decimal USD per token — convert to per-million-tokens.
+const toPerMillion = (raw) => {
+  const n = typeof raw === 'string' ? parseFloat(raw) : raw
+  if (!Number.isFinite(n)) return undefined
+  return n * 1_000_000
+}
+export default (sdkConfig) => {
+  let cache = null
+  const load = async () => {
+    if (!cache) cache = await fetchModels(sdkConfig)
+    return cache
+  }
+  return {
+    listModels: async () => (await load()).map(m => ({ id: m.id, label: m.name || m.id })),
+    getModelInfo: async (id) => {
+      const m = (await load()).find(x => x.id === id)
+      if (!m) return null
+      const info = { model: m.id }
+      if (m.name) info.label = m.name
+      if (m.description) info.description = m.description
+      if (typeof m.context_length === 'number') info.contextTokenLimit = m.context_length
+      const topCtx = m.top_provider?.context_length
+      if (!info.contextTokenLimit && typeof topCtx === 'number') info.contextTokenLimit = topCtx
+      const maxOut = m.top_provider?.max_completion_tokens
+      if (typeof maxOut === 'number') info.outputTokenLimit = maxOut
+      const inP = toPerMillion(m.pricing?.prompt)
+      const outP = toPerMillion(m.pricing?.completion)
+      if (inP !== undefined) info.inputPrice = inP
+      if (outP !== undefined) info.outputPrice = outP
+      const modalities = m.architecture?.input_modalities
+      if (Array.isArray(modalities) && modalities.length) {
+        info.inputFormat = modalities.filter(x => typeof x === 'string')
+      }
+      if (typeof m.created === 'number') info.created = m.created
+      return info
+    }
+  }
+}

package/src/lib/select.js CHANGED Viewed

@@ -37,11 +37,9 @@ export const initializeAPIs = async () => {
       // Create configuration
       const sdkConfig = config.createConfiguration(apiKey)
-      // Import the SDK module dynamically
-      const sdkPath = `./sdk/${config.sdk}.js`
+      const sdkPath = `./catalog/${config.sdk}.js`
       const { default: API } = await import(sdkPath)
-      // Initialize the provider with the configuration, no specs
       api[name] = API(sdkConfig, {}, silent)
       providersWithKeys.push(name)
     } catch (err) {
@@ -195,10 +193,22 @@ export const promptMissingFields = async (entry, curatedKey) => {
     } else {
       const creatorVal = await clack.select({
         message: `Creator for ${curatedKey}:`,
-        options: creatorsList.map(c => ({ value: c, label: c }))
+        options: [
+          ...creatorsList.map(c => ({ value: c, label: c })),
+          { value: '__other', label: 'Other… (enter a name)' }
+        ]
       })
       if (clack.isCancel(creatorVal)) return entry
-      entry.creator = creatorVal
+      if (creatorVal === '__other') {
+        const custom = await clack.text({
+          message: `New creator name for ${curatedKey}:`,
+          validate: (v) => v?.trim() ? undefined : 'Creator is required'
+        })
+        if (clack.isCancel(custom)) return entry
+        entry.creator = custom.trim()
+      } else {
+        entry.creator = creatorVal
+      }
     }
   }