mohdel 0.91.0 → 0.92.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/js/core/events.js CHANGED
@@ -64,6 +64,13 @@
64
64
  * `'insufficientOutputBudget' | 'cancelled' | ...` additive union.
65
65
  * @property {ToolCall[]} [toolCalls]
66
66
  * Present when `status === 'tool_use'`.
67
+ * @property {number} [maxInterFrameMs]
68
+ * Longest gap (ms) between adapter events during the call —
69
+ * from `startedAt` to the first frame, between consecutive
70
+ * frames, and from the last frame to the terminal. Direct signal
71
+ * for calibrating downstream read timeouts: a 15-min call that
72
+ * streams deltas every 30s is safe; a 5-min call with zero
73
+ * intermediate frames is dangerous.
67
74
  */
68
75
 
69
76
  /**
package/js/session/run.js CHANGED
@@ -141,9 +141,33 @@ export async function * run (envelope, {
141
141
  }
142
142
 
143
143
  let sawTerminal = false
144
+ // Track whether the adapter emitted any `delta` events during the
145
+ // call. Recorded as `mohdel.stream` on the span at finalize time —
146
+ // a non-streaming terminal (adapter fell back to wait-for-done, or
147
+ // the provider SDK collapsed stream events) is a hazard for any
148
+ // host-side read-timeout watchdog and shows up in traces before
149
+ // it turns into an abort downstream.
150
+ let sawDelta = false
151
+ // Track the longest gap between adapter events within this call —
152
+ // from `startedAt` to the first frame, between consecutive frames,
153
+ // and from the last frame to the terminal. A direct signal for any
154
+ // host-side read-timeout watchdog: a 15-min call that streams
155
+ // deltas every 30s is safe; a 5-min call with zero intermediate
156
+ // frames is dangerous. Surfaced on `AnswerResult.maxInterFrameMs`
157
+ // and the span's `mohdel.max_inter_frame_ms` attribute so hosts
158
+ // can aggregate it for adaptive-timeout calibration.
159
+ let lastFrameAt = startedAt
160
+ let maxInterFrameMs = 0
144
161
  try {
145
162
  for await (const ev of adapter(envelope, { signal, log, span })) {
146
- if (ev.type === 'done') {
163
+ const now = Date.now()
164
+ const gap = now - lastFrameAt
165
+ if (gap > maxInterFrameMs) maxInterFrameMs = gap
166
+ lastFrameAt = now
167
+
168
+ if (ev.type === 'delta') {
169
+ sawDelta = true
170
+ } else if (ev.type === 'done') {
147
171
  sawTerminal = true
148
172
  // A cancelled terminal is the caller's action, not evidence
149
173
  // of provider recovery — don't wipe an accumulated failure
@@ -160,7 +184,11 @@ export async function * run (envelope, {
160
184
  (ev.result.thinkingTokens || 0)
161
185
  if (total > 0) limiter.recordTokens(bucketKey, total)
162
186
  }
163
- finalizeSpanOk(span, ev.result)
187
+ // Surface on AnswerResult so hosts that pass the whole
188
+ // result upstream pick it up without needing a separate
189
+ // wire field.
190
+ if (ev.result) ev.result.maxInterFrameMs = maxInterFrameMs
191
+ finalizeSpanOk(span, ev.result, sawDelta, maxInterFrameMs)
164
192
  log.debug(summarizeDone(ev.result, startedAt), '[mohdel:answer] done')
165
193
  } else if (ev.type === 'error') {
166
194
  sawTerminal = true
@@ -168,7 +196,8 @@ export async function * run (envelope, {
168
196
  log.warn({
169
197
  err: ev.error,
170
198
  provider: envelope.provider,
171
- totalMs: Date.now() - startedAt
199
+ totalMs: Date.now() - startedAt,
200
+ maxInterFrameMs
172
201
  }, '[mohdel:answer] failed')
173
202
  endSpanError(span, new Error(ev.error?.message || 'adapter error'))
174
203
  }
@@ -177,11 +206,12 @@ export async function * run (envelope, {
177
206
  } catch (e) {
178
207
  if (signal?.aborted && !sawTerminal) {
179
208
  const fallback = cancelledFallback()
180
- finalizeSpanOk(span, fallback.result)
209
+ if (fallback.result) fallback.result.maxInterFrameMs = maxInterFrameMs
210
+ finalizeSpanOk(span, fallback.result, sawDelta, maxInterFrameMs)
181
211
  yield fallback
182
212
  return
183
213
  }
184
- log.warn({ err: e, provider: envelope.provider }, '[mohdel:answer] adapter threw')
214
+ log.warn({ err: e, provider: envelope.provider, maxInterFrameMs }, '[mohdel:answer] adapter threw')
185
215
  endSpanError(span, e)
186
216
  yield errorEvent(messageOf(e), 'SESSION_ADAPTER_THREW')
187
217
  return
@@ -190,11 +220,12 @@ export async function * run (envelope, {
190
220
  if (!sawTerminal) {
191
221
  if (signal?.aborted) {
192
222
  const fallback = cancelledFallback()
193
- finalizeSpanOk(span, fallback.result)
223
+ if (fallback.result) fallback.result.maxInterFrameMs = maxInterFrameMs
224
+ finalizeSpanOk(span, fallback.result, sawDelta, maxInterFrameMs)
194
225
  yield fallback
195
226
  } else {
196
227
  const err = 'adapter returned without a terminal event'
197
- log.error({ provider: envelope.provider }, '[mohdel:answer] no terminal event')
228
+ log.error({ provider: envelope.provider, maxInterFrameMs }, '[mohdel:answer] no terminal event')
198
229
  endSpanError(span, new Error(err))
199
230
  yield errorEvent(err, 'SESSION_ADAPTER_NO_TERMINAL')
200
231
  }
@@ -288,14 +319,25 @@ function scopedLogger (logger, envelope, span) {
288
319
  /**
289
320
  * @param {any} span
290
321
  * @param {any} result
322
+ * @param {boolean} sawDelta Whether the adapter emitted at least one
323
+ * `delta` event during the call. Surfaces as `mohdel.stream` on the
324
+ * span so traces can flag calls that completed without streaming —
325
+ * a non-streaming long-running call is invisible to upstream idle
326
+ * watchdogs until the terminal lands.
327
+ * @param {number} maxInterFrameMs Longest gap between adapter events
328
+ * during the call (including pre-first-frame and post-last-frame).
329
+ * Surfaces as `mohdel.max_inter_frame_ms` so downstream timeout
330
+ * calibration has a direct signal independent of total elapsed.
291
331
  */
292
- function finalizeSpanOk (span, result) {
332
+ function finalizeSpanOk (span, result, sawDelta = false, maxInterFrameMs = 0) {
293
333
  /** @type {Record<string, any>} */
294
334
  const attrs = {
295
335
  'gen_ai.usage.input_tokens': result?.inputTokens || 0,
296
336
  'gen_ai.usage.output_tokens': result?.outputTokens || 0,
297
337
  'mohdel.thinking_tokens': result?.thinkingTokens || 0,
298
- 'mohdel.status': result?.status || 'unknown'
338
+ 'mohdel.status': result?.status || 'unknown',
339
+ 'mohdel.stream': !!sawDelta,
340
+ 'mohdel.max_inter_frame_ms': maxInterFrameMs
299
341
  }
300
342
  if (result?.cost != null) attrs['mohdel.cost'] = result.cost
301
343
  if (result?.warning) attrs['mohdel.warning'] = result.warning
@@ -323,7 +365,8 @@ function summarizeDone (result, startedAt) {
323
365
  think: result?.thinkingTokens || 0,
324
366
  cost: result?.cost,
325
367
  warning: result?.warning,
326
- totalMs: Date.now() - startedAt
368
+ totalMs: Date.now() - startedAt,
369
+ maxInterFrameMs: result?.maxInterFrameMs
327
370
  }
328
371
  }
329
372
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "mohdel",
3
- "version": "0.91.0",
3
+ "version": "0.92.0",
4
4
  "license": "MIT",
5
5
  "author": {
6
6
  "name": "Christophe Le Bars",
@@ -87,7 +87,7 @@
87
87
  "@opentelemetry/exporter-trace-otlp-grpc": "^0.215.0",
88
88
  "@opentelemetry/sdk-node": "^0.215.0",
89
89
  "chalk": "^5.4.0",
90
- "mohdel-thin-gate-linux-x64-gnu": "0.91.0"
90
+ "mohdel-thin-gate-linux-x64-gnu": "0.92.0"
91
91
  },
92
92
  "dependencies": {
93
93
  "@anthropic-ai/sdk": "^0.90.0",
@@ -0,0 +1,44 @@
1
const BASE_URL = 'https://api.anthropic.com/v1'
const API_VERSION = '2023-06-01'

/**
 * Fetch the full model list from Anthropic's `/models` endpoint,
 * following `after_id` cursor pagination until `has_more` is false.
 *
 * @param {{ apiKey: string }} sdkConfig Only `apiKey` is read; it is sent
 *   as the `x-api-key` header.
 * @returns {Promise<object[]>} Raw model records from every page, in the
 *   order the API returned them.
 * @throws {Error} When any page responds with a non-2xx status.
 */
const fetchModels = async ({ apiKey }) => {
  const all = []
  let afterId = null
  while (true) {
    const url = new URL(`${BASE_URL}/models`)
    url.searchParams.set('limit', '100')
    if (afterId) url.searchParams.set('after_id', afterId)
    const res = await fetch(url, {
      headers: {
        'x-api-key': apiKey,
        'anthropic-version': API_VERSION
      }
    })
    if (!res.ok) throw new Error(`${res.status} ${res.statusText} fetching ${url}`)
    const body = await res.json()
    const page = Array.isArray(body?.data) ? body.data : []
    all.push(...page)
    if (!body?.has_more || !page.length) break
    // Prefer the cursor the API reports; fall back to the last item's id.
    const nextId = body.last_id ?? page[page.length - 1]?.id
    // Without a fresh cursor the next request would replay a page we have
    // already collected, and the loop would never terminate.
    if (!nextId || nextId === afterId) break
    afterId = nextId
  }
  return all
}
26
+
27
/**
 * Build the Anthropic catalog API for one SDK configuration.
 *
 * The model list is fetched lazily on first use and then shared by both
 * methods for the lifetime of the returned object.
 *
 * @param {{ apiKey: string }} sdkConfig Passed through to `fetchModels`.
 * @returns {{
 *   listModels: () => Promise<Array<{ id: string, label: string }>>,
 *   getModelInfo: (id: string) => Promise<object | null>
 * }} `getModelInfo` resolves to `null` when the id is not in the list.
 */
const createCatalog = (sdkConfig) => {
  // Cache the in-flight promise, not the resolved array: concurrent first
  // calls then share one request instead of each starting their own.
  let pending = null
  const load = () => {
    if (!pending) {
      pending = fetchModels(sdkConfig).catch((err) => {
        // Failures are not memoized, so the next call retries.
        pending = null
        throw err
      })
    }
    return pending
  }
  return {
    listModels: async () => (await load()).map(m => ({ id: m.id, label: m.display_name || m.id })),
    getModelInfo: async (id) => {
      const m = (await load()).find(x => x.id === id)
      if (!m) return null
      const info = { model: m.id }
      if (m.display_name) info.label = m.display_name
      if (m.created_at) info.createdAt = m.created_at
      return info
    }
  }
}

export default createCatalog
@@ -0,0 +1,26 @@
1
const BASE_URL = 'https://api.cerebras.ai/v1'

/**
 * Retrieve the Cerebras model list in a single request.
 *
 * @param {{ apiKey: string }} sdkConfig Only `apiKey` is read; it is sent
 *   as a Bearer token.
 * @returns {Promise<object[]>} The response's `data` array, or an empty
 *   array when `data` is not an array.
 * @throws {Error} When the response status is not 2xx.
 */
const fetchModels = async ({ apiKey }) => {
  const headers = { Authorization: `Bearer ${apiKey}` }
  const response = await fetch(`${BASE_URL}/models`, { headers })
  if (!response.ok) {
    throw new Error(`${response.status} ${response.statusText} fetching cerebras models`)
  }
  const payload = await response.json()
  const models = payload?.data
  if (Array.isArray(models)) return models
  return []
}
9
+
10
/**
 * Build the Cerebras catalog API for one SDK configuration.
 *
 * The model list is fetched lazily on first use and then shared by both
 * methods for the lifetime of the returned object.
 *
 * @param {{ apiKey: string }} sdkConfig Passed through to `fetchModels`.
 * @returns {{
 *   listModels: () => Promise<Array<{ id: string, label: string }>>,
 *   getModelInfo: (id: string) => Promise<object | null>
 * }} `getModelInfo` resolves to `null` when the id is not in the list.
 */
const createCatalog = (sdkConfig) => {
  // Cache the in-flight promise, not the resolved array: concurrent first
  // calls then share one request instead of each starting their own.
  let pending = null
  const load = () => {
    if (!pending) {
      pending = fetchModels(sdkConfig).catch((err) => {
        // Failures are not memoized, so the next call retries.
        pending = null
        throw err
      })
    }
    return pending
  }
  return {
    listModels: async () => (await load()).map(m => ({ id: m.id, label: m.id })),
    getModelInfo: async (id) => {
      const m = (await load()).find(x => x.id === id)
      if (!m) return null
      const info = { model: m.id }
      if (typeof m.created === 'number') info.created = m.created
      return info
    }
  }
}

export default createCatalog
@@ -0,0 +1,38 @@
1
// Listing models through Fireworks' /inference/v1/models works the same
// way as on any OpenAI-compatible API. Upstream ids are prefixed with
// `accounts/fireworks/models/`; the runtime adapter re-adds that prefix
// automatically, so it is removed here to keep curated entries short.
const DEFAULT_BASE_URL = 'https://api.fireworks.ai/inference/v1'
const FW_PREFIX = 'accounts/fireworks/models/'

/**
 * Drop the Fireworks account prefix from a model id when it is present.
 *
 * @param {string | null | undefined} id Upstream model id.
 * @returns {string | null | undefined} The id without the prefix, or the
 *   input unchanged when the prefix is absent or the id is nullish.
 */
const shortId = (id) => {
  if (id?.startsWith(FW_PREFIX)) return id.slice(FW_PREFIX.length)
  return id
}

/**
 * Retrieve the Fireworks model list in a single request.
 *
 * @param {{ apiKey: string, baseURL?: string }} sdkConfig `apiKey` is sent
 *   as a Bearer token; `baseURL` overrides the default endpoint root (one
 *   trailing slash is removed).
 * @returns {Promise<object[]>} The response's `data` array, or an empty
 *   array when `data` is not an array.
 * @throws {Error} When the response status is not 2xx.
 */
const fetchModels = async ({ apiKey, baseURL }) => {
  const root = (baseURL || DEFAULT_BASE_URL).replace(/\/$/, '')
  const url = `${root}/models`
  const headers = { Authorization: `Bearer ${apiKey}` }
  const response = await fetch(url, { headers })
  if (!response.ok) {
    throw new Error(`${response.status} ${response.statusText} fetching ${url}`)
  }
  const payload = await response.json()
  const models = payload?.data
  if (Array.isArray(models)) return models
  return []
}
17
+
18
/**
 * Build the Fireworks catalog API for one SDK configuration.
 *
 * The model list is fetched lazily on first use and then shared by both
 * methods. Ids are exposed in their short form (as produced by `shortId`)
 * and `getModelInfo` matches against that short form.
 *
 * @param {{ apiKey: string, baseURL?: string }} sdkConfig Passed through
 *   to `fetchModels`.
 * @returns {{
 *   listModels: () => Promise<Array<{ id: string, label: string }>>,
 *   getModelInfo: (id: string) => Promise<object | null>
 * }} `getModelInfo` resolves to `null` when the id is not in the list.
 */
const createCatalog = (sdkConfig) => {
  // Cache the in-flight promise, not the resolved array: concurrent first
  // calls then share one request instead of each starting their own.
  let pending = null
  const load = () => {
    if (!pending) {
      pending = fetchModels(sdkConfig).catch((err) => {
        // Failures are not memoized, so the next call retries.
        pending = null
        throw err
      })
    }
    return pending
  }
  return {
    listModels: async () => (await load()).map(m => {
      const id = shortId(m.id)
      return { id, label: id }
    }),
    getModelInfo: async (id) => {
      const m = (await load()).find(x => shortId(x.id) === id)
      if (!m) return null
      const info = { model: shortId(m.id) }
      if (typeof m.context_length === 'number') info.contextTokenLimit = m.context_length
      if (typeof m.created === 'number') info.created = m.created
      return info
    }
  }
}

export default createCatalog
@@ -0,0 +1,46 @@
1
const BASE_URL = 'https://generativelanguage.googleapis.com/v1beta'

/**
 * Fetch the full model list from the Generative Language `/models`
 * endpoint, following `nextPageToken` pagination.
 *
 * @param {{ apiKey: string }} sdkConfig Only `apiKey` is read.
 * @returns {Promise<object[]>} Raw model records from every page, in the
 *   order the API returned them.
 * @throws {Error} When any page responds with a non-2xx status. The
 *   message names only the URL path, never the credentials.
 */
const fetchModels = async ({ apiKey }) => {
  const all = []
  let pageToken = null
  while (true) {
    const url = new URL(`${BASE_URL}/models`)
    url.searchParams.set('pageSize', '1000')
    if (pageToken) url.searchParams.set('pageToken', pageToken)
    // Send the key as a header rather than a `key` query parameter so it
    // does not end up in URLs captured by proxies, logs or error traces.
    const res = await fetch(url, { headers: { 'x-goog-api-key': apiKey } })
    if (!res.ok) throw new Error(`${res.status} ${res.statusText} fetching ${url.pathname}`)
    const body = await res.json()
    const page = Array.isArray(body?.models) ? body.models : []
    all.push(...page)
    const nextToken = body?.nextPageToken
    // A repeated token would replay the same page forever; stop instead.
    if (!nextToken || nextToken === pageToken) break
    pageToken = nextToken
  }
  return all
}
21
+
22
/**
 * Strip the leading `models/` segment from a resource name.
 *
 * Non-string input (for example a record with no `name`) is returned
 * unchanged instead of throwing, so one malformed entry cannot break the
 * whole listing.
 *
 * @param {string | null | undefined} name Resource name such as `models/foo`.
 * @returns {string | null | undefined} The name without the `models/`
 *   prefix, or the input unchanged when it has no such prefix.
 */
const shortId = (name) => typeof name === 'string' && name.startsWith('models/') ? name.slice('models/'.length) : name
23
+
24
/**
 * Build the Google catalog API for one SDK configuration.
 *
 * The model list is fetched lazily on first use and then shared by both
 * methods. `listModels` only returns models whose
 * `supportedGenerationMethods` include `generateContent`; `getModelInfo`
 * searches the unfiltered list. Ids are the short form produced by
 * `shortId`.
 *
 * @param {{ apiKey: string }} sdkConfig Passed through to `fetchModels`.
 * @returns {{
 *   listModels: () => Promise<Array<{ id: string, label: string }>>,
 *   getModelInfo: (id: string) => Promise<object | null>
 * }} `getModelInfo` resolves to `null` when the id is not in the list.
 */
const createCatalog = (sdkConfig) => {
  // Cache the in-flight promise, not the resolved array: concurrent first
  // calls then share one request instead of each starting their own.
  let pending = null
  const load = () => {
    if (!pending) {
      pending = fetchModels(sdkConfig).catch((err) => {
        // Failures are not memoized, so the next call retries.
        pending = null
        throw err
      })
    }
    return pending
  }
  return {
    listModels: async () => (await load())
      .filter(m => (m.supportedGenerationMethods || []).includes('generateContent'))
      .map(m => ({ id: shortId(m.name), label: m.displayName || shortId(m.name) })),
    getModelInfo: async (id) => {
      const m = (await load()).find(x => shortId(x.name) === id)
      if (!m) return null
      const info = { model: shortId(m.name) }
      if (m.displayName) info.label = m.displayName
      if (m.description) info.description = m.description
      if (typeof m.inputTokenLimit === 'number') info.contextTokenLimit = m.inputTokenLimit
      if (typeof m.outputTokenLimit === 'number') info.outputTokenLimit = m.outputTokenLimit
      if (m.version) info.version = m.version
      return info
    }
  }
}

export default createCatalog
@@ -0,0 +1,30 @@
1
const BASE_URL = 'https://api.groq.com/openai/v1'

/**
 * Retrieve the Groq model list in a single request.
 *
 * @param {{ apiKey: string }} sdkConfig Only `apiKey` is read; it is sent
 *   as a Bearer token.
 * @returns {Promise<object[]>} The response's `data` array, or an empty
 *   array when `data` is not an array.
 * @throws {Error} When the response status is not 2xx.
 */
const fetchModels = async ({ apiKey }) => {
  const endpoint = `${BASE_URL}/models`
  const authHeaders = { Authorization: `Bearer ${apiKey}` }
  const reply = await fetch(endpoint, { headers: authHeaders })
  if (!reply.ok) {
    throw new Error(`${reply.status} ${reply.statusText} fetching groq models`)
  }
  const { data } = (await reply.json()) ?? {}
  return Array.isArray(data) ? data : []
}
9
+
10
/**
 * Build the Groq catalog API for one SDK configuration.
 *
 * The model list is fetched lazily on first use and then shared by both
 * methods. `listModels` hides entries whose `active` flag is exactly
 * `false`; `getModelInfo` searches the unfiltered list.
 *
 * @param {{ apiKey: string }} sdkConfig Passed through to `fetchModels`.
 * @returns {{
 *   listModels: () => Promise<Array<{ id: string, label: string }>>,
 *   getModelInfo: (id: string) => Promise<object | null>
 * }} `getModelInfo` resolves to `null` when the id is not in the list.
 */
const createCatalog = (sdkConfig) => {
  // Cache the in-flight promise, not the resolved array: concurrent first
  // calls then share one request instead of each starting their own.
  let pending = null
  const load = () => {
    if (!pending) {
      pending = fetchModels(sdkConfig).catch((err) => {
        // Failures are not memoized, so the next call retries.
        pending = null
        throw err
      })
    }
    return pending
  }
  return {
    listModels: async () => (await load())
      .filter(m => m.active !== false)
      .map(m => ({ id: m.id, label: m.id })),
    getModelInfo: async (id) => {
      const m = (await load()).find(x => x.id === id)
      if (!m) return null
      const info = { model: m.id }
      if (typeof m.context_window === 'number') info.contextTokenLimit = m.context_window
      if (typeof m.max_completion_tokens === 'number') info.outputTokenLimit = m.max_completion_tokens
      if (typeof m.created === 'number') info.created = m.created
      return info
    }
  }
}

export default createCatalog
@@ -0,0 +1,34 @@
1
// Model-catalog fetcher usable against every OpenAI-compatible
// /v1/models endpoint (openai, deepseek, mistral, novita, xai). This
// module is CLI-only; the inference path is implemented in
// js/session/adapters/openai.js.

const DEFAULT_BASE_URL = 'https://api.openai.com/v1'

/**
 * Retrieve the model list from an OpenAI-compatible endpoint in a single
 * request.
 *
 * @param {{ apiKey: string, baseURL?: string }} sdkConfig `apiKey` is sent
 *   as a Bearer token; `baseURL` overrides the default endpoint root (one
 *   trailing slash is removed).
 * @returns {Promise<object[]>} The response's `data` array, or an empty
 *   array when `data` is not an array.
 * @throws {Error} When the response status is not 2xx.
 */
const fetchModels = async ({ apiKey, baseURL }) => {
  const root = (baseURL || DEFAULT_BASE_URL).replace(/\/$/, '')
  const url = `${root}/models`
  const headers = { Authorization: `Bearer ${apiKey}` }
  const response = await fetch(url, { headers })
  if (!response.ok) {
    throw new Error(`${response.status} ${response.statusText} fetching ${url}`)
  }
  const payload = await response.json()
  const models = payload?.data
  if (Array.isArray(models)) return models
  return []
}
14
+
15
/**
 * Build the catalog API for an OpenAI-compatible provider.
 *
 * The model list is fetched lazily on first use and then shared by both
 * methods. For the context limit, `max_context_length` takes precedence
 * over `context_window` when a record carries both.
 *
 * @param {{ apiKey: string, baseURL?: string }} sdkConfig Passed through
 *   to `fetchModels`.
 * @returns {{
 *   listModels: () => Promise<Array<{ id: string, label: string }>>,
 *   getModelInfo: (id: string) => Promise<object | null>
 * }} `getModelInfo` resolves to `null` when the id is not in the list.
 */
const createCatalog = (sdkConfig) => {
  // Cache the in-flight promise, not the resolved array: concurrent first
  // calls then share one request instead of each starting their own.
  let pending = null
  const load = () => {
    if (!pending) {
      pending = fetchModels(sdkConfig).catch((err) => {
        // Failures are not memoized, so the next call retries.
        pending = null
        throw err
      })
    }
    return pending
  }
  return {
    listModels: async () => (await load()).map(m => ({ id: m.id, label: m.id })),
    getModelInfo: async (id) => {
      const m = (await load()).find(x => x.id === id)
      if (!m) return null
      const info = { model: m.id }
      if (typeof m.context_window === 'number') info.contextTokenLimit = m.context_window
      if (typeof m.max_context_length === 'number') info.contextTokenLimit = m.max_context_length
      if (typeof m.max_completion_tokens === 'number') info.outputTokenLimit = m.max_completion_tokens
      if (typeof m.created === 'number') info.created = m.created
      return info
    }
  }
}

export default createCatalog
@@ -0,0 +1,48 @@
1
const BASE_URL = 'https://openrouter.ai/api/v1'

/**
 * Retrieve the OpenRouter model list in a single request.
 *
 * @param {{ apiKey: string }} sdkConfig Only `apiKey` is read; it is sent
 *   as a Bearer token.
 * @returns {Promise<object[]>} The response's `data` array, or an empty
 *   array when `data` is not an array.
 * @throws {Error} When the response status is not 2xx.
 */
const fetchModels = async ({ apiKey }) => {
  const headers = { Authorization: `Bearer ${apiKey}` }
  const response = await fetch(`${BASE_URL}/models`, { headers })
  if (!response.ok) {
    throw new Error(`${response.status} ${response.statusText} fetching openrouter models`)
  }
  const payload = await response.json()
  const models = payload?.data
  if (Array.isArray(models)) return models
  return []
}
9
+
10
/**
 * Convert an OpenRouter price (decimal USD per token, as a number or a
 * numeric string) into USD per million tokens.
 *
 * @param {string | number | null | undefined} raw Per-token price.
 * @returns {number | undefined} Price per million tokens, or `undefined`
 *   when the input is missing, not a finite number, or negative.
 */
const toPerMillion = (raw) => {
  const n = typeof raw === 'string' ? parseFloat(raw) : raw
  if (!Number.isFinite(n)) return undefined
  // Negative values are not real prices — presumably a "variable pricing"
  // sentinel such as "-1" on router models (TODO confirm against the API
  // docs). Treat them as unknown instead of emitting a negative price.
  if (n < 0) return undefined
  // Binary floating point can leave noise after the multiplication
  // (e.g. 0.15000000000000002); 12 significant digits removes it without
  // affecting any realistic price.
  return Number((n * 1_000_000).toPrecision(12))
}
16
+
17
/**
 * Build the OpenRouter catalog API for one SDK configuration.
 *
 * The model list is fetched lazily on first use and then shared by both
 * methods. `getModelInfo` maps the upstream record onto the curated field
 * names: context and output limits, per-million-token prices (via
 * `toPerMillion`), and accepted input modalities.
 *
 * @param {{ apiKey: string }} sdkConfig Passed through to `fetchModels`.
 * @returns {{
 *   listModels: () => Promise<Array<{ id: string, label: string }>>,
 *   getModelInfo: (id: string) => Promise<object | null>
 * }} `getModelInfo` resolves to `null` when the id is not in the list.
 */
const createCatalog = (sdkConfig) => {
  // Cache the in-flight promise, not the resolved array: concurrent first
  // calls then share one request instead of each starting their own.
  let pending = null
  const load = () => {
    if (!pending) {
      pending = fetchModels(sdkConfig).catch((err) => {
        // Failures are not memoized, so the next call retries.
        pending = null
        throw err
      })
    }
    return pending
  }
  return {
    listModels: async () => (await load()).map(m => ({ id: m.id, label: m.name || m.id })),
    getModelInfo: async (id) => {
      const m = (await load()).find(x => x.id === id)
      if (!m) return null
      const info = { model: m.id }
      if (m.name) info.label = m.name
      if (m.description) info.description = m.description
      if (typeof m.context_length === 'number') info.contextTokenLimit = m.context_length
      // The top provider's context length is only a fallback for a missing
      // (or zero) model-level value.
      const topCtx = m.top_provider?.context_length
      if (!info.contextTokenLimit && typeof topCtx === 'number') info.contextTokenLimit = topCtx
      const maxOut = m.top_provider?.max_completion_tokens
      if (typeof maxOut === 'number') info.outputTokenLimit = maxOut
      const inP = toPerMillion(m.pricing?.prompt)
      const outP = toPerMillion(m.pricing?.completion)
      if (inP !== undefined) info.inputPrice = inP
      if (outP !== undefined) info.outputPrice = outP
      const modalities = m.architecture?.input_modalities
      if (Array.isArray(modalities) && modalities.length) {
        info.inputFormat = modalities.filter(x => typeof x === 'string')
      }
      if (typeof m.created === 'number') info.created = m.created
      return info
    }
  }
}

export default createCatalog
package/src/lib/select.js CHANGED
@@ -37,11 +37,9 @@ export const initializeAPIs = async () => {
37
37
  // Create configuration
38
38
  const sdkConfig = config.createConfiguration(apiKey)
39
39
 
40
- // Import the SDK module dynamically
41
- const sdkPath = `./sdk/${config.sdk}.js`
40
+ const sdkPath = `./catalog/${config.sdk}.js`
42
41
  const { default: API } = await import(sdkPath)
43
42
 
44
- // Initialize the provider with the configuration, no specs
45
43
  api[name] = API(sdkConfig, {}, silent)
46
44
  providersWithKeys.push(name)
47
45
  } catch (err) {
@@ -195,10 +193,22 @@ export const promptMissingFields = async (entry, curatedKey) => {
195
193
  } else {
196
194
  const creatorVal = await clack.select({
197
195
  message: `Creator for ${curatedKey}:`,
198
- options: creatorsList.map(c => ({ value: c, label: c }))
196
+ options: [
197
+ ...creatorsList.map(c => ({ value: c, label: c })),
198
+ { value: '__other', label: 'Other… (enter a name)' }
199
+ ]
199
200
  })
200
201
  if (clack.isCancel(creatorVal)) return entry
201
- entry.creator = creatorVal
202
+ if (creatorVal === '__other') {
203
+ const custom = await clack.text({
204
+ message: `New creator name for ${curatedKey}:`,
205
+ validate: (v) => v?.trim() ? undefined : 'Creator is required'
206
+ })
207
+ if (clack.isCancel(custom)) return entry
208
+ entry.creator = custom.trim()
209
+ } else {
210
+ entry.creator = creatorVal
211
+ }
202
212
  }
203
213
  }
204
214