mohdel 0.90.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +377 -0
- package/config/benchmarks.json +39 -0
- package/js/client/call.js +75 -0
- package/js/client/call_image.js +82 -0
- package/js/client/gate-binary.js +72 -0
- package/js/client/index.js +16 -0
- package/js/client/ndjson.js +29 -0
- package/js/client/transport.js +48 -0
- package/js/core/envelope.js +141 -0
- package/js/core/errors.js +75 -0
- package/js/core/events.js +96 -0
- package/js/core/image.js +58 -0
- package/js/core/index.js +10 -0
- package/js/core/status.js +48 -0
- package/js/factory/bridge.js +372 -0
- package/js/session/_cooldown.js +114 -0
- package/js/session/_logger.js +138 -0
- package/js/session/_rate_limiter.js +77 -0
- package/js/session/_tracing.js +58 -0
- package/js/session/adapters/_cancelled.js +44 -0
- package/js/session/adapters/_catalog.js +58 -0
- package/js/session/adapters/_chat_completions.js +439 -0
- package/js/session/adapters/_errors.js +85 -0
- package/js/session/adapters/_images.js +60 -0
- package/js/session/adapters/_lazy_json_cache.js +76 -0
- package/js/session/adapters/_pricing.js +67 -0
- package/js/session/adapters/_providers.js +60 -0
- package/js/session/adapters/_tools.js +185 -0
- package/js/session/adapters/_videos.js +283 -0
- package/js/session/adapters/anthropic.js +397 -0
- package/js/session/adapters/cerebras.js +28 -0
- package/js/session/adapters/deepseek.js +32 -0
- package/js/session/adapters/echo.js +51 -0
- package/js/session/adapters/fake.js +262 -0
- package/js/session/adapters/fireworks.js +46 -0
- package/js/session/adapters/gemini.js +381 -0
- package/js/session/adapters/groq.js +23 -0
- package/js/session/adapters/image/fake.js +55 -0
- package/js/session/adapters/image/index.js +40 -0
- package/js/session/adapters/image/novita.js +135 -0
- package/js/session/adapters/image/openai.js +50 -0
- package/js/session/adapters/index.js +53 -0
- package/js/session/adapters/mistral.js +31 -0
- package/js/session/adapters/novita.js +29 -0
- package/js/session/adapters/openai.js +381 -0
- package/js/session/adapters/openrouter.js +66 -0
- package/js/session/adapters/xai.js +27 -0
- package/js/session/bin.js +54 -0
- package/js/session/driver.js +160 -0
- package/js/session/index.js +18 -0
- package/js/session/run.js +393 -0
- package/js/session/run_image.js +61 -0
- package/package.json +107 -0
- package/src/cli/ask.js +160 -0
- package/src/cli/backup.js +107 -0
- package/src/cli/bench.js +262 -0
- package/src/cli/check.js +123 -0
- package/src/cli/colored-logger.js +67 -0
- package/src/cli/colors.js +13 -0
- package/src/cli/default.js +39 -0
- package/src/cli/index.js +150 -0
- package/src/cli/json-output.js +60 -0
- package/src/cli/model.js +571 -0
- package/src/cli/onboard.js +232 -0
- package/src/cli/rank.js +176 -0
- package/src/cli/ratelimit.js +160 -0
- package/src/cli/tag.js +105 -0
- package/src/lib/assets/alibaba.svg +1 -0
- package/src/lib/assets/anthropic.svg +5 -0
- package/src/lib/assets/deepseek.svg +1 -0
- package/src/lib/assets/gemini.svg +1 -0
- package/src/lib/assets/google.svg +2 -0
- package/src/lib/assets/kwaipilot.svg +1 -0
- package/src/lib/assets/meta.svg +1 -0
- package/src/lib/assets/minimax.svg +9 -0
- package/src/lib/assets/moonshotai.svg +4 -0
- package/src/lib/assets/openai.svg +5 -0
- package/src/lib/assets/xai.svg +1 -0
- package/src/lib/assets/xiaomi.svg +2 -0
- package/src/lib/assets/zai.svg +219 -0
- package/src/lib/benchmark-score.js +215 -0
- package/src/lib/benchmark-truth.js +68 -0
- package/src/lib/cache.js +76 -0
- package/src/lib/common.js +208 -0
- package/src/lib/cooldown.js +63 -0
- package/src/lib/creators.js +71 -0
- package/src/lib/curated-cache.js +146 -0
- package/src/lib/errors.js +126 -0
- package/src/lib/index.js +726 -0
- package/src/lib/logger.js +29 -0
- package/src/lib/providers.js +87 -0
- package/src/lib/rank.js +390 -0
- package/src/lib/rate-limiter.js +50 -0
- package/src/lib/schema.js +150 -0
- package/src/lib/select.js +474 -0
- package/src/lib/tracing.js +62 -0
- package/src/lib/utils.js +85 -0
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NDJSON stdio driver.
|
|
3
|
+
*
|
|
4
|
+
* Reads lines from stdin concurrently with running calls:
|
|
5
|
+
* - Envelope line (no `op` field, or `op:"image"` for image calls): queued for sequential dispatch
|
|
6
|
+
* via `run()`. Events written to stdout as NDJSON.
|
|
7
|
+
* - Control message `{op:"cancel", callId}`: if it matches the
|
|
8
|
+
* in-flight call, aborts via AbortController.
|
|
9
|
+
*
|
|
10
|
+
* Single-call-at-a-time per process.
|
|
11
|
+
*
|
|
12
|
+
* @module session/driver
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import readline from 'node:readline'
|
|
16
|
+
|
|
17
|
+
import { run } from './run.js'
|
|
18
|
+
import { runImage } from './run_image.js'
|
|
19
|
+
import { setCatalog } from './adapters/_catalog.js'
|
|
20
|
+
|
|
21
|
+
// Bounded memory for pre-dequeue cancels. Hostile/buggy supervisors
|
|
22
|
+
// spamming random callIds can't grow the set without bound.
|
|
23
|
+
const PRECANCEL_CAP = 128
|
|
24
|
+
|
|
25
|
+
/**
|
|
26
|
+
* @param {NodeJS.ReadableStream} stdin
|
|
27
|
+
* @param {NodeJS.WritableStream} stdout
|
|
28
|
+
* @returns {Promise<void>}
|
|
29
|
+
*/
|
|
30
|
+
export async function drive (stdin, stdout) {
|
|
31
|
+
const rl = readline.createInterface({ input: stdin, crlfDelay: Infinity })
|
|
32
|
+
|
|
33
|
+
/** @type {{callId: string, controller: AbortController} | null} */
|
|
34
|
+
let currentCall = null
|
|
35
|
+
/** @type {import('#core/envelope.js').CallEnvelope[]} */
|
|
36
|
+
const envelopeQueue = []
|
|
37
|
+
/** @type {(() => void) | null} */
|
|
38
|
+
let queueNotify = null
|
|
39
|
+
let stdinClosed = false
|
|
40
|
+
/** Cancel messages received before their envelope was dequeued.
|
|
41
|
+
* JS Sets are insertion-ordered, so `values().next()` is the
|
|
42
|
+
* oldest entry — cheap FIFO eviction at cap. */
|
|
43
|
+
const precancelled = new Set()
|
|
44
|
+
|
|
45
|
+
function recordPrecancel (callId) {
|
|
46
|
+
if (precancelled.has(callId)) return
|
|
47
|
+
if (precancelled.size >= PRECANCEL_CAP) {
|
|
48
|
+
precancelled.delete(precancelled.values().next().value)
|
|
49
|
+
}
|
|
50
|
+
precancelled.add(callId)
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
rl.on('line', (line) => {
|
|
54
|
+
const trimmed = line.trim()
|
|
55
|
+
if (!trimmed) return
|
|
56
|
+
|
|
57
|
+
let obj
|
|
58
|
+
try {
|
|
59
|
+
obj = JSON.parse(trimmed)
|
|
60
|
+
} catch (e) {
|
|
61
|
+
process.stderr.write(`session: failed to parse stdin line: ${e.message}\n`)
|
|
62
|
+
return
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
if (obj && typeof obj === 'object' && obj.op === 'cancel') {
|
|
66
|
+
if (currentCall && currentCall.callId === obj.callId) {
|
|
67
|
+
currentCall.controller.abort()
|
|
68
|
+
} else {
|
|
69
|
+
// Pre-dequeue cancel: remember the callId so the envelope
|
|
70
|
+
// aborts immediately on dispatch. Honored once then cleared.
|
|
71
|
+
recordPrecancel(obj.callId)
|
|
72
|
+
}
|
|
73
|
+
return
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Readiness heartbeat from supervisor: the pool sends `ping`
|
|
77
|
+
// before marking a fresh session "pool-ready". Reply immediately
|
|
78
|
+
// with `pong` on stdout — emitted as a standalone control frame,
|
|
79
|
+
// outside any in-flight call's event stream.
|
|
80
|
+
//
|
|
81
|
+
// Current protocol pings only between calls. If a supervisor
|
|
82
|
+
// violates that invariant and pings a busy session, emitting
|
|
83
|
+
// `{op:"pong"}` mid-stream would land in the gate's
|
|
84
|
+
// `pool_stream_next` read buffer, fail Event parse, and get
|
|
85
|
+
// classified as `SESSION_INVALID_EVENT` — killing the session.
|
|
86
|
+
// Drop mid-call pings; the supervisor can re-ping after the
|
|
87
|
+
// call terminates.
|
|
88
|
+
// Catalog injection from the supervisor. Supersedes whatever was
|
|
89
|
+
// loaded from disk at startup. Lets a supervisor (e.g. a
|
|
90
|
+
// thin-gate session pool) run sessions in contexts without
|
|
91
|
+
// access to `~/.config/mohdel/` by providing the catalog via
|
|
92
|
+
// stdin instead.
|
|
93
|
+
if (obj && typeof obj === 'object' && obj.op === 'set_catalog') {
|
|
94
|
+
if (obj.table && typeof obj.table === 'object') {
|
|
95
|
+
setCatalog(obj.table)
|
|
96
|
+
} else {
|
|
97
|
+
process.stderr.write('session: set_catalog requires `table: object`\n')
|
|
98
|
+
}
|
|
99
|
+
return
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
if (obj && typeof obj === 'object' && obj.op === 'ping') {
|
|
103
|
+
if (currentCall) {
|
|
104
|
+
process.stderr.write(
|
|
105
|
+
`session: ping during in-flight call ${currentCall.callId}; ignored\n`
|
|
106
|
+
)
|
|
107
|
+
return
|
|
108
|
+
}
|
|
109
|
+
stdout.write(JSON.stringify({ op: 'pong' }) + '\n')
|
|
110
|
+
return
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
envelopeQueue.push(obj)
|
|
114
|
+
if (queueNotify) { queueNotify(); queueNotify = null }
|
|
115
|
+
})
|
|
116
|
+
|
|
117
|
+
rl.on('close', () => {
|
|
118
|
+
stdinClosed = true
|
|
119
|
+
if (queueNotify) { queueNotify(); queueNotify = null }
|
|
120
|
+
})
|
|
121
|
+
|
|
122
|
+
while (true) {
|
|
123
|
+
// `stdinClosed` flips asynchronously inside the `rl.on('close')`
|
|
124
|
+
// callback above, which also signals `queueNotify`. The linter's
|
|
125
|
+
// no-unmodified-loop-condition rule can't see callback mutation,
|
|
126
|
+
// so it false-positives here.
|
|
127
|
+
// eslint-disable-next-line no-unmodified-loop-condition
|
|
128
|
+
while (envelopeQueue.length === 0 && !stdinClosed) {
|
|
129
|
+
await new Promise(resolve => { queueNotify = resolve })
|
|
130
|
+
}
|
|
131
|
+
if (envelopeQueue.length === 0) return
|
|
132
|
+
|
|
133
|
+
const envelope = envelopeQueue.shift()
|
|
134
|
+
const controller = new AbortController()
|
|
135
|
+
if (precancelled.delete(envelope.callId)) controller.abort()
|
|
136
|
+
currentCall = { callId: envelope.callId, controller }
|
|
137
|
+
|
|
138
|
+
try {
|
|
139
|
+
if (envelope.op === 'image') {
|
|
140
|
+
// Image envelopes are one-shot: a single terminal line on
|
|
141
|
+
// stdout, no streaming. `op` is a driver-internal tag
|
|
142
|
+
// (strip before dispatch so the JS ImageEnvelope shape
|
|
143
|
+
// matches `js/core/image.js` exactly).
|
|
144
|
+
const { op: _op, ...imgEnv } = envelope
|
|
145
|
+
const out = await runImage(imgEnv)
|
|
146
|
+
if (out.ok) {
|
|
147
|
+
stdout.write(JSON.stringify({ type: 'image_done', result: out.result }) + '\n')
|
|
148
|
+
} else {
|
|
149
|
+
stdout.write(JSON.stringify({ type: 'error', error: out.error }) + '\n')
|
|
150
|
+
}
|
|
151
|
+
} else {
|
|
152
|
+
for await (const ev of run(envelope, { signal: controller.signal })) {
|
|
153
|
+
stdout.write(JSON.stringify(ev) + '\n')
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
} finally {
|
|
157
|
+
currentCall = null
|
|
158
|
+
}
|
|
159
|
+
}
|
|
160
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* mohdel session — provider executor.
|
|
3
|
+
*
|
|
4
|
+
* Public surface (0.90):
|
|
5
|
+
* - run(envelope): AsyncGenerator<Event>
|
|
6
|
+
* - adapters: { [provider]: adapter }
|
|
7
|
+
* - getAdapter(provider): adapter
|
|
8
|
+
* - drive(stdin, stdout): NDJSON stdio driver
|
|
9
|
+
*
|
|
10
|
+
* Usually invoked as a subprocess via `./bin.js`. Exports exist so
|
|
11
|
+
* the pieces can also be embedded in tests or custom supervisors.
|
|
12
|
+
*
|
|
13
|
+
* @module session
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
export { run } from './run.js'
|
|
17
|
+
export { adapters, getAdapter } from './adapters/index.js'
|
|
18
|
+
export { drive } from './driver.js'
|
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dispatch a CallEnvelope to the matching adapter and yield its
|
|
3
|
+
* Event stream.
|
|
4
|
+
*
|
|
5
|
+
* Per call:
|
|
6
|
+
* - Opens a `mohdel.session.answer` OTel span under the envelope's
|
|
7
|
+
* remote parent (`traceparent`). Log lines carry `traceId` /
|
|
8
|
+
* `spanId` for SigNoz/Jaeger correlation even when no exporter
|
|
9
|
+
* is wired.
|
|
10
|
+
* - Pre-dispatch: cooldown fast-fail, then rpm/tpm throttle
|
|
11
|
+
* (await + record).
|
|
12
|
+
* - Post-dispatch: on `done`, reset cooldown and record tokens
|
|
13
|
+
* (TPM); on `error`, recordFailure (immediate for AUTH_INVALID,
|
|
14
|
+
* skipped for non-retryable provider errors). Span ends with
|
|
15
|
+
* `gen_ai.*` + `mohdel.*` attributes.
|
|
16
|
+
*
|
|
17
|
+
* Terminal events are `done` (success / incomplete / cancelled /
|
|
18
|
+
* tool_use) and `error`. The adapter is expected to emit exactly
|
|
19
|
+
* one terminal per call. If it returns without one, `run()`
|
|
20
|
+
* synthesizes an `error`.
|
|
21
|
+
*
|
|
22
|
+
* @module session/run
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { getAdapter } from './adapters/index.js'
|
|
26
|
+
import { isImageProvider } from './adapters/image/index.js'
|
|
27
|
+
import { getSpec } from './adapters/_catalog.js'
|
|
28
|
+
import { getProviderLimits } from './adapters/_providers.js'
|
|
29
|
+
import * as defaultCooldown from './_cooldown.js'
|
|
30
|
+
import * as defaultLimiter from './_rate_limiter.js'
|
|
31
|
+
import { logger as defaultLogger } from './_logger.js'
|
|
32
|
+
import {
|
|
33
|
+
startSpan,
|
|
34
|
+
endSpanOk,
|
|
35
|
+
endSpanError,
|
|
36
|
+
remoteParentFromTraceparent
|
|
37
|
+
} from './_tracing.js'
|
|
38
|
+
import { STATUS_INCOMPLETE, WARNING_CANCELLED } from '#core/status.js'
|
|
39
|
+
|
|
40
|
+
/**
 * Dispatch one call envelope to its provider adapter and re-yield the
 * adapter's events, adding cooldown, rate-limit, logging and span
 * bookkeeping around it.
 *
 * Exactly one terminal event (`done` or `error`) is yielded per call:
 *   - an invalid `model:effort` suffix, an unresolvable provider or an
 *     active cooldown yields a single `error` before the adapter runs;
 *   - if the adapter throws or returns before a terminal, a cancelled
 *     `done` (when `signal` is aborted) or an `error` is synthesized;
 *   - if the adapter throws after its terminal was already yielded, the
 *     failure is logged and no second terminal is emitted.
 *
 * Every collaborator is injectable through `options`; the defaults are
 * the module-level implementations.
 *
 * @param {import('#core/envelope.js').CallEnvelope} envelope
 * @param {{
 *   resolveAdapter?: (provider: string) => (
 *     env: import('#core/envelope.js').CallEnvelope,
 *     deps?: {signal?: AbortSignal, log?: any, span?: any}
 *   ) => AsyncGenerator<import('#core/events.js').Event>,
 *   resolveSpec?: (key: string) => any,
 *   resolveProviderLimits?: (provider: string) => any,
 *   cooldown?: any,
 *   limiter?: any,
 *   logger?: any,
 *   sleep?: (ms: number) => Promise<void>,
 *   signal?: AbortSignal
 * }} [options]
 * @returns {AsyncGenerator<import('#core/events.js').Event>}
 */
export async function * run (envelope, {
  resolveAdapter = getAdapter,
  resolveSpec = getSpec,
  resolveProviderLimits = getProviderLimits,
  cooldown = defaultCooldown,
  limiter = defaultLimiter,
  logger = defaultLogger,
  sleep = defaultSleep,
  signal
} = {}) {
  // Honor the `model:effort` shortcut on the wire (mirrors the
  // factory-side `mohdel().use('model:effort')` convenience). If
  // the envelope's `model` field ends in `:<effort>` and the base
  // resolves to a known spec, split the suffix into
  // `envelope.outputEffort`. Explicit `outputEffort` wins when both
  // are set (suffix is a shortcut, not an override).
  const effortNorm = normalizeModelEffort(envelope, resolveSpec)
  if (effortNorm.error) { yield effortNorm.error; return }
  envelope = effortNorm.envelope

  const span = openSpan(envelope)
  const log = scopedLogger(logger, envelope, span)
  const startedAt = Date.now()

  log.debug({
    provider: envelope.provider,
    model: envelope.model,
    effort: envelope.outputEffort ?? 'default',
    outputBudget: envelope.outputBudget ?? null,
    tools: envelope.tools?.length || 0,
    images: envelope.images?.length || 0
  }, '[mohdel:answer] start')

  let adapter
  try {
    adapter = resolveAdapter(envelope.provider)
  } catch (e) {
    // Distinguish "image-only provider invoked via answer" from
    // truly-unknown. Novita-and-friends have no text adapter but a
    // caller using `mohdel.use('novita/...').answer(...)` otherwise
    // gets a bare "unknown provider" with no hint.
    if (isImageProvider(envelope.provider)) {
      const detail = `provider '${envelope.provider}' supports image generation only; use mohdel.image(...) instead`
      const err = errorEvent(detail, 'PROVIDER_TEXT_NOT_SUPPORTED')
      log.warn({ provider: envelope.provider }, '[mohdel:answer] image-only provider via answer')
      endSpanError(span, new Error(detail))
      yield err
      return
    }
    const err = errorEvent(messageOf(e), 'SESSION_UNKNOWN_PROVIDER')
    log.warn({ err: e, provider: envelope.provider }, '[mohdel:answer] unknown provider')
    endSpanError(span, e)
    yield err
    return
  }

  // Fast-fail while the provider is cooling down: no adapter call.
  const coolErr = cooldown.coolingDownError(envelope.provider)
  if (coolErr) {
    log.debug({ provider: envelope.provider, detail: coolErr.detail }, '[mohdel:cooldown] fast-fail')
    span.setAttribute('mohdel.cooldown', true)
    endSpanOk(span, { 'mohdel.status': 'cooldown' })
    yield { type: 'error', error: coolErr }
    return
  }

  // Per-model limits take precedence over provider-wide ones.
  const spec = resolveSpec(`${envelope.provider}/${envelope.model}`)
  const providerCfg = resolveProviderLimits(envelope.provider) || {}
  const rpmLimit = spec?.rpmLimit ?? providerCfg.rpmLimit
  const tpmLimit = spec?.tpmLimit ?? providerCfg.tpmLimit
  const bucketKey = (spec?.rateLimitScope === 'model')
    ? `${envelope.provider}/${envelope.model}`
    : envelope.provider

  // `0` is a killswitch ("deny all"), not "unset"; `undefined`/`null`
  // means no limit configured for that dimension. Gate on nullability
  // so an intentional `Some(0)` quota is enforced.
  if (rpmLimit != null || tpmLimit != null) {
    const delay = limiter.check(bucketKey, { rpmLimit, tpmLimit })
    if (delay > 0) {
      log.debug({ key: bucketKey, delayMs: delay }, '[mohdel:ratelimit] throttling')
      span.setAttribute('mohdel.rate_limit_delay_ms', delay)
      await sleep(delay)
    }
    limiter.recordRequest(bucketKey)
  }

  // `sawTerminal`: a terminal event was received from the adapter.
  // `terminalDelivered`: that terminal has actually been yielded to
  // the consumer. They differ only if bookkeeping throws in between.
  let sawTerminal = false
  let terminalDelivered = false
  try {
    for await (const ev of adapter(envelope, { signal, log, span })) {
      if (ev.type === 'done') {
        sawTerminal = true
        // A cancelled terminal is the caller's action, not evidence
        // of provider recovery — don't wipe an accumulated failure
        // streak. Every other `done` state (completed /
        // incomplete-budget / tool_use) IS a genuine provider-side
        // success and resets the streak.
        if (ev.result?.warning !== WARNING_CANCELLED) {
          cooldown.reset(envelope.provider)
        }
        if (tpmLimit != null && ev.result) {
          const total =
            (ev.result.inputTokens || 0) +
            (ev.result.outputTokens || 0) +
            (ev.result.thinkingTokens || 0)
          if (total > 0) limiter.recordTokens(bucketKey, total)
        }
        finalizeSpanOk(span, ev.result)
        log.debug(summarizeDone(ev.result, startedAt), '[mohdel:answer] done')
      } else if (ev.type === 'error') {
        sawTerminal = true
        recordFailureFromError(cooldown, envelope.provider, ev.error)
        log.warn({
          err: ev.error,
          provider: envelope.provider,
          totalMs: Date.now() - startedAt
        }, '[mohdel:answer] failed')
        endSpanError(span, new Error(ev.error?.message || 'adapter error'))
      }
      yield ev
      if (sawTerminal) terminalDelivered = true
    }
  } catch (e) {
    if (terminalDelivered) {
      // The consumer already has its terminal and the span is ended.
      // Yielding SESSION_ADAPTER_THREW now would be a second terminal
      // for the same call, so only log the late failure.
      log.warn({ err: e, provider: envelope.provider }, '[mohdel:answer] adapter threw after terminal event')
      return
    }
    if (signal?.aborted && !sawTerminal) {
      // Abort surfaced as an exception: report it as a cancelled `done`.
      const fallback = cancelledFallback()
      finalizeSpanOk(span, fallback.result)
      yield fallback
      return
    }
    log.warn({ err: e, provider: envelope.provider }, '[mohdel:answer] adapter threw')
    endSpanError(span, e)
    yield errorEvent(messageOf(e), 'SESSION_ADAPTER_THREW')
    return
  }

  if (!sawTerminal) {
    if (signal?.aborted) {
      const fallback = cancelledFallback()
      finalizeSpanOk(span, fallback.result)
      yield fallback
    } else {
      const err = 'adapter returned without a terminal event'
      log.error({ provider: envelope.provider }, '[mohdel:answer] no terminal event')
      endSpanError(span, new Error(err))
      yield errorEvent(err, 'SESSION_ADAPTER_NO_TERMINAL')
    }
  }
}
|
|
203
|
+
|
|
204
|
+
/**
|
|
205
|
+
* Split an optional `:effort` suffix from `envelope.model`. If the
|
|
206
|
+
* base resolves to a known spec, rewrites `envelope.model` and sets
|
|
207
|
+
* `envelope.outputEffort` (unless already set). Emits a typed error
|
|
208
|
+
* when the suffix is present and the spec rejects it.
|
|
209
|
+
*
|
|
210
|
+
* @param {import('#core/envelope.js').CallEnvelope} envelope
|
|
211
|
+
* @param {(key: string) => any} resolveSpec
|
|
212
|
+
* @returns {{
|
|
213
|
+
* envelope: import('#core/envelope.js').CallEnvelope,
|
|
214
|
+
* error?: import('#core/events.js').ErrorEvent
|
|
215
|
+
* }}
|
|
216
|
+
*/
|
|
217
|
+
function normalizeModelEffort (envelope, resolveSpec) {
|
|
218
|
+
const modelStr = envelope.model || ''
|
|
219
|
+
const colonIdx = modelStr.lastIndexOf(':')
|
|
220
|
+
if (colonIdx <= 0) return { envelope }
|
|
221
|
+
if (envelope.outputEffort) return { envelope } // explicit wins
|
|
222
|
+
|
|
223
|
+
const candidate = modelStr.slice(colonIdx + 1)
|
|
224
|
+
const base = modelStr.slice(0, colonIdx)
|
|
225
|
+
const baseSpec = resolveSpec(`${envelope.provider}/${base}`)
|
|
226
|
+
if (!baseSpec) return { envelope } // base not known — let full string fall through to not-found
|
|
227
|
+
|
|
228
|
+
if (!baseSpec.thinkingEffortLevels) {
|
|
229
|
+
return {
|
|
230
|
+
envelope,
|
|
231
|
+
error: errorEvent(
|
|
232
|
+
`Model '${envelope.provider}/${base}' does not support output effort (no thinkingEffortLevels). Cannot use ':${candidate}' suffix.`,
|
|
233
|
+
'SESSION_INVALID_OUTPUT_EFFORT'
|
|
234
|
+
)
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
if (candidate !== 'none' && !baseSpec.thinkingEffortLevels[candidate]) {
|
|
238
|
+
return {
|
|
239
|
+
envelope,
|
|
240
|
+
error: errorEvent(
|
|
241
|
+
`Model '${envelope.provider}/${base}' does not support output effort level '${candidate}'. Available: ${Object.keys(baseSpec.thinkingEffortLevels).join(', ')}`,
|
|
242
|
+
'SESSION_INVALID_OUTPUT_EFFORT'
|
|
243
|
+
)
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
|
|
247
|
+
return {
|
|
248
|
+
envelope: { ...envelope, model: base, outputEffort: candidate }
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
|
|
252
|
+
/**
 * Start the `mohdel.session.answer` span for a call.
 *
 * The span is parented on the envelope's `traceparent` when one is
 * present (otherwise `null` is passed as parent). Request attributes
 * always include model, provider, call id and auth id; output budget
 * and output effort are added only when they are truthy.
 *
 * @param {import('#core/envelope.js').CallEnvelope} envelope
 */
function openSpan (envelope) {
  let remoteParent = null
  if (envelope.traceparent) {
    remoteParent = remoteParentFromTraceparent(envelope.traceparent)
  }

  const { model, provider, callId, authId, outputBudget, outputEffort } = envelope
  /** @type {Record<string, any>} */
  const spanAttrs = {
    'gen_ai.request.model': model,
    'gen_ai.system': provider,
    'mohdel.call_id': callId,
    'mohdel.auth_id': authId
  }
  if (outputBudget) spanAttrs['gen_ai.request.max_tokens'] = outputBudget
  if (outputEffort) spanAttrs['mohdel.output_effort'] = outputEffort

  return startSpan('mohdel.session.answer', spanAttrs, remoteParent)
}
|
|
270
|
+
|
|
271
|
+
/**
|
|
272
|
+
* @param {any} logger
|
|
273
|
+
* @param {import('#core/envelope.js').CallEnvelope} envelope
|
|
274
|
+
* @param {any} span
|
|
275
|
+
*/
|
|
276
|
+
function scopedLogger (logger, envelope, span) {
|
|
277
|
+
const ctx = span?.spanContext?.() || {}
|
|
278
|
+
return logger.withContext({
|
|
279
|
+
callId: envelope.callId,
|
|
280
|
+
authId: envelope.authId,
|
|
281
|
+
provider: envelope.provider,
|
|
282
|
+
model: envelope.model,
|
|
283
|
+
traceId: ctx.traceId,
|
|
284
|
+
spanId: ctx.spanId
|
|
285
|
+
})
|
|
286
|
+
}
|
|
287
|
+
|
|
288
|
+
/**
 * End the span successfully, attaching usage attributes taken from a
 * `done` result.
 *
 * Token counts default to 0 and status to `'unknown'`. Cost is added
 * when it is not null/undefined, warning when truthy. Time to first
 * token is added only when both `timestamps.start` and
 * `timestamps.first` are truthy, convert to BigInt, and `first` is
 * strictly later than `start`; the difference is divided by 1e6.
 *
 * @param {any} span
 * @param {any} result
 */
function finalizeSpanOk (span, result) {
  /** @type {Record<string, any>} */
  const usage = {
    'gen_ai.usage.input_tokens': result?.inputTokens || 0,
    'gen_ai.usage.output_tokens': result?.outputTokens || 0,
    'mohdel.thinking_tokens': result?.thinkingTokens || 0,
    'mohdel.status': result?.status || 'unknown'
  }
  if (result?.cost != null) usage['mohdel.cost'] = result.cost
  if (result?.warning) usage['mohdel.warning'] = result.warning

  const stamps = result?.timestamps
  if (stamps?.start && stamps?.first) {
    try {
      const startNs = BigInt(stamps.start)
      const elapsedNs = BigInt(stamps.first) - startNs
      if (elapsedNs > 0n) {
        usage['mohdel.time_to_first_token_ms'] = Number(elapsedNs) / 1e6
      }
    } catch {
      // ignore parse failures — timestamps may be zero/unknown strings
    }
  }

  endSpanOk(span, usage)
}
|
|
313
|
+
|
|
314
|
+
/**
|
|
315
|
+
* @param {any} result
|
|
316
|
+
* @param {number} startedAt
|
|
317
|
+
*/
|
|
318
|
+
function summarizeDone (result, startedAt) {
|
|
319
|
+
return {
|
|
320
|
+
status: result?.status,
|
|
321
|
+
in: result?.inputTokens || 0,
|
|
322
|
+
out: result?.outputTokens || 0,
|
|
323
|
+
think: result?.thinkingTokens || 0,
|
|
324
|
+
cost: result?.cost,
|
|
325
|
+
warning: result?.warning,
|
|
326
|
+
totalMs: Date.now() - startedAt
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
|
|
330
|
+
/**
|
|
331
|
+
* Cooldown accounting rules:
|
|
332
|
+
* - 401/403 (AUTH_INVALID) → immediate cooldown, 1 failure.
|
|
333
|
+
* - Non-retryable client errors (400/404) → skip cooldown entirely.
|
|
334
|
+
* - 429/5xx/network errors → normal consecutive-failure tracking.
|
|
335
|
+
*
|
|
336
|
+
* @param {{recordFailure: (k: string, o?: {immediate?: boolean}) => boolean}} cooldown
|
|
337
|
+
* @param {string} provider
|
|
338
|
+
* @param {import('#core/errors.js').TypedError | undefined} err
|
|
339
|
+
*/
|
|
340
|
+
function recordFailureFromError (cooldown, provider, err) {
|
|
341
|
+
if (!err) return
|
|
342
|
+
if (err.type === 'AUTH_INVALID') {
|
|
343
|
+
cooldown.recordFailure(provider, { immediate: true })
|
|
344
|
+
return
|
|
345
|
+
}
|
|
346
|
+
if (err.retryable === false) return
|
|
347
|
+
cooldown.recordFailure(provider)
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
/**
 * Synthesize the `done` event used when a call was aborted and the
 * adapter produced no terminal of its own: incomplete status, null
 * output, zero usage and cost, the cancelled warning, and all three
 * timestamps set to the same `process.hrtime.bigint()` reading
 * rendered as a string.
 *
 * @returns {import('#core/events.js').DoneEvent}
 */
function cancelledFallback () {
  const stamp = process.hrtime.bigint().toString()
  const result = {
    status: STATUS_INCOMPLETE,
    output: null,
    inputTokens: 0,
    outputTokens: 0,
    thinkingTokens: 0,
    cost: 0,
    timestamps: { start: stamp, first: stamp, end: stamp },
    warning: WARNING_CANCELLED
  }
  return { type: 'done', result }
}
|
|
367
|
+
|
|
368
|
+
/**
 * Wrap a message and error type into a terminal `error` event. The
 * error is always marked `severity: 'error'` and `retryable: false`.
 *
 * @param {string} message
 * @param {string} type
 * @returns {import('#core/events.js').ErrorEvent}
 */
function errorEvent (message, type) {
  const error = { message, severity: 'error', retryable: false, type }
  return { type: 'error', error }
}
|
|
384
|
+
|
|
385
|
+
/**
 * Best-effort text for a caught value: the `message` of an `Error`,
 * otherwise the value coerced with `String()`.
 *
 * @param {unknown} e
 */
function messageOf (e) {
  if (e instanceof Error) return e.message
  return String(e)
}
|
|
389
|
+
|
|
390
|
+
/**
 * Timer-backed sleep: the returned promise resolves (with no value)
 * once `setTimeout` fires after `ms` milliseconds.
 *
 * @param {number} ms
 */
function defaultSleep (ms) {
  return new Promise((resolve) => {
    setTimeout(resolve, ms)
  })
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dispatch an ImageEnvelope to the matching image adapter.
|
|
3
|
+
*
|
|
4
|
+
* Image generation is a single request/response — no streaming — so
|
|
5
|
+
* this returns a Promise of `ImageResult` rather than an event
|
|
6
|
+
* generator. On adapter failure, the resolved error is a `TypedError`
|
|
7
|
+
* (structured, serializable) rather than a thrown JS `Error`.
|
|
8
|
+
*
|
|
9
|
+
* The image path skips rate-limit and cooldown — images are
|
|
10
|
+
* low-frequency one-shots that don't justify the per-call tracking
|
|
11
|
+
* overhead. Wire it here if that assumption ever changes.
|
|
12
|
+
*
|
|
13
|
+
* @module session/run_image
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { getImageAdapter } from './adapters/image/index.js'
|
|
17
|
+
import { classifyProviderError } from './adapters/_errors.js'
|
|
18
|
+
|
|
19
|
+
/**
|
|
20
|
+
* @param {import('#core/image.js').ImageEnvelope} envelope
|
|
21
|
+
* @param {{
|
|
22
|
+
* resolveAdapter?: (provider: string) => (
|
|
23
|
+
* env: import('#core/image.js').ImageEnvelope,
|
|
24
|
+
* deps?: any
|
|
25
|
+
* ) => Promise<import('#core/image.js').ImageResult>,
|
|
26
|
+
* spec?: any
|
|
27
|
+
* }} [options]
|
|
28
|
+
* @returns {Promise<
|
|
29
|
+
* | {ok: true, result: import('#core/image.js').ImageResult}
|
|
30
|
+
* | {ok: false, error: import('#core/errors.js').TypedError}
|
|
31
|
+
* >}
|
|
32
|
+
*/
|
|
33
|
+
export async function runImage (envelope, { resolveAdapter = getImageAdapter, spec } = {}) {
|
|
34
|
+
let adapter
|
|
35
|
+
try {
|
|
36
|
+
adapter = resolveAdapter(envelope.provider)
|
|
37
|
+
} catch (e) {
|
|
38
|
+
return {
|
|
39
|
+
ok: false,
|
|
40
|
+
error: {
|
|
41
|
+
message: messageOf(e),
|
|
42
|
+
severity: 'error',
|
|
43
|
+
retryable: false,
|
|
44
|
+
type: 'SESSION_UNKNOWN_PROVIDER'
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
try {
|
|
50
|
+
const result = await adapter(envelope, spec ? { spec } : {})
|
|
51
|
+
return { ok: true, result }
|
|
52
|
+
} catch (e) {
|
|
53
|
+
const typed = /** @type {any} */(e).typed || classifyProviderError(e)
|
|
54
|
+
return { ok: false, error: typed }
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
 * Text for a caught value: an `Error` yields its `message`, anything
 * else is coerced with `String()`.
 *
 * @param {unknown} e
 */
function messageOf (e) {
  if (e instanceof Error) {
    return e.message
  }
  return String(e)
}
|