mohdel 0.90.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +377 -0
  3. package/config/benchmarks.json +39 -0
  4. package/js/client/call.js +75 -0
  5. package/js/client/call_image.js +82 -0
  6. package/js/client/gate-binary.js +72 -0
  7. package/js/client/index.js +16 -0
  8. package/js/client/ndjson.js +29 -0
  9. package/js/client/transport.js +48 -0
  10. package/js/core/envelope.js +141 -0
  11. package/js/core/errors.js +75 -0
  12. package/js/core/events.js +96 -0
  13. package/js/core/image.js +58 -0
  14. package/js/core/index.js +10 -0
  15. package/js/core/status.js +48 -0
  16. package/js/factory/bridge.js +372 -0
  17. package/js/session/_cooldown.js +114 -0
  18. package/js/session/_logger.js +138 -0
  19. package/js/session/_rate_limiter.js +77 -0
  20. package/js/session/_tracing.js +58 -0
  21. package/js/session/adapters/_cancelled.js +44 -0
  22. package/js/session/adapters/_catalog.js +58 -0
  23. package/js/session/adapters/_chat_completions.js +439 -0
  24. package/js/session/adapters/_errors.js +85 -0
  25. package/js/session/adapters/_images.js +60 -0
  26. package/js/session/adapters/_lazy_json_cache.js +76 -0
  27. package/js/session/adapters/_pricing.js +67 -0
  28. package/js/session/adapters/_providers.js +60 -0
  29. package/js/session/adapters/_tools.js +185 -0
  30. package/js/session/adapters/_videos.js +283 -0
  31. package/js/session/adapters/anthropic.js +397 -0
  32. package/js/session/adapters/cerebras.js +28 -0
  33. package/js/session/adapters/deepseek.js +32 -0
  34. package/js/session/adapters/echo.js +51 -0
  35. package/js/session/adapters/fake.js +262 -0
  36. package/js/session/adapters/fireworks.js +46 -0
  37. package/js/session/adapters/gemini.js +381 -0
  38. package/js/session/adapters/groq.js +23 -0
  39. package/js/session/adapters/image/fake.js +55 -0
  40. package/js/session/adapters/image/index.js +40 -0
  41. package/js/session/adapters/image/novita.js +135 -0
  42. package/js/session/adapters/image/openai.js +50 -0
  43. package/js/session/adapters/index.js +53 -0
  44. package/js/session/adapters/mistral.js +31 -0
  45. package/js/session/adapters/novita.js +29 -0
  46. package/js/session/adapters/openai.js +381 -0
  47. package/js/session/adapters/openrouter.js +66 -0
  48. package/js/session/adapters/xai.js +27 -0
  49. package/js/session/bin.js +54 -0
  50. package/js/session/driver.js +160 -0
  51. package/js/session/index.js +18 -0
  52. package/js/session/run.js +393 -0
  53. package/js/session/run_image.js +61 -0
  54. package/package.json +107 -0
  55. package/src/cli/ask.js +160 -0
  56. package/src/cli/backup.js +107 -0
  57. package/src/cli/bench.js +262 -0
  58. package/src/cli/check.js +123 -0
  59. package/src/cli/colored-logger.js +67 -0
  60. package/src/cli/colors.js +13 -0
  61. package/src/cli/default.js +39 -0
  62. package/src/cli/index.js +150 -0
  63. package/src/cli/json-output.js +60 -0
  64. package/src/cli/model.js +571 -0
  65. package/src/cli/onboard.js +232 -0
  66. package/src/cli/rank.js +176 -0
  67. package/src/cli/ratelimit.js +160 -0
  68. package/src/cli/tag.js +105 -0
  69. package/src/lib/assets/alibaba.svg +1 -0
  70. package/src/lib/assets/anthropic.svg +5 -0
  71. package/src/lib/assets/deepseek.svg +1 -0
  72. package/src/lib/assets/gemini.svg +1 -0
  73. package/src/lib/assets/google.svg +2 -0
  74. package/src/lib/assets/kwaipilot.svg +1 -0
  75. package/src/lib/assets/meta.svg +1 -0
  76. package/src/lib/assets/minimax.svg +9 -0
  77. package/src/lib/assets/moonshotai.svg +4 -0
  78. package/src/lib/assets/openai.svg +5 -0
  79. package/src/lib/assets/xai.svg +1 -0
  80. package/src/lib/assets/xiaomi.svg +2 -0
  81. package/src/lib/assets/zai.svg +219 -0
  82. package/src/lib/benchmark-score.js +215 -0
  83. package/src/lib/benchmark-truth.js +68 -0
  84. package/src/lib/cache.js +76 -0
  85. package/src/lib/common.js +208 -0
  86. package/src/lib/cooldown.js +63 -0
  87. package/src/lib/creators.js +71 -0
  88. package/src/lib/curated-cache.js +146 -0
  89. package/src/lib/errors.js +126 -0
  90. package/src/lib/index.js +726 -0
  91. package/src/lib/logger.js +29 -0
  92. package/src/lib/providers.js +87 -0
  93. package/src/lib/rank.js +390 -0
  94. package/src/lib/rate-limiter.js +50 -0
  95. package/src/lib/schema.js +150 -0
  96. package/src/lib/select.js +474 -0
  97. package/src/lib/tracing.js +62 -0
  98. package/src/lib/utils.js +85 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025-2026 Christophe Le Bars <clb@toort.net>
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,377 @@
1
+ # Mohdel
2
+
3
+ Self-hosted LLM gateway with an embeddable SDK. Process-isolated, OpenTelemetry-native inference across 11 providers — streaming, tools, thinking control, image generation — without orchestration. Run `thin-gate` as a subprocess for fault isolation, cross-process quota, and any-language HTTP callers; or use the Node factory in-process for CLI tools, scripts, and single-process services.
4
+
5
+ Providers: Anthropic, OpenAI, Gemini, Mistral, Groq, xAI, Cerebras, Fireworks, DeepSeek, OpenRouter, Novita.
6
+
7
+ Node 22+, ES modules.
8
+
9
+ This README covers install, the `mo` CLI, and configuration. For the JS library guide see [INTEGRATION.md](INTEGRATION.md). For design rationale see [ARCHITECTURE.md](ARCHITECTURE.md). For logging conventions see [LOGGING.md](LOGGING.md).
10
+
11
+ Three planes: JS client over a unix socket, Rust thin-gate as the scheduler + state owner, JS session as the provider executor. The `mohdel()` factory path runs the same session inline for single-process consumers. See the **Architecture** section below for a tour.
12
+
13
+ ## What mohdel is not
14
+
15
+ Scope-capping is deliberate. If you're shopping for any of the following, mohdel is the wrong layer — use it *alongside* your framework of choice, not instead of it.
16
+
17
+ - **Not an orchestrator.** No chains, no agents, no memory, no prompt templates, no retrieval. Wrap mohdel with LangChain, LangGraph, LlamaIndex, Vercel AI SDK, or your own tool loop — mohdel exposes the inference primitive, orchestration stays in your application.
18
+ - **Not a retry / fallback engine.** Errors are classified (`retryable`, `severity`, `type`) so the caller can decide, but mohdel never retries or swaps models silently. Silent model-swapping would conflict with existing multi-model logic upstream; the caller owns the retry budget and fallback choice.
19
+ - **Not a response cache.** The `cache: true` flag on envelopes is for provider-side prompt caching (Anthropic, OpenAI) — not mohdel-level memoization. Caching inference *results* is orchestration-policy territory and depends on invariants only the caller knows.
20
+ - **Not a context-window / token manager.** No pre-call token count, no projected-cost guard. The caller owns what goes in the prompt and is the source of truth for what counts.
21
+ - **Not a SaaS proxy.** Self-hosted. Your API keys, your infra. No routing through a third party, no vendor lock-in.
22
+
23
+ See [ARCHITECTURE.md §Design principles](ARCHITECTURE.md#design-principles) for the full rationale behind each.
24
+
25
+ ## Observability out of the box
26
+
27
+ Every call emits:
28
+
29
+ - **OpenTelemetry span** (`mohdel.session.answer`) under the caller's `traceparent`, with GenAI semantic-convention attributes (`gen_ai.request.model`, `gen_ai.system`, `gen_ai.usage.input_tokens`, `gen_ai.usage.output_tokens`) plus mohdel's own (`mohdel.status`, `mohdel.cost`, `mohdel.thinking_tokens`, `mohdel.time_to_first_token_ms`, `mohdel.cooldown` on fast-fail).
30
+ - **Trace-linked logs** — every stderr log line carries `{traceId, spanId, callId, authId, provider, model}`. Dump logs + traces into the same collector (SigNoz, Honeycomb, Jaeger + Loki) and they're correlated for free. No per-call instrumentation code.
31
+ - **Gate-side OTLP metrics** (when running `thin-gate`): `mohdel.sessions.{alive,respawned,spawn_failures}`, `mohdel.calls{provider,status}`, `mohdel.call.duration_ms`, `mohdel.cooldown.rejections`, `mohdel.quota.rejections`, `mohdel.policy.errors`.
32
+
33
+ One endpoint for everything: set `OTEL_EXPORTER_OTLP_ENDPOINT` and spans + metrics flow to it over gRPC. No-op when unset — zero overhead for callers who aren't wired. See [INTEGRATION.md §OpenTelemetry](INTEGRATION.md#opentelemetry) and [LOGGING.md](LOGGING.md) for details.
34
+
35
+ The OTel SDK packages (`@opentelemetry/sdk-node`, `@opentelemetry/exporter-trace-otlp-grpc`) are **`optionalDependencies`** — installed by default, but `npm install --omit=optional` skips them (along with their gRPC transitive tree). If you do that and later want trace export, install them explicitly:
36
+
37
+ ```bash
38
+ npm install @opentelemetry/sdk-node @opentelemetry/exporter-trace-otlp-grpc
39
+ ```
40
+
41
+ `@opentelemetry/api` stays in `dependencies` — the no-op tracer needs it regardless of whether export is wired.
42
+
43
+ ## Quick Start
44
+
45
+ ```bash
46
+ npm install -g mohdel
47
+ mo # interactive setup — pick a provider, paste your API key
48
+ mo ask gemini/gemini-3-flash-preview "why is the sky blue"
49
+ ```
50
+
51
+ That's it. `mo` guides you through getting an API key (Gemini, Groq, and Cerebras all have free tiers).
52
+
53
+ Model IDs always use the `<provider>/<model>` format:
54
+
55
+ ```
56
+ gemini/gemini-3-flash-preview
57
+ anthropic/claude-sonnet-4-6
58
+ openai/gpt-5.4-mini
59
+ groq/llama-4-scout-17b-16e-instruct
60
+ ```
61
+
62
+ ## CLI
63
+
64
+ ```bash
65
+ # One-shot inference — pipeable
66
+ mo ask anthropic/claude-sonnet-4-6 "explain monads"
67
+ cat article.txt | mo ask openai/gpt-5.4 "summarize in 3 bullets"
68
+ echo "hello" | mo ask gemini/gemini-3-flash-preview --json | jq .cost
69
+
70
+ # Streaming
71
+ mo ask anthropic/claude-sonnet-4-6 --stream "write a haiku about recursion"
72
+
73
+ # With thinking effort
74
+ mo ask anthropic/claude-opus-4-6 --effort high "prove P != NP"
75
+
76
+ # Browse the model catalog
77
+ mo ls # list all curated models
78
+ mo ls --sort price # sorted by input price
79
+ mo search sonnet # filter by name/label
80
+ mo show anthropic/claude-sonnet-4-6 # model details
81
+ mo stats # catalog summary
82
+ mo providers # providers with key status & rate limits
83
+
84
+ # Rank models by benchmarks
85
+ mo rank # curated models, balanced weights
86
+ mo rank --use-case tool-loop # weighted for tool reliability
87
+ mo rank --json # machine-readable
88
+
89
+ # Manage the catalog
90
+ mo curate anthropic # add new models from a provider
91
+ mo setup anthropic # configure API key
92
+ mo model add fireworks/deepseek-r1 # add a model manually
93
+ mo model set <model> <key> <value> # set any field on a model
94
+ mo model rm <model> <key> # remove a field
95
+ mo check # validate schema + upstream drift
96
+
97
+ # Rate limits
98
+ mo rl show anthropic # provider or model limits
99
+ mo rl set anthropic/claude-sonnet-4-6 60 100000
100
+
101
+ # Benchmark with live inference
102
+ mo bench anthropic/claude-sonnet-4-6 # single model
103
+ mo bench --tag fast --effort low # suite by tag
104
+ ```
105
+
106
+ All list/show commands support `--json [fields]` — bare `--json` lists available fields (like `gh`).
107
+
108
+ ## Library Usage
109
+
110
+ Two integration paths: the **client** (primary, cross-process) and the **factory** (in-process shortcut).
111
+
112
+ ### Client — cross-process (recommended)
113
+
114
+ ```js
115
+ import { call } from 'mohdel/client'
116
+
117
+ const envelope = {
118
+ callId: 'c-1', authId: 'u-1', auth: { key: process.env.ANTHROPIC_API_SK },
119
+ model: 'anthropic/claude-haiku-4-5', prompt: 'Hello'
120
+ }
121
+
122
+ for await (const ev of call(envelope, { socketPath: '/tmp/mohdel-data.sock' })) {
123
+ if (ev.type === 'delta') process.stdout.write(ev.delta.delta)
124
+ else if (ev.type === 'done') console.log('\n→', ev.result.cost)
125
+ }
126
+ ```
127
+
128
+ Requires a running `thin-gate` subprocess. See [INTEGRATION.md §Client](INTEGRATION.md#client-cross-process--primary-production-integration) for setup.
129
+
130
+ ### Factory — in-process shortcut
131
+
132
+ ```js
133
+ import mohdel from 'mohdel'
134
+
135
+ const mo = await mohdel()
136
+ const result = await mo.use('anthropic/claude-sonnet-4-6').answer('Hello')
137
+ console.log(result.output, result.cost)
138
+ ```
139
+
140
+ No subprocess; the factory runs the same session adapters inline. Right for CLI (`mo ask`), scripts, tests, single-process services.
141
+
142
+ For the full API — initialization, alias resolution, answer options, response shape, tool use, streaming, vision, error handling, OpenTelemetry, sub-path exports — see **[INTEGRATION.md](INTEGRATION.md)**.
143
+
144
+ ## Architecture
145
+
146
+ Mohdel splits into three planes that can be deployed independently:
147
+
148
+ ```
149
+ ┌──────────┐ unix ┌─────────────┐ stdin/stdout ┌──────────┐
150
+ │ client │ socket │ thin-gate │ NDJSON │ session │ × N
151
+ caller ──► │ (JS) │ ─HTTP─►│ (Rust) │ ─────────────► │ (JS) │
152
+ └──────────┘ └─────────────┘ └──────────┘
153
+
154
+ ▼ admin plane (unix socket, HTTP)
155
+ GET /v1/health
156
+ ```
157
+
158
+ - **`mohdel/client`** (JS) — thin stub that callers import. Opens a unix socket to thin-gate, sends a `CallEnvelope`, receives an async-iterable of `Event`s. Zero transitive provider-SDK imports — caller-side code stays light.
159
+ - **`mohdel-thin-gate`** (Rust binary, prebuilt and shipped via the `mohdel-thin-gate-<platform>` npm sub-packages) — scheduler / state owner / supervisor. Binds the data-plane socket, validates the envelope, dispatches to a pooled session subprocess, relays events back, handles graceful cancellation on client disconnect. Binds the admin plane for `GET /v1/health`. Pushes OTLP metrics (sessions alive/respawned, calls by provider/status, call-duration histogram, cooldown / quota / policy rejections) when `OTEL_EXPORTER_OTLP_ENDPOINT` is set. Internal trait hooks (`RoutePolicy`, `QuotaPolicy`, `ConfigSource`, `CachePolicy`) make the crate testable and fork-friendly for deployments that need bespoke policy — not a published-library surface.
160
+ - **`mohdel/session`** (JS subprocess) — provider executor. Spawned by thin-gate, reads envelopes from stdin, dispatches to the matching adapter, writes events to stdout. A napi-rs addon was scoped for hot-loop optimization but current benchmarks show per-call JS CPU is not the bottleneck; the stub stays under `rust/napi-addon/` for future reactivation.
161
+
162
+ ### Running thin-gate
163
+
164
+ ```bash
165
+ cargo run --bin mohdel-thin-gate /tmp/mohdel-data.sock /tmp/mohdel-admin.sock /path/to/js/session/bin.js
166
+
167
+ # or with a pre-built release binary:
168
+ ./target/release/mohdel-thin-gate /tmp/mohdel-data.sock /tmp/mohdel-admin.sock ./js/session/bin.js
169
+ ```
170
+
171
+ Positional args are optional (data socket, admin socket, session bin). Env overrides:
172
+ - `MOHDEL_SESSION_BIN` — path to session entrypoint (defaults to none; if unset, data plane returns synthetic events)
173
+ - `MOHDEL_SESSION_POOL_SIZE` — pre-warmed sessions (default 2)
174
+
175
+ With no session-bin configured, thin-gate runs in demo mode: `POST /v1/call` returns a synthetic echo event sequence. Useful for health-checking the HTTP layer without a runtime dependency on Node.
176
+
177
+ ### Calling from JS
178
+
179
+ ```js
180
+ import { call } from 'mohdel/client'
181
+
182
+ const envelope = {
183
+ callId: 'c-1',
184
+ authId: 'u-1',
185
+ auth: { key: process.env.ANTHROPIC_API_SK },
186
+ model: 'anthropic/claude-haiku-4-5',
187
+ prompt: 'Say hi.',
188
+ outputBudget: 100
189
+ }
190
+
191
+ for await (const ev of call(envelope, { socketPath: '/tmp/mohdel-data.sock' })) {
192
+ if (ev.type === 'delta') process.stdout.write(ev.delta.delta)
193
+ else if (ev.type === 'done') console.log('\n→ status:', ev.result.status, 'cost:', ev.result.cost)
194
+ else if (ev.type === 'error') console.error('error:', ev.error.message)
195
+ }
196
+ ```
197
+
198
+ Client surface is deliberately tiny: `call(envelope, { socketPath, signal? })`. Pass an `AbortSignal` to cancel in flight; thin-gate will forward a cancel control message to the session and then return that session to the pool for reuse. The envelope is the flat `answer(prompt, options)` surface plus transport metadata (`callId`, `authId`, `auth.key`, optional `traceparent`); see [`js/core/envelope.js`](js/core/envelope.js) for the full field list.
199
+
200
+ ### Canonical types (frozen wire contract)
201
+
202
+ Wire format is JSON over NDJSON frames, camelCase. Types are defined in `js/core/` (JSDoc) and mirrored in `rust/thin-gate/src/protocol.rs` (serde). Cross-language conformance tests enforce round-trip fidelity. The session-side protocol (envelopes in, events out, cancel control messages) is specified in [PROTOCOL.md](PROTOCOL.md) — read that to implement a session in another language.
203
+
204
+ - **`CallEnvelope`** — flat `answer()` options plus transport metadata: `callId`, `authId`, `auth.key`, `traceparent?`, `baggage?`, `provider`, `model`, `prompt`, `outputBudget?`, `outputType?`, `outputStyle?`, `outputEffort?`, `images?`, `videos?`, `cache?`, `tools?`, `toolChoice?`, `parallelToolCalls?`, `identifier?`.
205
+ - **`Event`** — three-variant union discriminated on `type`:
206
+ - `{ type: 'delta', delta: { type: 'message' | 'function_call', delta: string } }`
207
+ - `{ type: 'done', result: AnswerResult }`
208
+ - `{ type: 'error', error: TypedError }`
209
+ - **`AnswerResult`** — `status`, `output`, `inputTokens`, `outputTokens`, `thinkingTokens`, `cost` (single number), `timestamps`, `warning?`, `toolCalls?`.
210
+ - **`Status`** — `'completed' | 'tool_use' | 'incomplete'`.
211
+ - **`Warning`** — additive string union: `'insufficientOutputBudget'`, `'cancelled'`, ...
212
+ - **`TypedError`** — `{ message, detail?, severity, retryable, type? }`. `message` is a stable machine key; `detail` is user-facing context; `severity` is `'trace' | 'debug' | 'info' | 'warn' | 'error' | 'fatal'`; `type` is an optional canonical tag (e.g. `'AUTH_INVALID'`, `'PROVIDER_COOLDOWN'`).
213
+
214
+ A `cancel` control message `{ op: "cancel", callId }` on session stdin aborts the matching in-flight call.
215
+
216
+ Extending the frozen wire types is breaking — additive changes only on trait method sets and non-frozen internals. See [ARCHITECTURE.md §What isn't frozen](ARCHITECTURE.md#what-isnt-frozen) for the refinable-vs-frozen split.
217
+
218
+ ### Adding a new provider adapter
219
+
220
+ See [CONTRIBUTING.md](CONTRIBUTING.md#adding-a-session-adapter-090). Short version:
221
+
222
+ 1. Create `js/session/adapters/<provider>.js` exporting `async function* <provider>(envelope, { client?, signal? })`.
223
+ 2. Map provider-native events to the canonical Event union.
224
+ 3. Pass `{ signal }` to the SDK's streaming method so cancellation aborts in-flight HTTP.
225
+ 4. On SDK throw: if `signal?.aborted`, return silently (run() emits call.cancelled); else yield `call.error` via `classifyProviderError(e)` from `./_errors.js`.
226
+ 5. Register in `js/session/adapters/index.js`.
227
+ 6. Write unit tests with a dependency-injected mock client.
228
+ 7. Optionally add a gated live test in `test/live/<provider>.live.test.js`.
229
+
230
+ ## Configuration
231
+
232
+ API keys live in `~/.config/mohdel/environment` (one `KEY=value` per line, loaded automatically):
233
+
234
+ ```
235
+ ANTHROPIC_API_SK=sk-ant-...
236
+ OPENAI_API_SK=sk-...
237
+ GEMINI_API_SK=AI...
238
+ GROQ_API_SK=gsk_...
239
+ XAI_API_SK=xai-...
240
+ CEREBRAS_API_SK=csk-...
241
+ MISTRAL_API_SK=...
242
+ FIREWORKS_API_SK=fw_...
243
+ DEEPSEEK_API_SK=sk-...
244
+ OPENROUTER_API_SK=sk-or-...
245
+ NOVITA_API_SK=...
246
+ ```
247
+
248
+ Only set keys for providers you use. Run `mo` with no arguments for interactive setup.
249
+
250
+ ### File locations
251
+
252
+ | Path | Purpose |
253
+ |------|---------|
254
+ | `~/.config/mohdel/environment` | API keys |
255
+ | `~/.config/mohdel/default.json` | Default model selection |
256
+ | `~/.config/mohdel/curated.json` | Model catalog with metadata, tags, pricing |
257
+ | `~/.config/mohdel/providers.json` | Provider-level rate limits |
258
+ | `~/.config/mohdel/excluded.json` | Excluded models |
259
+ | `~/.cache/mohdel/uploaded-files.json` | Gemini file upload cache |
260
+
261
+ Paths follow the [XDG convention](https://specifications.freedesktop.org/basedir-spec/basedir-spec-latest.html) via `env-paths`.
262
+
263
+ ## Provider Matrix
264
+
265
+ What each provider supports through mohdel's unified interface:
266
+
267
+ | Provider | Streaming | Tools | Vision | Video | Thinking | Notes |
268
+ |----------|-----------|-------|--------|-------|----------|-------|
269
+ | Anthropic | Yes | Yes | Yes | No | Yes (adaptive / budget) | `identifier` → `metadata.user_id` |
270
+ | OpenAI | Yes | Yes | Yes | No | Yes (o-series) | GPT-5 verbosity via `outputStyle` |
271
+ | Gemini | Yes | Yes | Yes | Yes | Yes (`thinkingLevel` / `thinkingBudget`) | Auto-uploads large videos; content-hashed cache |
272
+ | Cerebras | No | Yes | Yes | No | Yes (`reasoning_effort` or zai `disable_reasoning`) | Non-streaming chat completions |
273
+ | Groq | No | Yes | Yes | No | No | Non-streaming; shared chat-completions path |
274
+ | xAI | Yes | Yes | Yes | No | Auto | OpenAI Responses API over `api.x.ai/v1` |
275
+ | DeepSeek | No | Yes | Yes | No | No | DSML tool-call fallback when model emits tags in content |
276
+ | Fireworks | Yes | Yes | Yes | No | Yes (`reasoning_effort`) | OpenAI SDK + `baseURL`; model id auto-prefixed |
277
+ | Mistral | No | Yes | Yes | No | No | `tool_choice: "any"` = required |
278
+ | OpenRouter | Yes | Yes | Yes | No | Varies | Meta-provider; `providerOptions.openrouter` for routing prefs |
279
+ | Novita | No | No | No | No | No | Image generation only |
280
+
281
+ Adapter capability ≠ model capability — whether a given model accepts images, tools, or thinking effort depends on the model spec in `curated.json`. The adapter passes through what the envelope supplies; the provider rejects unsupported combos.
282
+
283
+ ## Local Development
284
+
285
+ ```bash
286
+ git clone <repo> && cd mohdel
287
+ npm install
288
+ npm test # unit tests, no API keys
289
+ ```
290
+
291
+ ### Rust tests
292
+
293
+ ```bash
294
+ cargo test --workspace # thin-gate + napi-addon
295
+ cargo build --release --bin mohdel-thin-gate
296
+ ```
297
+
298
+ Test files under `rust/thin-gate/tests/`:
299
+
300
+ | File | Coverage |
301
+ |------|----------|
302
+ | `conformance.rs` | JS↔Rust protocol round-trip |
303
+ | `protocol.rs` | serde (de)serialization of envelope/events/results |
304
+ | `server.rs` | HTTP layer, synthetic dispatch, 404/400 paths |
305
+ | `session_dispatch.rs` | real `node js/session/bin.js` spawn + dispatch + graceful cancel |
306
+ | `policy.rs` | `RoutePolicy` + `QuotaPolicy` + `Enforcer` end-to-end |
307
+ | `config.rs` | TOML `ConfigSource` parsing, defaults, malformed, env override |
308
+ | `supervision.rs` | readiness ping/pong + readiness timeout + garbage-response handling |
309
+ | `stress.rs` | 100 concurrent calls, cancel storm, session-death-under-load |
310
+
311
+ Spawning tests require `node` in PATH.
312
+
313
+ ### Provider integration tests
314
+
315
+ These hit real provider APIs. Models are drawn from your local `curated.json` — one per provider. Each provider block is skipped automatically when its API key is missing.
316
+
317
+ ```bash
318
+ npm run test:provider # all providers via the factory path
319
+ TAG=fast npm run test:provider # filter by model tag
320
+ npm run test:multiturn # multi-turn conversation tests (incl. tool round-trip)
321
+ npm run test:vision # image input tests
322
+ ```
323
+
324
+ ### Live adapter tests
325
+
326
+ Exercise the session adapters directly against real provider APIs. Gated on env keys; skipped cleanly when keys are absent. See `test/live/README.md` for details.
327
+
328
+ ```bash
329
+ ANTHROPIC_API_SK=sk-ant-... npm run test:live
330
+ OPENAI_API_SK=sk-... npm run test:live
331
+ ```
332
+
333
+ ### Scenario-driven testing (the `fake` provider)
334
+
335
+ For deterministic stress, benchmark, and bug-repro work, set `provider: "fake"` in the envelope and supply a JSON `prompt` that drives the scenario:
336
+
337
+ ```js
338
+ { mode: 'volume', tokens: 1000 } // throughput stress
339
+ { mode: 'slow', tokens: 50, delayMs: 100 } // streaming cadence
340
+ { mode: 'error', type: 'AUTH_INVALID' } // error classification
341
+ { mode: 'hang' } // cancel / timeout plumbing
342
+ { mode: 'tool', name: 'f', args: { x: 1 } } // tool round-trip
343
+ { mode: 'incomplete' } // status contract
344
+ { mode: 'crash' } // process isolation (exits the adapter process)
345
+ { mode: 'cancel_after', tokens: 5 } // cancel mid-stream
346
+ ```
347
+
348
+ All modes honor `AbortSignal`. The benchmarks in `bench/` use this to pin adapter work to a fixed shape and isolate what's being measured — see `bench/bench.js` (throughput) and `bench/isolation.js` (crash containment).
349
+
350
+ ### npm scripts
351
+
352
+ | Command | Description |
353
+ |---------|-------------|
354
+ | `npm test` | Unit tests (vitest) |
355
+ | `npm run test:provider` | Provider integration via the factory — real API calls |
356
+ | `npm run test:live` | Live session-adapter tests (env-key gated) |
357
+ | `npm run lint` | StandardJS lint |
358
+ | `npm run cli` | Interactive model picker |
359
+ | `cargo test --workspace` | Rust tests (thin-gate + protocol + policy + stress + ...) |
360
+ | `node bench/bench.js` | In-process vs via-gate throughput benchmark |
361
+ | `node bench/isolation.js` | Crash-isolation demo (in-process dies, via-gate contains) |
362
+
363
+ ## Contributing
364
+
365
+ Fork the repository and submit a pull request. Code style: Node 22+, ES modules, no semicolons, 2-space indent, single quotes (StandardJS). See [CONTRIBUTING.md](CONTRIBUTING.md) for details.
366
+
367
+ **Mohdel's wire is language-agnostic.** The JS client is the first implementation, not the only one — a Python / Go / Ruby / Swift / Elixir / ... client is a great starter contribution. See [CONTRIBUTING.md §Porting a client to another language](CONTRIBUTING.md#porting-a-client-to-another-language) and [PROTOCOL.md](PROTOCOL.md).
368
+
369
+ ## See Also
370
+
371
+ - [INTEGRATION.md](INTEGRATION.md) — embed mohdel in your code (factory, model proxy, answer options, tool use, streaming, vision, errors, OTel)
372
+ - [ARCHITECTURE.md](ARCHITECTURE.md) — design decisions and rationale
373
+ - [LOGGING.md](LOGGING.md) — log levels, prefixes, pino integration
374
+
375
+ ## License
376
+
377
+ MIT. See `LICENSE`.
@@ -0,0 +1,39 @@
1
+ {
2
+ "benchmarks": {
3
+ "gpqa_score": { "weight": 0.20, "group": "analysis", "label": "GPQA Diamond", "scale": "0-1" },
4
+ "mmmu_pro_score": { "weight": 0.15, "group": "analysis", "label": "MMMU-Pro", "scale": "0-1" },
5
+ "mrcr_v2_score": { "weight": 0.10, "group": "analysis", "label": "MRCR v2", "scale": "0-1" },
6
+ "tau_bench_retail_score": { "weight": 0.20, "group": "tool_loop", "label": "Tau2 Retail", "scale": "0-1" },
7
+ "toolathlon_score": { "weight": 0.15, "group": "tool_loop", "label": "Toolathlon", "scale": "0-1" },
8
+ "swe_bench_verified_score": { "weight": 0.20, "group": "cowork", "label": "SWE-bench Verified", "scale": "0-1" }
9
+ },
10
+ "minCoverage": 2,
11
+ "defaultTop": 20,
12
+ "cacheTtlHours": 24,
13
+ "useCasePresets": {
14
+ "analysis": {
15
+ "gpqa_score": 0.35,
16
+ "mmmu_pro_score": 0.25,
17
+ "mrcr_v2_score": 0.20,
18
+ "tau_bench_retail_score": 0.10,
19
+ "toolathlon_score": 0.05,
20
+ "swe_bench_verified_score": 0.05
21
+ },
22
+ "tool_loop": {
23
+ "gpqa_score": 0.05,
24
+ "mmmu_pro_score": 0.05,
25
+ "mrcr_v2_score": 0.05,
26
+ "tau_bench_retail_score": 0.40,
27
+ "toolathlon_score": 0.25,
28
+ "swe_bench_verified_score": 0.20
29
+ },
30
+ "cowork": {
31
+ "gpqa_score": 0.10,
32
+ "mmmu_pro_score": 0.05,
33
+ "mrcr_v2_score": 0.05,
34
+ "tau_bench_retail_score": 0.10,
35
+ "toolathlon_score": 0.10,
36
+ "swe_bench_verified_score": 0.60
37
+ }
38
+ }
39
+ }
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Send a CallEnvelope to thin-gate; returns an async iterable of Events.
3
+ *
4
+ * Cancellation: pass an AbortSignal. Aborting destroys the HTTP request;
5
+ * thin-gate infers cancel from connection close and emits
6
+ * `call.cancelled` upstream.
7
+ *
8
+ * @module client/call
9
+ */
10
+
11
+ import { requestUnix } from './transport.js'
12
+ import { parseNDJSON } from './ndjson.js'
13
+ import { isEvent, MohdelTypedError } from '#core'
14
+
15
/**
 * Stream the Events thin-gate produces for a single CallEnvelope.
 *
 * The envelope is POSTed over the unix socket; every NDJSON frame of the
 * response is yielded once it has passed the `isEvent` check.
 *
 * @param {import('#core/envelope.js').CallEnvelope} envelope
 * @param {object} options
 * @param {string} options.socketPath
 * @param {AbortSignal} [options.signal]
 * @param {string} [options.path] HTTP path; defaults to '/v1/call'
 * @returns {AsyncGenerator<import('#core/events.js').Event>}
 * @throws {MohdelTypedError} when the response status is not 200, or when
 *   a frame in the response body is not an Event
 */
export async function * call (envelope, { socketPath, signal, path = '/v1/call' }) {
  const request = { socketPath, path, method: 'POST', body: envelope, signal }
  const response = await requestUnix(request)

  const httpOk = response.statusCode === 200
  if (!httpOk) {
    // Non-200 responses carry a single error body rather than an event stream.
    const text = await readAll(response)
    const typed = parseErrorBody(text, response.statusCode ?? 0)
    throw MohdelTypedError.fromJSON(typed)
  }

  for await (const frame of parseNDJSON(response)) {
    if (isEvent(frame)) {
      yield /** @type {import('#core/events.js').Event} */(frame)
      continue
    }
    throw new MohdelTypedError(
      'received non-Event object from thin-gate',
      { type: 'PROTOCOL_INVALID_EVENT', retryable: false }
    )
  }
}
47
+
48
/**
 * Drain a stream into a single UTF-8 string.
 *
 * Chunks are collected as bytes and decoded once at the end. Decoding each
 * Buffer chunk on its own would corrupt any multi-byte UTF-8 sequence that
 * straddles a chunk boundary (both halves would decode to U+FFFD).
 *
 * @param {AsyncIterable<Buffer|string>} stream
 * @returns {Promise<string>}
 */
async function readAll (stream) {
  const parts = []
  for await (const c of stream) {
    // String chunks are re-encoded so they can be joined with Buffer chunks.
    parts.push(typeof c === 'string' ? Buffer.from(c, 'utf8') : c)
  }
  return Buffer.concat(parts).toString('utf8')
}
57
+
58
/**
 * Turn a non-200 response body into a TypedError-shaped object.
 *
 * A body that parses as a JSON object with a string `type` field is
 * returned as-is. Anything else (invalid JSON, non-object, missing `type`)
 * yields a generic `PROTOCOL_HTTP_ERROR` whose `retryable` flag is set for
 * status codes of 500 and above.
 *
 * @param {string} body
 * @param {number} status
 * @returns {import('#core/errors.js').TypedError}
 */
function parseErrorBody (body, status) {
  let candidate = null
  try {
    candidate = JSON.parse(body)
  } catch {
    // Not JSON: fall through to the generic HTTP error below.
    candidate = null
  }

  const looksTyped = candidate !== null &&
    typeof candidate === 'object' &&
    typeof candidate.type === 'string'
  if (looksTyped) return candidate

  const serverSide = status >= 500
  return {
    type: 'PROTOCOL_HTTP_ERROR',
    message: `thin-gate returned HTTP ${status}`,
    retryable: serverSide
  }
}
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Send an ImageEnvelope to thin-gate's `POST /v1/image`.
3
+ *
4
+ * Image generation is one-shot: single JSON response body, no
5
+ * streaming, no cooldown/rate-limit.
6
+ *
7
+ * @module client/call_image
8
+ */
9
+
10
+ import { requestUnix } from './transport.js'
11
+ import { MohdelTypedError } from '#core'
12
+
13
/**
 * POST an ImageEnvelope to thin-gate over a unix socket and return the
 * decoded result.
 *
 * The whole response body is read first. A non-200 status is rethrown
 * as a `MohdelTypedError`; a body that is not JSON, or that lacks
 * `status: 'completed'` and an `images` array, raises
 * `PROTOCOL_INVALID_EVENT`.
 *
 * @param {import('#core/image.js').ImageEnvelope} envelope
 * @param {object} options
 * @param {string} options.socketPath
 * @param {AbortSignal} [options.signal]
 * @param {string} [options.path] HTTP path; defaults to '/v1/image'
 * @returns {Promise<import('#core/image.js').ImageResult>}
 */
export async function callImage (envelope, { socketPath, signal, path = '/v1/image' }) {
  const response = await requestUnix({ socketPath, path, method: 'POST', body: envelope, signal })
  const text = await readAll(response)

  const status = response.statusCode
  if (status !== 200) {
    throw MohdelTypedError.fromJSON(parseErrorBody(text, status ?? 0))
  }

  let result
  try {
    result = JSON.parse(text)
  } catch {
    throw new MohdelTypedError(
      'thin-gate returned non-JSON image response',
      { type: 'PROTOCOL_INVALID_EVENT', retryable: false }
    )
  }

  const wellFormed = result !== null &&
    typeof result === 'object' &&
    result.status === 'completed' &&
    Array.isArray(result.images)
  if (wellFormed) return result

  throw new MohdelTypedError(
    'thin-gate returned malformed ImageResult',
    { type: 'PROTOCOL_INVALID_EVENT', retryable: false }
  )
}
54
+
55
/**
 * Read every chunk of a stream and return the body as one string.
 *
 * Buffers are fed through a single streaming UTF-8 decoder so that a
 * character whose bytes straddle a chunk boundary decodes correctly
 * rather than becoming replacement characters. String chunks are
 * appended unchanged.
 *
 * @param {AsyncIterable<Buffer|string>} stream
 * @returns {Promise<string>} the full text; '' when the stream is empty
 */
async function readAll (stream) {
  // ignoreBOM: true leaves a leading U+FEFF in place, like Buffer#toString.
  const utf8 = new TextDecoder('utf-8', { ignoreBOM: true })
  const parts = []
  for await (const piece of stream) {
    if (typeof piece === 'string') {
      // Flush pending bytes first so output order matches input order.
      parts.push(utf8.decode(), piece)
    } else {
      parts.push(utf8.decode(piece, { stream: true }))
    }
  }
  // Final flush: a truncated trailing sequence becomes U+FFFD.
  parts.push(utf8.decode())
  return parts.join('')
}
64
+
65
/**
 * Build a typed-error record from an error response.
 *
 * When the body is a JSON object whose `type` is a string, that object
 * is passed through unchanged. Otherwise a `PROTOCOL_HTTP_ERROR` is
 * built from the status, marked retryable for status >= 500.
 *
 * @param {string} body
 * @param {number} status
 * @returns {import('#core/errors.js').TypedError}
 */
function parseErrorBody (body, status) {
  const fallback = {
    type: 'PROTOCOL_HTTP_ERROR',
    message: `thin-gate returned HTTP ${status}`,
    retryable: status >= 500
  }

  let candidate
  try {
    candidate = JSON.parse(body)
  } catch {
    // Unparseable body: report the bare HTTP status.
    return fallback
  }

  if (candidate === null || typeof candidate !== 'object') return fallback
  return typeof candidate.type === 'string' ? candidate : fallback
}
@@ -0,0 +1,72 @@
1
+ /**
2
+ * Resolve the absolute path of the prebuilt `thin-gate` binary that
3
+ * was installed alongside mohdel via `optionalDependencies`.
4
+ *
5
+ * ## How distribution works
6
+ *
7
+ * The main `mohdel` package declares a per-platform sub-package as an
8
+ * optional dependency, e.g. `mohdel-thin-gate-linux-x64-gnu`. npm
9
+ * only installs the sub-package whose `os` / `cpu` / `libc` filters
10
+ * match the host, silently skips the rest. Each sub-package ships a
11
+ * single `bin/mohdel-thin-gate` artifact and an `index.js` that
12
+ * exports its absolute path.
13
+ *
14
+ * This module picks the right sub-package name from
15
+ * `process.platform` / `process.arch`, dynamically imports it, and
16
+ * returns the path. If no matching sub-package is installed (unsupported
17
+ * host, `--no-optional`, or a pre-publish build), throws with a
18
+ * diagnostic message.
19
+ *
20
+ * ## Supported platforms (0.90)
21
+ *
22
+ * - Linux x64 glibc (`linux-x64-gnu`)
23
+ *
24
+ * More platforms are additive post-0.90; the resolver expands without
25
+ * a wire-level change.
26
+ *
27
+ * @module client/gate-binary
28
+ */
29
+
30
/**
 * Locate the prebuilt `thin-gate` binary for the current host.
 *
 * Looks up the host's sub-package name via `platformPackageName()`,
 * dynamically imports that package, and returns its default export,
 * which is expected to be the absolute path of the binary.
 *
 * @returns {Promise<string>} absolute path to the `thin-gate` binary
 * @throws {Error} if no sub-package matches the current host, if the
 *   sub-package cannot be imported (the original failure is attached
 *   as `cause`), or if its default export is not a non-empty string
 */
export async function resolveGateBinary () {
  const pkg = platformPackageName()
  if (!pkg) {
    throw new Error(
      `mohdel: no prebuilt thin-gate binary for platform ${process.platform}/${process.arch}. ` +
      'Supported in 0.90: linux-x64-gnu. Build from source (\'cargo build --release -p mohdel-thin-gate\') ' +
      'and set MOHDEL_GATE_BINARY to the resulting path, or file an issue for your platform.'
    )
  }

  /** @type {any} */
  let mod
  try {
    mod = await import(pkg)
  } catch (e) {
    // Keep the original failure reachable via `cause` so a broken
    // install can be told apart from a missing one when debugging.
    throw new Error(
      `mohdel: prebuilt binary package '${pkg}' is not installed. ` +
      'This usually means npm skipped the optional dependency — reinstall without ' +
      '\'--no-optional\' / \'--omit=optional\', or build from source and set MOHDEL_GATE_BINARY. ' +
      `(cause: ${/** @type {Error} */(e)?.message})`,
      { cause: e }
    )
  }

  // Checked outside the try so a bad export is not misreported as
  // "not installed", and so callers never receive `undefined`.
  const binaryPath = mod.default
  if (typeof binaryPath !== 'string' || binaryPath === '') {
    throw new Error(
      `mohdel: prebuilt binary package '${pkg}' did not export a binary path ` +
      `(default export is ${typeof binaryPath}).`
    )
  }
  return binaryPath
}
57
+
58
/**
 * Pick the sub-package name for the running host from
 * `process.platform` and `process.arch`.
 *
 * Only linux/x64 is recognised; every other combination yields `null`.
 * libc is not inspected here.
 *
 * @returns {string | null}
 */
function platformPackageName () {
  const { platform, arch } = process
  const isLinuxX64 = platform === 'linux' && arch === 'x64'
  return isLinuxX64 ? 'mohdel-thin-gate-linux-x64-gnu' : null
}